aboutsummaryrefslogtreecommitdiffstats
path: root/nikola/pygments_code_block_directive.py
diff options
context:
space:
mode:
Diffstat (limited to 'nikola/pygments_code_block_directive.py')
-rw-r--r--nikola/pygments_code_block_directive.py401
1 files changed, 401 insertions, 0 deletions
diff --git a/nikola/pygments_code_block_directive.py b/nikola/pygments_code_block_directive.py
new file mode 100644
index 0000000..ac91f3c
--- /dev/null
+++ b/nikola/pygments_code_block_directive.py
@@ -0,0 +1,401 @@
+# -*- coding: utf-8 -*-
+#$Date: 2012-02-28 21:07:21 -0300 (Tue, 28 Feb 2012) $
+#$Revision: 2443 $
+
+# :Author: a Pygments author|contributor; Felix Wiemann; Guenter Milde
+# :Date: $Date: 2012-02-28 21:07:21 -0300 (Tue, 28 Feb 2012) $
+# :Copyright: This module has been placed in the public domain.
+#
+# This is a merge of `Using Pygments in ReST documents`_ from the pygments_
+# documentation, and a `proof of concept`_ by Felix Wiemann.
+#
+# ========== ===========================================================
+# 2007-06-01 Removed redundancy from class values.
+# 2007-06-04 Merge of successive tokens of same type
+# (code taken from pygments.formatters.others).
+# 2007-06-05 Separate docutils formatter script
+# Use pygments' CSS class names (like the html formatter)
+# allowing the use of pygments-produced style sheets.
+# 2007-06-07 Merge in the formatting of the parsed tokens
+# (misnamed as docutils_formatter) as class DocutilsInterface
+# 2007-06-08 Failsafe implementation (fallback to a standard literal block
+# if pygments not found)
+# ========== ===========================================================
+#
+# ::
+
+"""Define and register a code-block directive using pygments"""
+
+
+# Requirements
+# ------------
+# ::
+
+import codecs
+from copy import copy
+import os
+import urlparse
+
+from docutils import nodes, core
+from docutils.parsers.rst import directives
+
+pygments = None
+try:
+ import pygments
+ from pygments.lexers import get_lexer_by_name
+ from pygments.formatters.html import _get_ttype_class
+except ImportError:
+ pass
+
+
+# Customisation
+# -------------
+#
+# Do not insert inline nodes for the following tokens.
+# (You could add e.g. Token.Punctuation like ``['', 'p']``.) ::
+
+unstyled_tokens = ['']
+
+
+# DocutilsInterface
+# -----------------
+#
+# This interface class combines code from
+# pygments.formatters.html and pygments.formatters.others.
+#
+# It does not require anything of docutils and could also become a part of
+# pygments::
+
class DocutilsInterface(object):
    """Parse `code` string and yield "classified" tokens.

    Arguments

      code        -- string of source code to parse
      language    -- formal language the code is written in.
      custom_args -- optional dict of keyword options that is forwarded to
                     the pygments lexer (defaults to no options).

    Merge subsequent tokens of the same token-type.

    Yields the tokens as ``(ttype_class, value)`` tuples,
    where ttype_class is taken from pygments.token.STANDARD_TYPES and
    corresponds to the class argument used in pygments html output.

    """

    def __init__(self, code, language, custom_args=None):
        self.code = code
        self.language = language
        # Use a fresh dict per instance; a literal ``{}`` default would be
        # one object shared (and mutated) across all instances.
        self.custom_args = {} if custom_args is None else custom_args

    def lex(self):
        """Get lexer for language (use text as fallback) and lex the code.

        An empty language, or the name ``none`` in any letter case, selects
        the plain ``text`` lexer.  If the lexer lookup raises ValueError the
        ``text`` lexer is used without the custom arguments.

        Returns whatever ``pygments.lex`` returns for ``self.code``.
        """
        language = self.language
        try:
            if language and language.lower() != 'none':
                lexer = get_lexer_by_name(language.lower(),
                                          **self.custom_args)
            else:
                lexer = get_lexer_by_name('text', **self.custom_args)
        except ValueError:
            # NOTE: this fallback only covers lookup failures; the method
            # still needs pygments itself to be importable.
            lexer = get_lexer_by_name('text')
        return pygments.lex(self.code, lexer)

    def join(self, tokens):
        """Join subsequent tokens of same token-type.

        `tokens` is any iterable of ``(ttype, value)`` pairs.  Yields
        ``(ttype, value)`` pairs in which the values of consecutive pairs
        with the identical token type have been concatenated.  An empty
        input yields nothing.
        """
        tokens = iter(tokens)
        try:
            lasttype, lastval = next(tokens)
        except StopIteration:
            # Empty stream: end the generator explicitly.  Letting
            # StopIteration escape would become a RuntimeError (PEP 479).
            return
        for ttype, value in tokens:
            if ttype is lasttype:
                lastval += value
            else:
                yield (lasttype, lastval)
                lasttype, lastval = ttype, value
        yield (lasttype, lastval)

    def __iter__(self):
        """Parse code string and yield "classified" tokens.

        If lexing raises IOError the whole code is yielded as one unstyled
        ``('', code)`` token.
        """
        try:
            tokens = self.lex()
        except IOError:
            yield ('', self.code)
            return

        for ttype, value in self.join(tokens):
            yield (_get_ttype_class(ttype), value)
+
+
+# code_block_directive
+# --------------------
+# ::
+
def code_block_directive(name, arguments, options, content, lineno,
                         content_offset, block_text, state, state_machine):
    """Parse and classify content of a code_block.

    Arguments used here:

      arguments     -- ``arguments[0]`` is the language name.
      options       -- directive options.  ``include`` replaces the
                       directive content with the text of that file, read
                       with the ``encoding`` option (default ``utf-8``);
                       ``start-at``/``start-after``/``end-at``/``end-before``
                       then cut the included text at a marker string.
                       The whole dict is also forwarded to the lexer.
      content       -- list of content lines (used without ``include``).
      lineno        -- line of the directive, attached to error messages.
      block_text    -- raw directive text, attached to error messages.
      state_machine -- provides the reporter for error messages.

    Returns a list with one ``literal_block`` node, or a list with one
    system message node when a marker text is not found in the included
    file (unless the reporter is configured to halt at that level).
    """

    def marker_not_found(option):
        # Build the "Text not found" system message for a marker option.
        # reporter.severe() returns a node (it is not an exception class),
        # so the message has to be returned to docutils, not raised.
        return state_machine.reporter.severe(
            'Problem with "%s" option of "%s" '
            'code-block directive:\nText not found.' %
            (option, options[option]),
            nodes.literal_block(block_text, block_text), line=lineno)

    # Number of newlines cut from the front of an included file, so that
    # the first remaining line is line ``1 + line_offset`` of that file.
    # Initialised here so it is also bound when there is no ``include``.
    line_offset = 0

    if 'include' in options:
        encoding = options.get('encoding', 'utf-8')
        try:
            with codecs.open(options['include'], 'r', encoding) as source:
                content = source.read().rstrip()
        except (IOError, UnicodeError):  # no file or problem reading it
            content = u''
        if content:
            # here we define the start-at and end-at options
            # so that limit is included in extraction
            # this is different than the start-after directive of docutils
            # (docutils/parsers/rst/directives/misc.py L73+)
            # which excludes the beginning
            # the reason is we want to be able to define a start-at like
            # def mymethod(self)
            # and have such a definition included

            start_text = options.get('start-at', None)
            if start_text:
                # skip content before *and NOT incl.* a matching text
                cut = content.find(start_text)
                if cut < 0:
                    return [marker_not_found('start-at')]
                # Count the skipped lines before slicing the content.
                line_offset += content.count('\n', 0, cut)
                content = content[cut:]

            start_text = options.get('start-after', None)
            if start_text:
                # skip content before *and incl.* a matching text
                cut = content.find(start_text)
                if cut < 0:
                    return [marker_not_found('start-after')]
                cut += len(start_text)
                # Accumulate: ``start-at`` may already have cut lines.
                line_offset += content.count('\n', 0, cut)
                content = content[cut:]

            # same changes here for the same reason
            end_text = options.get('end-at', None)
            if end_text:
                # skip content after *and NOT incl.* a matching text
                cut = content.find(end_text)
                if cut < 0:
                    return [marker_not_found('end-at')]
                content = content[:cut + len(end_text)]

            end_text = options.get('end-before', None)
            if end_text:
                # skip content after *and incl.* a matching text
                cut = content.find(end_text)
                if cut < 0:
                    return [marker_not_found('end-before')]
                content = content[:cut]

    else:
        content = u'\n'.join(content)

    if 'tabsize' in options:
        tabw = options['tabsize']
    else:
        tabw = int(options.get('tab-width', 8))

    content = content.replace('\t', ' ' * tabw)

    withln = "linenos" in options
    # The computed offset is only applied when explicitly requested.
    if "linenos_offset" not in options:
        line_offset = 0

    language = arguments[0]
    # create a literal block element and set class argument
    code_block = nodes.literal_block(classes=["code", language])

    if withln:
        current_line = 1 + line_offset
        total_lines = content.count('\n') + 1 + line_offset
        lnwidth = len(str(total_lines))
        # Label template: newline, then the right-aligned line number.
        fstr = "\n%%%dd " % lnwidth
        # The first label does not need the leading newline.
        first_label = fstr[1:] % current_line
        code_block += nodes.inline(first_label, first_label,
                                   classes=['linenumber'])

    # parse content with pygments and add to code_block element
    content = content.rstrip()
    if pygments is None:
        code_block += nodes.Text(content, content)
    else:
        tokens = list(DocutilsInterface(content, language, options))
        # pygments adds a trailing \n which looks bad; drop it.  The list
        # may be empty, so check before indexing.
        if tokens and tokens[-1] == ('', u'\n'):
            tokens = tokens[:-1]
        for cls, value in tokens:
            if withln and "\n" in value:
                # Split on the "\n"s
                values = value.split("\n")
                # The first piece, pass as-is
                code_block += nodes.Text(values[0], values[0])
                # On the second and later pieces, insert \n and linenos
                for ln, chunk in enumerate(values[1:], current_line + 1):
                    if ln <= total_lines:
                        label = fstr % ln
                        code_block += nodes.inline(label, label,
                                                   classes=['linenumber'])
                        code_block += nodes.Text(chunk, chunk)
                current_line += len(values) - 1

            elif cls in unstyled_tokens:
                # insert as Text to decrease the verbosity of the output.
                code_block += nodes.Text(value, value)
            else:
                code_block += nodes.inline(value, value, classes=[cls])

    return [code_block]
+
+# Custom argument validators
+# --------------------------
+# ::
+#
+# Move to a separate module?
+
+
def string_list(argument):
    """
    Converts a space- or comma-separated list of values into a python list
    of strings.
    (Directive option conversion function)
    Based on positive_int_list of docutils.parsers.rst.directives

    If the argument contains a comma it is split on commas, otherwise on
    whitespace.  Whitespace around each entry is removed and empty entries
    are dropped, so ``"a, b"`` gives ``['a', 'b']``.
    """
    if ',' in argument:
        entries = argument.split(',')
    else:
        entries = argument.split()
    stripped = (entry.strip() for entry in entries)
    return [entry for entry in stripped if entry]
+
+
def string_bool(argument):
    """
    Converts the option text "true" or "false" (in any letter case) into
    the python boolean values True and False.
    (Directive option conversion function)

    Raises ValueError if no argument is supplied or if the text is
    neither "true" nor "false".
    """
    if argument is None:
        raise ValueError(
            'argument required but none supplied; choose "True" or "False"')

    lowered = argument.lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    raise ValueError('"%s" unknown; choose from "True" or "False"'
                     % argument)
+
+
def csharp_unicodelevel(argument):
    """Check the ``unicodelevel`` option via ``directives.choice``.

    The accepted values are ``none``, ``basic`` and ``full``.
    (Directive option conversion function)
    """
    allowed_levels = ('none', 'basic', 'full')
    return directives.choice(argument, allowed_levels)
+
+
def lhs_litstyle(argument):
    """Check the ``litstyle`` option via ``directives.choice``.

    The accepted values are ``bird`` and ``latex``.
    (Directive option conversion function)
    """
    allowed_styles = ('bird', 'latex')
    return directives.choice(argument, allowed_styles)
+
+
def raw_compress(argument):
    """Check the ``compress`` option via ``directives.choice``.

    The accepted values are ``gz`` and ``bz2``.
    (Directive option conversion function)
    """
    allowed_formats = ('gz', 'bz2')
    return directives.choice(argument, allowed_formats)
+
+
def listings_directive(name, arguments, options, content, lineno,
                       content_offset, block_text, state, state_machine):
    """Render a file from the ``listings`` folder as a linked code block.

    ``arguments[0]`` is the file name below ``listings`` and
    ``arguments[1]`` is the language.  The ``include`` entry of `options`
    is set to the path of the listing before the remaining work is handed
    to ``code_block_directive``.

    Returns a list of nodes: first the node published from a reST link to
    the listing, then the nodes returned by ``code_block_directive``.
    """
    listing = arguments[0]
    options['include'] = os.path.join('listings', listing)

    # Publish a one-line reST document holding only the link and keep its
    # first node.
    link_target = urlparse.urlunsplit(("link", 'listing', listing, '', ''))
    link_source = '`%s <%s>`_' % (listing, link_target)
    result = [core.publish_doctree(link_source)[0]]

    result.extend(code_block_directive(
        name, [arguments[1]], options, content, lineno, content_offset,
        block_text, state, state_machine))
    return result
+
# Directive signatures in the docutils function-directive convention:
# ``arguments`` is (required, optional, final argument may hold whitespace)
# and ``content`` is true when the directive may have content.
code_block_directive.arguments = (1, 0, 1)
listings_directive.arguments = (2, 0, 1)
code_block_directive.content = 1
listings_directive.content = 1

# Option name -> conversion function.  Apart from the first group, the
# entries are grouped by the lexer they are meant for.
code_block_directive.options = {
    'include': directives.unchanged_required,
    'start-at': directives.unchanged_required,
    'end-at': directives.unchanged_required,
    'start-after': directives.unchanged_required,
    'end-before': directives.unchanged_required,
    'linenos': directives.unchanged,
    'linenos_offset': directives.unchanged,
    'tab-width': directives.unchanged,
    # generic
    'stripnl': string_bool,
    'stripall': string_bool,
    'ensurenl': string_bool,
    'tabsize': directives.positive_int,
    'encoding': directives.encoding,
    # Lua
    'func_name_highlighting': string_bool,
    # Misspelled variant of the option above, kept so that documents
    # written against the old name are still accepted.
    'func_name_hightlighting': string_bool,
    'disabled_modules': string_list,
    # Python Console
    'python3': string_bool,
    # Delphi
    'turbopascal': string_bool,
    'delphi': string_bool,
    'freepascal': string_bool,
    'units': string_list,
    # Modula2
    'pim': string_bool,
    'iso': string_bool,
    'objm2': string_bool,
    'gm2ext': string_bool,
    # CSharp
    'unicodelevel': csharp_unicodelevel,
    # Literate haskell
    'litstyle': lhs_litstyle,
    # Raw
    'compress': raw_compress,
    # Rst
    'handlecodeblocks': string_bool,
    # Php
    'startinline': string_bool,
    'funcnamehighlighting': string_bool,
    'disabledmodules': string_list,
}

# The listings directive sets ``include`` itself from its first argument,
# so that option is not offered to the document author.
listings_directive.options = copy(code_block_directive.options)
listings_directive.options.pop('include')
+
+# .. _docutils: http://docutils.sf.net/
+# .. _pygments: http://pygments.org/
+# .. _Using Pygments in ReST documents: http://pygments.org/docs/rstdirective/
+# .. _proof of concept:
+# http://article.gmane.org/gmane.text.docutils.user/3689
+#
+# Test output
+# -----------
+#
+# If called from the command line, call the docutils publisher to render the
+# input::
+
if __name__ == '__main__':
    # Test driver: register the directive under the name ``code-block``
    # and let the docutils command line publisher render the input as
    # HTML.
    from docutils.core import publish_cmdline, default_description
    from docutils.parsers.rst import directives

    directives.register_directive('code-block', code_block_directive)

    # Best effort only: any failure to set up the locale is ignored.
    try:
        import locale
        locale.setlocale(locale.LC_ALL, '')
    except Exception:
        pass

    publish_cmdline(
        writer_name='html',
        description="code-block directive test output" + default_description)