1 files changed, 401 insertions, 0 deletions
diff --git a/nikola/pygments_code_block_directive.py b/nikola/pygments_code_block_directive.py
new file mode 100644
index 0000000..ac91f3c
--- /dev/null
+++ b/nikola/pygments_code_block_directive.py
@@ -0,0 +1,401 @@
+# -*- coding: utf-8 -*-
+#$Date: 2012-02-28 21:07:21 -0300 (Tue, 28 Feb 2012) $
+#$Revision: 2443 $
+
+# :Author: a Pygments author|contributor; Felix Wiemann; Guenter Milde
+# :Date: $Date: 2012-02-28 21:07:21 -0300 (Tue, 28 Feb 2012) $
+# :Copyright: This module has been placed in the public domain.
+#
+# This is a merge of `Using Pygments in ReST documents`_ from the pygments_
+# documentation, and a `proof of concept`_ by Felix Wiemann.
+#
+# ========== ===========================================================
+# 2007-06-01 Removed redundancy from class values.
+# 2007-06-04 Merge of successive tokens of same type
+#            (code taken from pygments.formatters.others).
+# 2007-06-05 Separate docutils formatter script
+#            Use pygments' CSS class names (like the html formatter)
+#            allowing the use of pygments-produced style sheets.
+# 2007-06-07 Merge in the formatting of the parsed tokens
+#            (misnamed as docutils_formatter) as class DocutilsInterface
+# 2007-06-08 Failsafe implementation (fallback to a standard literal block
+#            if pygments not found)
+# ========== ===========================================================
+#
+# ::
+
+"""Define and register a code-block directive using pygments"""
+
+
+# Requirements
+# ------------
+# ::
+
+import codecs
+from copy import copy
+import os
+import urlparse
+
+from docutils import nodes, core
+from docutils.parsers.rst import directives
+
+pygments = None
+try:
+    import pygments
+    from pygments.lexers import get_lexer_by_name
+    from pygments.formatters.html import _get_ttype_class
+except ImportError:
+    pass
+
+
+# Customisation
+# -------------
+#
+# Token classes in this list are added as plain Text nodes instead of
+# inline nodes (you could add e.g. Token.Punctuation: ``['', 'p']``). ::
+
+unstyled_tokens = ['']
+
+
+# DocutilsInterface
+# -----------------
+#
+# This interface class combines code from
+# pygments.formatters.html and pygments.formatters.others.
+#
+# It does not require anything of docutils and could also become a part of
+# pygments::
+
+class DocutilsInterface(object):
+    """Parse `code` string and yield "classified" tokens.
+
+    Arguments
+
+      code        -- string of source code to parse
+      language    -- formal language the code is written in.
+      custom_args -- optional dict of keyword options for the lexer.
+
+    Merge subsequent tokens of the same token-type.
+
+    Yields the tokens as ``(ttype_class, value)`` tuples,
+    where ttype_class is taken from pygments.token.STANDARD_TYPES and
+    corresponds to the class argument used in pygments html output.
+    """
+
+    def __init__(self, code, language, custom_args=None):
+        self.code = code
+        self.language = language
+        # New dict per instance; a ``{}`` default is shared by all calls.
+        self.custom_args = {} if custom_args is None else custom_args
+
+    def lex(self):
+        """Get lexer for language (use text as fallback) and lex the code."""
+        try:
+            name = 'text'
+            if self.language and unicode(self.language).lower() != 'none':
+                name = self.language.lower()
+            lexer = get_lexer_by_name(name, **self.custom_args)
+        except ValueError:
+            # Unknown language (ClassNotFound is a ValueError): use 'text'.
+            lexer = get_lexer_by_name('text')
+        return pygments.lex(self.code, lexer)
+
+    def join(self, tokens):
+        """Join subsequent tokens of same token-type; yield (type, value)."""
+        pending = None  # (ttype, value) being accumulated; None at start
+        for ttype, value in tokens:
+            if pending is not None and ttype is pending[0]:
+                pending = (ttype, pending[1] + value)
+            else:
+                if pending is not None:
+                    yield pending
+                pending = (ttype, value)
+        if pending is not None:
+            yield pending
+
+    def __iter__(self):
+        """Parse code string and yield "classified" tokens."""
+        try:
+            # Without pygments there is no lexer: yield the code unstyled.
+            tokens = None if pygments is None else self.lex()
+        except IOError:
+            tokens = None
+        if tokens is None:
+            yield ('', self.code)
+            return
+        for ttype, value in self.join(tokens):
+            yield (_get_ttype_class(ttype), value)
+
+
+# code_block_directive
+# --------------------
+# ::
+
+def code_block_directive(name, arguments, options, content, lineno,
+                       content_offset, block_text, state, state_machine):
+    """Parse and classify content of a code_block.
+
+    Function-style docutils directive.  ``arguments[0]`` is the language.
+    The code is the directive body (`content`) or, when the ``include``
+    option is given, the text of that file, optionally narrowed with the
+    ``start-at``, ``start-after``, ``end-at`` and ``end-before`` options.
+    An unreadable include file is treated as empty content.
+
+    With ``linenos`` line numbers are inserted as ``linenumber`` inline
+    nodes; ``linenos_offset`` makes the numbering start at the position of
+    the extract inside the included file instead of at 1.
+
+    Returns a list with one ``literal_block`` node, or a list with one
+    system message if a start/end marker is not found in the file.
+    """
+    def marker_not_found(option):
+        # ``reporter.severe`` returns a system_message node (it is not an
+        # exception, so it cannot be raised); a function-style directive
+        # reports a problem by returning that node.
+        return [state_machine.reporter.severe(
+            'Problem with "%s" option of "%s" '
+            'code-block directive:\nText not found.' %
+            (option, options[option]),
+            nodes.literal_block(block_text, block_text), line=lineno)]
+
+    # Number of lines of the included file that precede the extract.
+    # Initialised here so it is also defined for inline content.
+    line_offset = 0
+    if 'include' in options:
+        encoding = options.get('encoding', 'utf-8')
+        try:
+            # ``with`` closes the file even when decoding fails.
+            with codecs.open(options['include'], 'r', encoding) as listing:
+                content = listing.read().rstrip()
+        except (IOError, UnicodeError):  # no file or problem reading it
+            content = u''
+        if content:
+            # here we define the start-at and end-at options
+            # so that limit is included in extraction
+            # this is different than the start-after directive of docutils
+            # (docutils/parsers/rst/directives/misc.py L73+)
+            # which excludes the beginning
+            # the reason is we want to be able to define a start-at like
+            # def mymethod(self)
+            # and have such a definition included
+            for option, keep_marker in (('start-at', True),
+                                        ('start-after', False)):
+                marker = options.get(option, None)
+                if not marker:
+                    continue
+                found = content.find(marker)
+                if found < 0:
+                    return marker_not_found(option)
+                # start-after additionally skips the marker text itself
+                cut = found if keep_marker else found + len(marker)
+                # Count the skipped lines *before* slicing (afterwards
+                # the prefix is gone) and accumulate, so that start-at
+                # followed by start-after yields the combined offset.
+                line_offset += len(content[:cut].splitlines())
+                content = content[cut:]
+
+            # same changes here for the same reason
+            for option, keep_marker in (('end-at', True),
+                                        ('end-before', False)):
+                marker = options.get(option, None)
+                if not marker:
+                    continue
+                found = content.find(marker)
+                if found < 0:
+                    return marker_not_found(option)
+                # end-before additionally drops the marker text itself
+                cut = found + len(marker) if keep_marker else found
+                content = content[:cut]
+    else:
+        content = u'\n'.join(content)
+
+    if 'tabsize' in options:
+        tabw = options['tabsize']
+    else:
+        tabw = int(options.get('tab-width', 8))
+    content = content.replace('\t', ' ' * tabw)
+
+    withln = "linenos" in options
+    if "linenos_offset" not in options:
+        line_offset = 0
+
+    language = arguments[0]
+    # create a literal block element and set class argument
+    code_block = nodes.literal_block(classes=["code", language])
+
+    if withln:
+        current_line = 1 + line_offset
+        total_lines = content.count('\n') + 1 + line_offset
+        lnwidth = len(str(total_lines))
+        fstr = "\n%%%dd " % lnwidth  # newline + right-aligned number
+        first = fstr[1:] % current_line  # first line: no leading newline
+        code_block += nodes.inline(first, first, classes=['linenumber'])
+
+    # parse content with pygments and add to code_block element
+    content = content.rstrip()
+    if pygments is None:
+        code_block += nodes.Text(content, content)
+        return [code_block]
+
+    tokens = list(DocutilsInterface(content, language, options))
+    # pygments adds a trailing \n which looks bad; guard against an
+    # empty token list before looking at the last item.
+    if tokens and tokens[-1] == ('', u'\n'):
+        tokens = tokens[:-1]
+    for cls, value in tokens:
+        if withln and "\n" in value:
+            # Split on the "\n"s; the first piece is passed as-is.
+            # NOTE: the pieces are added as plain Text, so a multi-line
+            # token is not wrapped in its highlighting class here.
+            values = value.split("\n")
+            code_block += nodes.Text(values[0], values[0])
+            # Before every later piece, insert \n and the line number.
+            for chunk in values[1:]:
+                current_line += 1
+                if current_line <= total_lines:
+                    number = fstr % current_line
+                    code_block += nodes.inline(number, number,
+                                               classes=['linenumber'])
+                    code_block += nodes.Text(chunk, chunk)
+        elif cls in unstyled_tokens:
+            # insert as Text to decrease the verbosity of the output.
+            code_block += nodes.Text(value, value)
+        else:
+            code_block += nodes.inline(value, value, classes=[cls])
+
+    return [code_block]
+
+# Custom argument validators
+# --------------------------
+# ::
+#
+# Move to separated module??
+
+
+def string_list(argument):
+    """
+    Converts a space- or comma-separated list of values into a python list
+    of strings, without surrounding whitespace and without empty entries
+    (so ``"a, b"`` gives ``['a', 'b']``, not ``['a', ' b']``).
+    (Directive option conversion function)
+    Based on positive_int_list of docutils.parsers.rst.directives
+    """
+    separator = ',' if ',' in argument else None  # None: any whitespace
+    # Strip each entry: "a, b".split(',') leaves a leading blank on ' b'.
+    entries = [entry.strip() for entry in argument.split(separator)]
+    return [entry for entry in entries if entry]
+
+
+def string_bool(argument):
+    """
+    Converts "True"/"true" and "False"/"false" (any letter case) into the
+    matching python boolean; raises ValueError for anything else.
+    (Directive option conversion function)
+    """
+    if argument is None:
+        raise ValueError(
+            'argument required but none supplied; choose "True" or "False"')
+
+    normalized = argument.lower()
+    if normalized not in ('true', 'false'):
+        raise ValueError('"%s" unknown; choose from "True" or "False"'
+                        % argument)
+    return normalized == 'true'
+
+
+def csharp_unicodelevel(argument):  # validates 'unicodelevel' (CSharp)
+    return directives.choice(argument, ('none', 'basic', 'full'))
+
+
+def lhs_litstyle(argument):  # validates 'litstyle' (Literate haskell)
+    return directives.choice(argument, ('bird', 'latex'))
+
+
+def raw_compress(argument):  # validates 'compress' (Raw)
+    return directives.choice(argument, ('gz', 'bz2'))
+
+
+def listings_directive(name, arguments, options, content, lineno,
+                       content_offset, block_text, state, state_machine):
+    """Return a link to the listing file followed by its code block."""
+    listing = arguments[0]
+    options['include'] = os.path.join('listings', listing)
+    url = urlparse.urlunsplit(("link", 'listing', listing, '', ''))
+    link_node = core.publish_doctree('`%s <%s>`_' % (listing, url))[0]
+    return [link_node] + code_block_directive(
+        name, [arguments[1]], options, content, lineno, content_offset,
+        block_text, state, state_machine)
+
+code_block_directive.arguments = (1, 0, 1)  # language
+listings_directive.arguments = (2, 0, 1)  # file name, language
+code_block_directive.content = 1
+listings_directive.content = 1
+code_block_directive.options = {'include': directives.unchanged_required,
+                                'start-at': directives.unchanged_required,
+                                'end-at': directives.unchanged_required,
+                                'start-after': directives.unchanged_required,
+                                'end-before': directives.unchanged_required,
+                                'linenos': directives.unchanged,
+                                'linenos_offset': directives.unchanged,
+                                'tab-width': directives.unchanged,
+                                # generic
+                                'stripnl': string_bool,
+                                'stripall': string_bool,
+                                'ensurenl': string_bool,
+                                'tabsize': directives.positive_int,
+                                'encoding': directives.encoding,
+                                # Lua
+                                'func_name_highlighting': string_bool,
+                                # old misspelling, kept for compatibility
+                                'func_name_hightlighting': string_bool,
+                                'disabled_modules': string_list,
+                                # Python Console
+                                'python3': string_bool,
+                                # Delphi
+                                'turbopascal': string_bool,
+                                'delphi': string_bool,
+                                'freepascal': string_bool,
+                                'units': string_list,
+                                # Modula2
+                                'pim': string_bool,
+                                'iso': string_bool,
+                                'objm2': string_bool,
+                                'gm2ext': string_bool,
+                                # CSharp
+                                'unicodelevel': csharp_unicodelevel,
+                                # Literate haskell
+                                'litstyle': lhs_litstyle,
+                                # Raw
+                                'compress': raw_compress,
+                                # Rst
+                                'handlecodeblocks': string_bool,
+                                # Php
+                                'startinline': string_bool,
+                                'funcnamehighlighting': string_bool,
+                                'disabledmodules': string_list}
+listings_directive.options = copy(code_block_directive.options)
+listings_directive.options.pop('include')  # set from the file argument
+
+# .. _docutils: http://docutils.sf.net/
+# .. _pygments: http://pygments.org/
+# .. _Using Pygments in ReST documents: http://pygments.org/docs/rstdirective/
+# .. _proof of concept:
+#      http://article.gmane.org/gmane.text.docutils.user/3689
+#
+# Test output
+# -----------
+#
+# If called from the command line, call the docutils publisher to render the
+# input::
+
+if __name__ == '__main__':
+    try:  # adopt the user's locale; carry on unchanged if that fails
+        import locale
+        locale.setlocale(locale.LC_ALL, '')
+    except Exception:
+        pass
+    from docutils.core import publish_cmdline, default_description
+    from docutils.parsers.rst import directives
+    directives.register_directive('code-block', code_block_directive)
+    description = "code-block directive test output" + default_description
+    publish_cmdline(writer_name='html', description=description)