# -*- coding: utf-8 -*-
#$Date: 2012-02-28 21:07:21 -0300 (Tue, 28 Feb 2012) $
#$Revision: 2443 $

# :Author: a Pygments author|contributor; Felix Wiemann; Guenter Milde
# :Date: $Date: 2012-02-28 21:07:21 -0300 (Tue, 28 Feb 2012) $
# :Copyright: This module has been placed in the public domain.
#
# This is a merge of `Using Pygments in ReST documents`_ from the pygments_
# documentation, and a `proof of concept`_ by Felix Wiemann.
#
# ========== ===========================================================
# 2007-06-01 Removed redundancy from class values.
# 2007-06-04 Merge of successive tokens of same type
#            (code taken from pygments.formatters.others).
# 2007-06-05 Separate docutils formatter script
#            Use pygments' CSS class names (like the html formatter)
#            allowing the use of pygments-produced style sheets.
# 2007-06-07 Merge in the formatting of the parsed tokens
#            (misnamed as docutils_formatter) as class DocutilsInterface
# 2007-06-08 Failsafe implementation (fallback to a standard literal block
#            if pygments not found)
# ========== ===========================================================
#
# ::

"""Define and register a code-block directive using pygments"""

# Requirements
# ------------
# ::

import codecs
from copy import copy
import os

try:
    import urlparse
except ImportError:
    # Python 3 moved the module to ``urllib.parse``; keep the old name bound
    # so the rest of the file is unaffected.
    from urllib import parse as urlparse

from docutils import nodes, core
from docutils.parsers.rst import directives

# ``pygments`` doubles as the "highlighting available" flag: the directive
# falls back to a plain literal block whenever it is ``None``.
pygments = None
try:
    import pygments
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters.html import _get_ttype_class
except ImportError:
    # Any of the three imports may have failed.  Reset the flag so that a
    # partial import (package found, helper missing) still selects the
    # fallback instead of failing later on an unbound helper name.
    pygments = None

# Customisation
# -------------
#
# Do not insert inline nodes for the following tokens.
# (You could add e.g. Token.Punctuation like ``['', 'p']``.) ::

unstyled_tokens = ['']

# DocutilsInterface
# -----------------
#
# This interface class combines code from
# pygments.formatters.html and pygments.formatters.others.
#
# It does not require anything of docutils and could also become a part of
# pygments::

class DocutilsInterface(object):
    """Parse `code` string and yield "classified" tokens.

    Arguments

      code        -- string of source code to parse
      language    -- formal language the code is written in.
      custom_args -- optional dict of keyword options handed to the lexer.

    Merge subsequent tokens of the same token-type.

    Yields the tokens as ``(ttype_class, value)`` tuples,
    where ttype_class is taken from pygments.token.STANDARD_TYPES and
    corresponds to the class argument used in pygments html output.
    """

    def __init__(self, code, language, custom_args=None):
        self.code = code
        self.language = language
        # Build the empty default per instance; a ``{}`` default in the
        # signature would be one dict shared by every instance.
        self.custom_args = {} if custom_args is None else custom_args

    def lex(self):
        """Get lexer for language (use text as fallback)"""
        # An empty language or the literal name "none" selects plain text.
        if self.language and self.language.lower() != 'none':
            lexer_name = self.language.lower()
        else:
            lexer_name = 'text'
        try:
            lexer = get_lexer_by_name(lexer_name, **self.custom_args)
        except ValueError:
            # Lexer lookup failed: retry with the plain text lexer and
            # without the custom options.
            lexer = get_lexer_by_name('text')
        return pygments.lex(self.code, lexer)

    def join(self, tokens):
        """Join subsequent tokens of same token-type."""
        tokens = iter(tokens)
        try:
            lasttype, lastval = next(tokens)
        except StopIteration:
            # Empty token stream: yield nothing.  Letting StopIteration
            # escape a generator is an error on newer Pythons.
            return
        for ttype, value in tokens:
            if ttype is lasttype:
                lastval += value
            else:
                yield (lasttype, lastval)
                lasttype, lastval = ttype, value
        yield (lasttype, lastval)

    def __iter__(self):
        """Parse code string and yield "classified" tokens."""
        try:
            tokens = self.lex()
        except IOError:
            # Hand the code back as a single unstyled token.
            yield ('', self.code)
            return
        for ttype, value in self.join(tokens):
            yield (_get_ttype_class(ttype), value)


# code_block_directive
# --------------------
# ::

def _text_not_found(option, options, state_machine):
    """Report that the marker text given for `option` is not in the include.

    Returns whatever ``state_machine.reporter.severe`` returns, so the
    caller can hand it back to docutils as the directive's output.
    """
    return state_machine.reporter.severe(
        'Problem with "%s" option of "%s" '
        'code-block directive:\nText not found.' % (option, options[option]))


def code_block_directive(name, arguments, options, content, lineno,
                         content_offset, block_text, state, state_machine):
    """Parse and classify content of a code_block.

    The code comes either from the directive body (`content`, a sequence of
    lines) or, when the ``include`` option is set, from that file.  An
    included file may be clipped with ``start-at`` / ``start-after`` and
    ``end-at`` / ``end-before``.  ``arguments[0]`` is the language name.

    Returns a list with one ``literal_block`` node, or a list with one
    system message when a clipping marker is not found in the included text.
    """
    # Number of source lines skipped in front of the shown excerpt.  Bound
    # up front so it also exists when no file is included.
    line_offset = 0
    if 'include' in options:
        encoding = options.get('encoding', 'utf-8')
        try:
            # ``with`` closes the file even when decoding fails.
            with codecs.open(options['include'], 'r', encoding) as source:
                content = source.read().rstrip()
        except (IOError, UnicodeError):  # no file or problem reading it
            content = u''
        if content:
            # here we define the start-at and end-at options
            # so that limit is included in extraction
            # this is different than the start-after directive of docutils
            # (docutils/parsers/rst/directives/misc.py L73+)
            # which excludes the beginning
            # the reason is we want to be able to define a start-at like
            # def mymethod(self)
            # and have such a definition included
            after_text = options.get('start-at', None)
            if after_text:
                # skip content in include_text before
                # *and NOT incl.* a matching text
                after_index = content.find(after_text)
                if after_index < 0:
                    return [_text_not_found('start-at', options,
                                            state_machine)]
                # Count the skipped lines *before* cutting them off.
                line_offset += len(content[:after_index].splitlines())
                content = content[after_index:]

            after_text = options.get('start-after', None)
            if after_text:
                # skip content in include_text before
                # *and incl.* a matching text
                after_index = content.find(after_text)
                if after_index < 0:
                    return [_text_not_found('start-after', options,
                                            state_machine)]
                cut = after_index + len(after_text)
                # Add to the offset so that lines already skipped by
                # ``start-at`` stay counted.
                line_offset += len(content[:cut].splitlines())
                content = content[cut:]

            # same changes here for the same reason
            before_text = options.get('end-at', None)
            if before_text:
                # skip content in include_text after
                # *and incl.* a matching text
                before_index = content.find(before_text)
                if before_index < 0:
                    return [_text_not_found('end-at', options,
                                            state_machine)]
                content = content[:before_index + len(before_text)]

            before_text = options.get('end-before', None)
            if before_text:
                # skip content in include_text after
                # *and NOT incl.* a matching text
                before_index = content.find(before_text)
                if before_index < 0:
                    return [_text_not_found('end-before', options,
                                            state_machine)]
                content = content[:before_index]
    else:
        content = u'\n'.join(content)

    if 'tabsize' in options:
        tabw = options['tabsize']
    else:
        # ``or 8`` also covers a ``tab-width`` option given without a value.
        tabw = int(options.get('tab-width') or 8)

    content = content.replace('\t', ' ' * tabw)

    withln = "linenos" in options
    if "linenos_offset" not in options:
        # Without this option numbering always starts at 1.
        line_offset = 0

    language = arguments[0]
    # create a literal block element and set class argument
    code_block = nodes.literal_block(classes=["code", language])

    if withln:
        current_line = 1 + line_offset
        total_lines = content.count('\n') + 1 + line_offset
        lnwidth = len(str(total_lines))
        # "\n" + right-aligned number; the first label omits the newline.
        fstr = "\n%%%dd " % lnwidth
        code_block += nodes.inline(fstr[1:] % current_line,
                                   fstr[1:] % current_line,
                                   classes=['linenumber'])

    # parse content with pygments and add to code_block element
    content = content.rstrip()
    if pygments is None:
        code_block += nodes.Text(content, content)
    else:
        tokens = list(DocutilsInterface(content, language, options))
        # Drop a final bare newline token, which looks bad in the output.
        # The list may be empty, hence the guard.
        if tokens and tokens[-1] == ('', u'\n'):
            tokens = tokens[:-1]
        for cls, value in tokens:
            if withln and "\n" in value:
                # Split on the "\n"s
                values = value.split("\n")
                # The first piece, pass as-is
                code_block += nodes.Text(values[0], values[0])
                # On the second and later pieces, insert \n and linenos
                for ln, chunk in enumerate(values[1:], current_line + 1):
                    if ln <= total_lines:
                        code_block += nodes.inline(fstr % ln, fstr % ln,
                                                   classes=['linenumber'])
                        code_block += nodes.Text(chunk, chunk)
                current_line += len(values) - 1
            elif cls in unstyled_tokens:
                # insert as Text to decrease the verbosity of the output.
                code_block += nodes.Text(value, value)
            else:
                code_block += nodes.inline(value, value, classes=[cls])

    return [code_block]


# Custom argument validators
# --------------------------
# ::
#
# Move to separated module??

def string_list(argument):
    """
    Converts a space- or comma-separated list of values into a python list
    of strings.
    (Directive option conversion function)

    Surrounding whitespace is removed from comma-separated entries.
    Raises ValueError when no argument is supplied.

    Based on positive_int_list of docutils.parsers.rst.directives
    """
    if argument is None:
        raise ValueError('argument required but none supplied')
    if ',' in argument:
        return [entry.strip() for entry in argument.split(',')]
    return argument.split()


def string_bool(argument):
    """
    Converts "True"/"False" (in any letter case) into python boolean values.

    Raises ValueError when the argument is missing or is any other text.
    """
    if argument is None:
        msg = 'argument required but none supplied; choose "True" or "False"'
        raise ValueError(msg)
    lowered = argument.strip().lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    raise ValueError('"%s" unknown; choose from "True" or "False"'
                     % argument)


def csharp_unicodelevel(argument):
    """Validate the ``unicodelevel`` option: none, basic or full."""
    return directives.choice(argument, ('none', 'basic', 'full'))


def lhs_litstyle(argument):
    """Validate the ``litstyle`` option: bird or latex."""
    return directives.choice(argument, ('bird', 'latex'))


def raw_compress(argument):
    """Validate the ``compress`` option: gz or bz2."""
    return directives.choice(argument, ('gz', 'bz2'))


def listings_directive(name, arguments, options, content, lineno,
                       content_offset, block_text, state, state_machine):
    """Render a file from the ``listings`` directory preceded by a link.

    ``arguments[0]`` is the file name below ``listings``, ``arguments[1]``
    the language.  Returns the link node followed by whatever
    ``code_block_directive`` returns for the included file.
    """
    fname = arguments[0]
    options['include'] = os.path.join('listings', fname)
    target = urlparse.urlunsplit(("link", 'listing', fname, '', ''))
    generated_nodes = [core.publish_doctree('`%s <%s>`_'
                                            % (fname, target))[0]]
    generated_nodes += code_block_directive(name, [arguments[1]], options,
                                            content, lineno, content_offset,
                                            block_text, state, state_machine)
    return generated_nodes


code_block_directive.arguments = (1, 0, 1)
listings_directive.arguments = (2, 0, 1)
code_block_directive.content = 1
listings_directive.content = 1
code_block_directive.options = {
    'include': directives.unchanged_required,
    'start-at': directives.unchanged_required,
    'end-at': directives.unchanged_required,
    'start-after': directives.unchanged_required,
    'end-before': directives.unchanged_required,
    'linenos': directives.unchanged,
    'linenos_offset': directives.unchanged,
    'tab-width': directives.unchanged,
    # generic
    'stripnl': string_bool,
    'stripall': string_bool,
    'ensurenl': string_bool,
    'tabsize': directives.positive_int,
    'encoding': directives.encoding,
    # Lua
    'func_name_hightlighting': string_bool,
    'disabled_modules': string_list,
    # Python Console
    'python3': string_bool,
    # Delphi
    'turbopascal': string_bool,
    'delphi': string_bool,
    'freepascal': string_bool,
    'units': string_list,
    # Modula2
    'pim': string_bool,
    'iso': string_bool,
    'objm2': string_bool,
    'gm2ext': string_bool,
    # CSharp
    'unicodelevel': csharp_unicodelevel,
    # Literate haskell
    'litstyle': lhs_litstyle,
    # Raw
    'compress': raw_compress,
    # Rst
    'handlecodeblocks': string_bool,
    # Php
    'startinline': string_bool,
    'funcnamehighlighting': string_bool,
    'disabledmodules': string_list,
}

# The listings directive takes the same options, except that the file to
# include comes from its first argument instead of an ``include`` option.
listings_directive.options = copy(code_block_directive.options)
listings_directive.options.pop('include')

# .. _docutils: http://docutils.sf.net/
# .. _pygments: http://pygments.org/
# .. _Using Pygments in ReST documents: http://pygments.org/docs/rstdirective/
# .. _proof of concept:
#      http://article.gmane.org/gmane.text.docutils.user/3689
#
# Test output
# -----------
#
# If called from the command line, call the docutils publisher to render the
# input::

if __name__ == '__main__':
    from docutils.core import publish_cmdline, default_description
    from docutils.parsers.rst import directives
    directives.register_directive('code-block', code_block_directive)
    description = "code-block directive test output" + default_description
    try:
        import locale
        locale.setlocale(locale.LC_ALL, '')
    except Exception:
        # Best effort only: rendering works without a configured locale.
        pass
    publish_cmdline(writer_name='html', description=description)