Diffstat (limited to 'nikola/plugins'): 59 files changed, 4964 insertions, 0 deletions
diff --git a/nikola/plugins/command_bootswatch_theme.plugin b/nikola/plugins/command_bootswatch_theme.plugin
new file mode 100644
index 0000000..f75f734
--- /dev/null
+++ b/nikola/plugins/command_bootswatch_theme.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = bootswatch_theme
+Module = command_bootswatch_theme
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Given a swatch name and a parent theme, creates a custom theme.
+
diff --git a/nikola/plugins/command_bootswatch_theme.py b/nikola/plugins/command_bootswatch_theme.py
new file mode 100644
index 0000000..f077eb1
--- /dev/null
+++ b/nikola/plugins/command_bootswatch_theme.py
@@ -0,0 +1,47 @@
+from optparse import OptionParser
+import os
+import urllib2
+
+from nikola.plugin_categories import Command
+
+
+class CommandBootswatchTheme(Command):
+    """Given a swatch name and a parent theme, creates a custom theme."""
+
+    name = "bootswatch_theme"
+
+    def run(self, *args):
+        """Given a swatch name and a parent theme, creates a custom theme."""
+
+        parser = OptionParser(usage="nikola %s [options]" % self.name)
+        parser.add_option("-n", "--name", dest="name",
+                          help="New theme name (default: custom)",
+                          default='custom')
+        parser.add_option("-s", "--swatch", dest="swatch",
+                          help="Name of the swatch from bootswatch.com "
+                          "(default: slate)", default='slate')
+        parser.add_option("-p", "--parent", dest="parent",
+                          help="Parent theme name (default: site)",
+                          default='site')
+        (options, args) = parser.parse_args(list(args))
+
+        name = options.name
+        swatch = options.swatch
+        parent = options.parent
+
+        print "Creating '%s' theme from '%s' and '%s'" % (
+            name, swatch, parent)
+        try:
+            os.makedirs(os.path.join('themes', name, 'assets', 'css'))
+        except OSError:
+            pass
+        for fname in ('bootstrap.min.css', 'bootstrap.css'):
+            url = 'http://bootswatch.com/%s/%s' % (swatch, fname)
+            print "Downloading: ", url
+            data = urllib2.urlopen(url).read()
+            with open(os.path.join(
+                    'themes', name, 'assets', 'css', fname), 'wb+') as output:
+                output.write(data)
+
+        with open(os.path.join('themes', name, 'parent'), 'wb+') as output:
+            output.write(parent)
+        print 'Theme created. Change the THEME setting to "%s" to use it.'\
+            % name
diff --git a/nikola/plugins/command_build.plugin b/nikola/plugins/command_build.plugin
new file mode 100644
index 0000000..7d029a7
--- /dev/null
+++ b/nikola/plugins/command_build.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = build
+Module = command_build
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Build the site.
+
diff --git a/nikola/plugins/command_build.py b/nikola/plugins/command_build.py
new file mode 100644
index 0000000..cface15
--- /dev/null
+++ b/nikola/plugins/command_build.py
@@ -0,0 +1,32 @@
+import os
+import tempfile
+
+from nikola.plugin_categories import Command
+
+
+class CommandBuild(Command):
+    """Build the site."""
+
+    name = "build"
+
+    def run(self, *args):
+        """Build the site using doit."""
+
+        # FIXME: this is crap, do it right
+        with tempfile.NamedTemporaryFile(suffix='.py', delete=False) as dodo:
+            dodo.write('''
+from doit.reporter import ExecutedOnlyReporter
+DOIT_CONFIG = {
+    'reporter': ExecutedOnlyReporter,
+    'default_tasks': ['render_site'],
+}
+from nikola import Nikola
+import conf
+SITE = Nikola(**conf.__dict__)
+
+
+def task_render_site():
+    return SITE.gen_tasks()
+''')
+            dodo.flush()
+            os.system('doit -f %s -d . %s' % (dodo.name, ' '.join(args)))
diff --git a/nikola/plugins/command_check.plugin b/nikola/plugins/command_check.plugin
new file mode 100644
index 0000000..d4dcd1c
--- /dev/null
+++ b/nikola/plugins/command_check.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = check
+Module = command_check
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Check the generated site
+
diff --git a/nikola/plugins/command_check.py b/nikola/plugins/command_check.py
new file mode 100644
index 0000000..ce1e2e3
--- /dev/null
+++ b/nikola/plugins/command_check.py
@@ -0,0 +1,109 @@
+from optparse import OptionParser
+import os
+import sys
+import urllib
+from urlparse import urlparse
+
+import lxml.html
+
+from nikola.plugin_categories import Command
+
+
+class CommandCheck(Command):
+    """Check the generated site."""
+
+    name = "check"
+
+    def run(self, *args):
+        """Check the generated site."""
+        parser = OptionParser(usage="nikola %s [options]" % self.name)
+        parser.add_option('-l', '--check-links', dest='links',
+                          action='store_true',
+                          help='Check for dangling links.')
+        parser.add_option('-f', '--check-files', dest='files',
+                          action='store_true',
+                          help='Check for unknown files.')
+
+        (options, args) = parser.parse_args(list(args))
+        if options.links:
+            scan_links()
+        if options.files:
+            scan_files()
+
+existing_targets = set([])
+
+
+def analyze(task):
+    try:
+        filename = task.split(":")[-1]
+        d = lxml.html.fromstring(open(filename).read())
+        for l in d.iterlinks():
+            target = l[0].attrib[l[1]]
+            if target == "#":
+                continue
+            parsed = urlparse(target)
+            if parsed.scheme:
+                continue
+            if parsed.fragment:
+                target = target.split('#')[0]
+            target_filename = os.path.abspath(
+                os.path.join(os.path.dirname(filename),
+                             urllib.unquote(target)))
+            if target_filename not in existing_targets:
+                if os.path.exists(target_filename):
+                    existing_targets.add(target_filename)
+                else:
+                    print "In %s broken link: " % filename, target
+                    if '--find-sources' in sys.argv:
+                        print "Possible sources:"
+                        print os.popen(
+                            'nikola build list --deps %s' % task, 'r').read()
+                        print "===============================\n"
+
+    except Exception as exc:
+        print "Error with:", filename, exc
+
+
+def scan_links():
+    print "Checking Links:\n===============\n"
+    for task in os.popen('nikola build list --all', 'r').readlines():
+        task = task.strip()
+        if task.split(':')[0] in (
+                'render_tags',
+                'render_archive',
+                'render_galleries',
+                'render_indexes',
+                'render_pages',
+                'render_site') and '.html' in task:
+            analyze(task)
+
+
+def scan_files():
+    print "Checking Files:\n===============\n"
+    task_fnames = set([])
+    real_fnames = set([])
+    # First check that all targets are generated in the right places
+    for task in os.popen('nikola build list --all', 'r').readlines():
+        task = task.strip()
+        if 'output' in task and ':' in task:
+            fname = task.split(':')[-1]
+            task_fnames.add(fname)
+    # And now check that there are no non-target files
+    for root, dirs, files in os.walk('output'):
+        for src_name in files:
+            fname = os.path.join(root, src_name)
+            real_fnames.add(fname)
+
+    only_on_output = list(real_fnames - task_fnames)
+    if only_on_output:
+        only_on_output.sort()
+        print "\nFiles from unknown origins:\n"
+        for f in only_on_output:
+            print f
+
+    only_on_input = list(task_fnames - real_fnames)
+    if only_on_input:
+        only_on_input.sort()
+        print "\nFiles not generated:\n"
+        for f in only_on_input:
+            print f
diff --git a/nikola/plugins/command_deploy.plugin b/nikola/plugins/command_deploy.plugin
new file mode 100644
index 0000000..c8776b5
--- /dev/null
+++ b/nikola/plugins/command_deploy.plugin
@@ -0,0 +1,9 @@
+[Core]
+Name = deploy
+Module = command_deploy
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Deploy the site
diff --git a/nikola/plugins/command_deploy.py b/nikola/plugins/command_deploy.py
new file mode 100644
index 0000000..cb2eb41
--- /dev/null
+++ b/nikola/plugins/command_deploy.py
@@ -0,0 +1,16 @@
+from optparse import OptionParser
+import os
+
+from nikola.plugin_categories import Command
+
+
+class Deploy(Command):
+    """Deploy site."""
+    name = "deploy"
+
+    def run(self, *args):
+        parser = OptionParser(usage="nikola %s [options]" % self.name)
+        (options, args) = parser.parse_args(list(args))
+        for command in self.site.config['DEPLOY_COMMANDS']:
+            print "==>", command
+            os.system(command)
diff --git a/nikola/plugins/command_import_wordpress.plugin b/nikola/plugins/command_import_wordpress.plugin
new file mode 100644
index 0000000..a2477b9
--- /dev/null
+++ b/nikola/plugins/command_import_wordpress.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = import_wordpress
+Module = command_import_wordpress
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Import a wordpress site from a XML dump (requires markdown).
+
diff --git a/nikola/plugins/command_import_wordpress.py b/nikola/plugins/command_import_wordpress.py
new file mode 100644
index 0000000..e75d022
--- /dev/null
+++ b/nikola/plugins/command_import_wordpress.py
@@ -0,0 +1,163 @@
+import codecs
+import os
+from urlparse import urlparse
+from urllib import urlopen
+
+from lxml import etree, html
+from mako.template import Template
+
+from nikola.plugin_categories import Command
+from nikola import utils
+
+links = {}
+
+
+class CommandImportWordpress(Command):
+    """Import a wordpress dump."""
+
+    name = "import_wordpress"
+
+    def run(self, fname=None):
+        # Parse the data
+        if fname is None:
+            print "Usage: nikola import_wordpress wordpress_dump.xml"
+            return
+        context = {}
+        with open(fname) as fd:
+            xml = []
+            for line in fd:
+                # These explode etree and are useless
+                if '<atom:link rel=' in line:
+                    continue
+                xml.append(line)
+            xml = '\n'.join(xml)
+
+        tree = etree.fromstring(xml)
+        channel = tree.find('channel')
+
+        context['DEFAULT_LANG'] = get_text_tag(channel, 'language', 'en')[:2]
+        context['BLOG_TITLE'] = get_text_tag(
+            channel, 'title', 'PUT TITLE HERE')
+        context['BLOG_DESCRIPTION'] = get_text_tag(
+            channel, 'description', 'PUT DESCRIPTION HERE')
+        context['BLOG_URL'] = get_text_tag(channel, 'link', '#')
+        author = channel.find('{http://wordpress.org/export/1.2/}author')
+        context['BLOG_EMAIL'] = get_text_tag(
+            author,
+            '{http://wordpress.org/export/1.2/}author_email',
+            "joe@example.com")
+        context['BLOG_AUTHOR'] = get_text_tag(
+            author,
+            '{http://wordpress.org/export/1.2/}author_display_name',
+            "Joe Example")
+        context['POST_PAGES'] = '''(
+            ("posts/*.wp", "posts", "post.tmpl", True),
+            ("stories/*.wp", "stories", "story.tmpl", False),
+        )'''
+        context['POST_COMPILERS'] = '''{
+        "rest": ('.txt', '.rst'),
+        "markdown": ('.md', '.mdown', '.markdown', '.wp'),
+        "html": ('.html', '.htm')
+        }
+        '''
+
+        # Generate base site
+        os.system('nikola init new_site')
+        conf_template = Template(filename=os.path.join(
+            os.path.dirname(utils.__file__), 'data', 'samplesite',
+            'conf.py.in'))
+        with codecs.open(os.path.join('new_site', 'conf.py'),
+                         'w+', 'utf8') as fd:
+            fd.write(conf_template.render(**context))
+
+        # Import posts
+        for item in channel.findall('item'):
+            import_attachment(item)
+        for item in channel.findall('item'):
+            import_item(item)
+
+
+def replacer(dst):
+    return links.get(dst, dst)
+
+
+def get_text_tag(tag, name, default):
+    t = tag.find(name)
+    if t is not None:
+        return t.text
+    else:
+        return default
+
+
+def import_attachment(item):
+    post_type = get_text_tag(
+        item, '{http://wordpress.org/export/1.2/}post_type', 'post')
+    if post_type == 'attachment':
+        url = get_text_tag(
+            item, '{http://wordpress.org/export/1.2/}attachment_url', 'foo')
+        link = get_text_tag(
+            item, '{http://wordpress.org/export/1.2/}link', 'foo')
+        path = urlparse(url).path
+        dst_path = os.path.join(*(['new_site', 'files'] +
+                                  list(path.split('/'))))
+        dst_dir = os.path.dirname(dst_path)
+        if not os.path.isdir(dst_dir):
+            os.makedirs(dst_dir)
+        print "Downloading %s => %s" % (url, dst_path)
+        with open(dst_path, 'wb+') as fd:
+            fd.write(urlopen(url).read())
+        dst_url = '/'.join(dst_path.split(os.sep)[2:])
+        links[link] = '/' + dst_url
+        links[url] = '/' + dst_url
+        return
+
+
+def import_item(item):
+    """Takes an item from the feed and creates a post file."""
+    title = get_text_tag(item, 'title', 'NO TITLE')
+    # link is something like http://foo.com/2012/09/01/hello-world/
+    # So, take the path, utils.slugify it, and that's our slug
+    slug = utils.slugify(urlparse(get_text_tag(item, 'link', None)).path)
+    description = get_text_tag(item, 'description', '')
+    post_date = get_text_tag(
+        item, '{http://wordpress.org/export/1.2/}post_date', None)
+    post_type = get_text_tag(
+        item, '{http://wordpress.org/export/1.2/}post_type', 'post')
+    status = get_text_tag(
+        item, '{http://wordpress.org/export/1.2/}status', 'publish')
+    content = get_text_tag(
+        item, '{http://purl.org/rss/1.0/modules/content/}encoded', '')
+
+    tags = []
+    if status != 'publish':
+        tags.append('draft')
+    for tag in item.findall('category'):
+        text = tag.text
+        if text == 'Uncategorized':
+            continue
+        tags.append(text)
+
+    if post_type == 'attachment':
+        return
+    elif post_type == 'post':
+        out_folder = 'posts'
+    else:
+        out_folder = 'stories'
+    # Write metadata
+    with codecs.open(os.path.join('new_site', out_folder, slug + '.meta'),
+                     "w+", "utf8") as fd:
+        fd.write(u'%s\n' % title)
+        fd.write(u'%s\n' % slug)
+        fd.write(u'%s\n' % post_date)
+        fd.write(u'%s\n' % ','.join(tags))
+        fd.write(u'\n')
+        fd.write(u'%s\n' % description)
+    with open(os.path.join(
+            'new_site', out_folder, slug + '.wp'), "wb+") as fd:
+        if content.strip():
+            try:
+                doc = html.document_fromstring(content)
+                doc.rewrite_links(replacer)
+                fd.write(html.tostring(doc, encoding='utf8'))
+            except:
+                import pdb
+                pdb.set_trace()
diff --git a/nikola/plugins/command_init.plugin b/nikola/plugins/command_init.plugin
new file mode 100644
index 0000000..3c6bd21
--- /dev/null
+++ b/nikola/plugins/command_init.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = init
+Module = command_init
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Create a new site.
+
diff --git a/nikola/plugins/command_init.py b/nikola/plugins/command_init.py
new file mode 100644
index 0000000..a032370
--- /dev/null
+++ b/nikola/plugins/command_init.py
@@ -0,0 +1,34 @@
+from optparse import OptionParser
+import os
+import shutil
+
+import nikola
+from nikola.plugin_categories import Command
+
+
+class CommandInit(Command):
+    """Create a new site."""
+
+    name = "init"
+
+    usage = """Usage: nikola init folder [options].
+
+That will create a sample site in the specified folder.
+The destination folder must not exist.
+"""
+
+    def run(self, *args):
+        """Create a new site."""
+        parser = OptionParser(usage=self.usage)
+        (options, args) = parser.parse_args(list(args))
+
+        target = args[0] if args else None
+        if target is None:
+            print self.usage
+        else:
+            src = os.path.join(os.path.dirname(nikola.__file__),
+                               'data', 'samplesite')
+            shutil.copytree(src, target)
+            print "A new site with some sample data has been created at %s."\
+                % target
+            print "See README.txt in that folder for more information."
diff --git a/nikola/plugins/command_install_theme.plugin b/nikola/plugins/command_install_theme.plugin
new file mode 100644
index 0000000..f010074
--- /dev/null
+++ b/nikola/plugins/command_install_theme.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = install_theme
+Module = command_install_theme
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Install a theme into the current site.
+
diff --git a/nikola/plugins/command_install_theme.py b/nikola/plugins/command_install_theme.py
new file mode 100644
index 0000000..293ce97
--- /dev/null
+++ b/nikola/plugins/command_install_theme.py
@@ -0,0 +1,62 @@
+from optparse import OptionParser
+import os
+import urllib2
+import json
+from io import BytesIO
+
+from nikola.plugin_categories import Command
+from nikola import utils
+
+
+class CommandInstallTheme(Command):
+    """Install a theme into the current site."""
+
+    name = "install_theme"
+
+    def run(self, *args):
+        """Install theme into current site."""
+
+        parser = OptionParser(usage="nikola %s [options]" % self.name)
+        parser.add_option("-l", "--list", dest="list",
+                          action="store_true",
+                          help="Show list of available themes.")
+        parser.add_option("-n", "--name", dest="name",
+                          help="Theme name", default=None)
+        parser.add_option("-u", "--url", dest="url",
+                          help="URL for the theme repository (default: "
+                          "http://nikola.ralsina.com.ar/themes/index.json)",
+                          default='http://nikola.ralsina.com.ar/themes/index.json')
+        (options, args) = parser.parse_args(list(args))
+
+        listing = options.list
+        name = options.name
+        url = options.url
+
+        if name is None and not listing:
+            print "This command needs either the -n or the -l option."
+            return False
+        data = urllib2.urlopen(url).read()
+        data = json.loads(data)
+        if listing:
+            print "Themes:"
+            print "-------"
+            for theme in sorted(data.keys()):
+                print theme
+            return True
+        else:
+            if name in data:
+                if os.path.isfile("themes"):
+                    raise IOError("'themes' isn't a directory!")
+                elif not os.path.isdir("themes"):
+                    try:
+                        os.makedirs("themes")
+                    except OSError:
+                        raise OSError("mkdir 'themes' error!")
+                print 'Downloading: %s' % data[name]
+                zip_file = BytesIO()
+                zip_file.write(urllib2.urlopen(data[name]).read())
+                print 'Extracting: %s into themes' % name
+                utils.extract_all(zip_file)
+            else:
+                print "Can't find theme %s" % name
+                return False
diff --git a/nikola/plugins/command_new_post.plugin b/nikola/plugins/command_new_post.plugin
new file mode 100644
index 0000000..6d70aff
--- /dev/null
+++ b/nikola/plugins/command_new_post.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = new_post
+Module = command_new_post
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Create a new post.
+
diff --git a/nikola/plugins/command_new_post.py b/nikola/plugins/command_new_post.py
new file mode 100644
index 0000000..574df5f
--- /dev/null
+++ b/nikola/plugins/command_new_post.py
@@ -0,0 +1,100 @@
+import codecs
+import datetime
+from optparse import OptionParser
+import os
+import sys
+
+from nikola.plugin_categories import Command
+from nikola import utils
+
+
+class CommandNewPost(Command):
+    """Create a new post."""
+
+    name = "new_post"
+
+    def run(self, *args):
+        """Create a new post."""
+        parser = OptionParser(usage="nikola %s [options]" % self.name)
+        parser.add_option('-p', '--page', dest='is_post',
+                          action='store_false', default=True,
+                          help='Create a page instead of a blog post.')
+        parser.add_option('-t', '--title', dest='title',
+                          help='Title for the page/post.', default=None)
+        parser.add_option('--tags', dest='tags',
+                          help='Comma-separated tags for the page/post.',
+                          default='')
+        parser.add_option('-1', dest='onefile',
+                          action='store_true',
+                          help='Create post with embedded metadata (single file format).',
+                          default=self.site.config.get('ONE_FILE_POSTS', True))
+        parser.add_option('-f', '--format',
+                          dest='post_format',
+                          default='rest',
+                          help='Format for post (rest or markdown)')
+        (options, args) = parser.parse_args(list(args))
+
+        is_post = options.is_post
+        title = options.title
+        tags = options.tags
+        onefile = options.onefile
+        post_format = options.post_format
+
+        # Guess where we should put this
+        for path, _, _, use_in_rss in self.site.config['post_pages']:
+            if use_in_rss == is_post:
+                break
+        else:
+            path = self.site.config['post_pages'][0][0]
+
+        print "Creating New Post"
+        print "-----------------\n"
+        if title is None:
+            title = raw_input("Enter title: ").decode(sys.stdin.encoding)
+        else:
+            print "Title: ", title
+        slug = utils.slugify(title)
+        date = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
+        data = [
+            title,
+            slug,
+            date,
+            tags
+        ]
+        output_path = os.path.dirname(path)
+        meta_path = os.path.join(output_path, slug + ".meta")
+        pattern = os.path.basename(path)
+        if pattern.startswith("*."):
+            suffix = pattern[1:]
+        else:
+            suffix = ".txt"
+        txt_path = os.path.join(output_path, slug + suffix)
+
+        if (not onefile and os.path.isfile(meta_path)) or \
+                os.path.isfile(txt_path):
+            print "The title already exists!"
+            exit()
+
+        if onefile:
+            if post_format not in ('rest', 'markdown'):
+                print "ERROR: Unknown post format %s" % post_format
+                return
+            with codecs.open(txt_path, "wb+", "utf8") as fd:
+                if post_format == 'markdown':
+                    fd.write('<!-- \n')
+                fd.write('.. title: %s\n' % title)
+                fd.write('.. slug: %s\n' % slug)
+                fd.write('.. date: %s\n' % date)
+                fd.write('.. tags: %s\n' % tags)
+                fd.write('.. link: \n')
+                fd.write('.. description: \n')
+                if post_format == 'markdown':
+                    fd.write('-->\n')
+                fd.write(u"Write your post here.")
+        else:
+            with codecs.open(meta_path, "wb+", "utf8") as fd:
+                fd.write(u'\n'.join(data))
+            with codecs.open(txt_path, "wb+", "utf8") as fd:
+                fd.write(u"Write your post here.")
+            print "Your post's metadata is at: ", meta_path
+            print "Your post's text is at: ", txt_path
diff --git a/nikola/plugins/command_serve.plugin b/nikola/plugins/command_serve.plugin
new file mode 100644
index 0000000..684935d
--- /dev/null
+++ b/nikola/plugins/command_serve.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = serve
+Module = command_serve
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Start test server.
+
diff --git a/nikola/plugins/command_serve.py b/nikola/plugins/command_serve.py
new file mode 100644
index 0000000..626b117
--- /dev/null
+++ b/nikola/plugins/command_serve.py
@@ -0,0 +1,40 @@
+from optparse import OptionParser
+import os
+from BaseHTTPServer import HTTPServer
+from SimpleHTTPServer import SimpleHTTPRequestHandler
+
+from nikola.plugin_categories import Command
+
+
+class CommandServe(Command):
+    """Start test server."""
+
+    name = "serve"
+
+    def run(self, *args):
+        """Start test server."""
+
+        parser = OptionParser(usage="nikola %s [options]" % self.name)
+        parser.add_option("-p", "--port", dest="port",
+                          help="Port number (default: 8000)", default=8000,
+                          type="int")
+        parser.add_option("-a", "--address", dest="address",
+                          help="Address to bind (default: 127.0.0.1)",
+                          default='127.0.0.1')
+        (options, args) = parser.parse_args(list(args))
+
+        out_dir = self.site.config['OUTPUT_FOLDER']
+        if not os.path.isdir(out_dir):
+            print "Error: Missing '%s' folder?" % out_dir
+        else:
+            os.chdir(out_dir)
+            httpd = HTTPServer((options.address, options.port),
+                               OurHTTPRequestHandler)
+            sa = httpd.socket.getsockname()
+            print "Serving HTTP on", sa[0], "port", sa[1], "..."
+            httpd.serve_forever()
+
+
+class OurHTTPRequestHandler(SimpleHTTPRequestHandler):
+    extensions_map = dict(SimpleHTTPRequestHandler.extensions_map)
+    extensions_map[""] = "text/plain"
diff --git a/nikola/plugins/compile_html.plugin b/nikola/plugins/compile_html.plugin
new file mode 100644
index 0000000..f6cdfbc
--- /dev/null
+++ b/nikola/plugins/compile_html.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = html
+Module = compile_html
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Compile HTML into HTML (just copy)
+
diff --git a/nikola/plugins/compile_html.py b/nikola/plugins/compile_html.py
new file mode 100644
index 0000000..8241030
--- /dev/null
+++ b/nikola/plugins/compile_html.py
@@ -0,0 +1,20 @@
+"""Implementation of compile_html that simply copies the HTML source."""
+
+import os
+import shutil
+
+
+from nikola.plugin_categories import PageCompiler
+
+
+class CompileHtml(PageCompiler):
+    """Compile HTML into HTML."""
+
+    name = "html"
+
+    def compile_html(self, source, dest):
+        try:
+            os.makedirs(os.path.dirname(dest))
+        except OSError:
+            pass
+        shutil.copyfile(source, dest)
diff --git a/nikola/plugins/compile_markdown.plugin b/nikola/plugins/compile_markdown.plugin
new file mode 100644
index 0000000..f3e119b
--- /dev/null
+++ b/nikola/plugins/compile_markdown.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = markdown
+Module = compile_markdown
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Compile Markdown into HTML
+
diff --git a/nikola/plugins/compile_markdown/__init__.py b/nikola/plugins/compile_markdown/__init__.py
new file mode 100644
index 0000000..958cfa3
--- /dev/null
+++ b/nikola/plugins/compile_markdown/__init__.py
@@ -0,0 +1,33 @@
+"""Implementation of compile_html based on markdown."""
+
+import codecs
+import os
+import re
+
+from markdown import markdown
+
+from nikola.plugin_categories import PageCompiler
+
+
+class CompileMarkdown(PageCompiler):
+    """Compile Markdown into HTML."""
+
+    name = "markdown"
+
+    def compile_html(self, source, dest):
+        try:
+            os.makedirs(os.path.dirname(dest))
+        except OSError:
+            pass
+        with codecs.open(dest, "w+", "utf8") as out_file:
+            with codecs.open(source, "r", "utf8") as in_file:
+                data = in_file.read()
+            output = markdown(data, ['fenced_code', 'codehilite'])
+            # Remove the first H1, because the page template already
+            # renders the title as an h1.
+            output = re.sub(r'<h1>.*</h1>', '', output)
+            # python-markdown's highlighter uses the class 'codehilite' to
+            # wrap code, instead of the standard 'code'. None of the standard
+            # pygments stylesheets use this class, so swap it to be 'code'.
+            output = re.sub(r'(<div[^>]+class="[^"]*)codehilite([^>]+)',
+                            r'\1code\2', output)
+            out_file.write(output)
diff --git a/nikola/plugins/compile_rest.plugin b/nikola/plugins/compile_rest.plugin
new file mode 100644
index 0000000..67eb562
--- /dev/null
+++ b/nikola/plugins/compile_rest.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = rest
+Module = compile_rest
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Compile reSt into HTML
+
diff --git a/nikola/plugins/compile_rest/__init__.py b/nikola/plugins/compile_rest/__init__.py
new file mode 100644
index 0000000..0a25a06
--- /dev/null
+++ b/nikola/plugins/compile_rest/__init__.py
@@ -0,0 +1,79 @@
+import codecs
+import os
+
+import docutils.core
+import docutils.io
+from docutils.parsers.rst import directives
+
+from pygments_code_block_directive import (
+    code_block_directive,
+    listings_directive)
+directives.register_directive('code-block', code_block_directive)
+directives.register_directive('listing', listings_directive)
+
+import pygments_code_block_directive
+# Below is to make pyflakes happy (sigh)
+pygments_code_block_directive
+from youtube import youtube
+directives.register_directive('youtube', youtube)
+
+from nikola.plugin_categories import PageCompiler
+
+
+class CompileRest(PageCompiler):
+    """Compile reSt into HTML."""
+
+    name = "rest"
+
+    def compile_html(self, source, dest):
+        """Compile reSt into HTML."""
+        try:
+            os.makedirs(os.path.dirname(dest))
+        except OSError:
+            pass
+        error_level = 100
+        with codecs.open(dest, "w+", "utf8") as out_file:
+            with codecs.open(source, "r", "utf8") as in_file:
+                data = in_file.read()
+            output, error_level = rst2html(data,
+                settings_overrides={'initial_header_level': 2})
+            out_file.write(output)
+        if error_level < 3:
+            return True
+        else:
+            return False
+
+
+def rst2html(source, source_path=None, source_class=docutils.io.StringInput,
+             destination_path=None,
+             reader=None, reader_name='standalone',
+             parser=None, parser_name='restructuredtext',
+             writer=None, writer_name='html',
+             settings=None, settings_spec=None,
+             settings_overrides=None, config_section=None,
+             enable_exit_status=None):
+    """
+    Set up & run a `Publisher`, and return a dictionary of document parts.
+    Dictionary keys are the names of parts, and values are Unicode strings;
+    encoding is up to the client. For programmatic use with string I/O.
+
+    For encoded string input, be sure to set the 'input_encoding' setting to
+    the desired encoding. Set it to 'unicode' for unencoded Unicode string
+    input. Here's how::
+
+        publish_parts(..., settings_overrides={'input_encoding': 'unicode'})
+
+    Parameters: see `publish_programmatically`.
+    """
+    output, pub = docutils.core.publish_programmatically(
+        source=source, source_path=source_path, source_class=source_class,
+        destination_class=docutils.io.StringOutput,
+        destination=None, destination_path=destination_path,
+        reader=reader, reader_name=reader_name,
+        parser=parser, parser_name=parser_name,
+        writer=writer, writer_name=writer_name,
+        settings=settings, settings_spec=settings_spec,
+        settings_overrides=settings_overrides,
+        config_section=config_section,
+        enable_exit_status=enable_exit_status)
+    return pub.writer.parts['fragment'], pub.document.reporter.max_level
diff --git a/nikola/plugins/compile_rest/pygments_code_block_directive.py b/nikola/plugins/compile_rest/pygments_code_block_directive.py
new file mode 100644
index 0000000..ac91f3c
--- /dev/null
+++ b/nikola/plugins/compile_rest/pygments_code_block_directive.py
@@ -0,0 +1,401 @@
+# -*- coding: utf-8 -*-
+#$Date: 2012-02-28 21:07:21 -0300 (Tue, 28 Feb 2012) $
+#$Revision: 2443 $
+
+# :Author: a Pygments author|contributor; Felix Wiemann; Guenter Milde
+# :Date: $Date: 2012-02-28 21:07:21 -0300 (Tue, 28 Feb 2012) $
+# :Copyright: This module has been placed in the public domain.
+#
+# This is a merge of `Using Pygments in ReST documents`_ from the pygments_
+# documentation, and a `proof of concept`_ by Felix Wiemann.
+#
+# ========== ===========================================================
+# 2007-06-01 Removed redundancy from class values.
+# 2007-06-04 Merge of successive tokens of same type
+#            (code taken from pygments.formatters.others).
+# 2007-06-05 Separate docutils formatter script
+#            Use pygments' CSS class names (like the html formatter)
+#            allowing the use of pygments-produced style sheets.
+# 2007-06-07 Merge in the formatting of the parsed tokens
+#            (misnamed as docutils_formatter) as class DocutilsInterface
+# 2007-06-08 Failsafe implementation (fall back to a standard literal block
+#            if pygments is not found)
+# ========== ===========================================================
+#
+# ::
+
+"""Define and register a code-block directive using pygments"""
+
+
+# Requirements
+# ------------
+# ::
+
+import codecs
+from copy import copy
+import os
+import urlparse
+
+from docutils import nodes, core
+from docutils.parsers.rst import directives
+
+pygments = None
+try:
+    import pygments
+    from pygments.lexers import get_lexer_by_name
+    from pygments.formatters.html import _get_ttype_class
+except ImportError:
+    pass
+
+
+# Customisation
+# -------------
+#
+# Do not insert inline nodes for the following tokens.
+# (You could add e.g. Token.Punctuation like ``['', 'p']``.) ::
+
+unstyled_tokens = ['']
+
+
+# DocutilsInterface
+# -----------------
+#
+# This interface class combines code from
+# pygments.formatters.html and pygments.formatters.others.
+#
+# It does not require anything of docutils and could also become a part of
+# pygments::
+
+class DocutilsInterface(object):
+    """Parse `code` string and yield "classified" tokens.
+
+    Arguments
+
+      code     -- string of source code to parse
+      language -- formal language the code is written in.
+
+    Merge subsequent tokens of the same token-type.
+
+    Yields the tokens as ``(ttype_class, value)`` tuples,
+    where ttype_class is taken from pygments.token.STANDARD_TYPES and
+    corresponds to the class argument used in pygments html output.
+
+    """
+
+    def __init__(self, code, language, custom_args=None):
+        self.code = code
+        self.language = language
+        self.custom_args = custom_args or {}
+
+    def lex(self):
+        """Get lexer for language (use text as fallback)"""
+        try:
+            if self.language and unicode(self.language).lower() != 'none':
+                lexer = get_lexer_by_name(self.language.lower(),
+                                          **self.custom_args)
+            else:
+                lexer = get_lexer_by_name('text', **self.custom_args)
+        except ValueError:
+            # Unknown language name: fall back to the plain-text lexer.
+            lexer = get_lexer_by_name('text')
+        return pygments.lex(self.code, lexer)
+
+    def join(self, tokens):
+        """Join subsequent tokens of same token-type."""
+        tokens = iter(tokens)
+        (lasttype, lastval) = tokens.next()
+        for ttype, value in tokens:
+            if ttype is lasttype:
+                lastval += value
+            else:
+                yield (lasttype, lastval)
+                (lasttype, lastval) = (ttype, value)
+        yield (lasttype, lastval)
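+
+    # For instance, join() turns [('k', 'def'), ('k', ' f'), ('n', 'x')]
+    # into [('k', 'def f'), ('n', 'x')] (a sketch of the behaviour).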
+
+    def __iter__(self):
+        """Parse code string and yield "classified" tokens."""
+        try:
+            tokens = self.lex()
+        except IOError:
+            yield ('', self.code)
+            return
+
+        for ttype, value in self.join(tokens):
+            yield (_get_ttype_class(ttype), value)
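+
+# A minimal usage sketch (assuming pygments is installed): instances are
+# simply iterated over::
+#
+#     for cls, text in DocutilsInterface(u'print 1', 'python'):
+#         print cls, repr(text)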
+
+
+# code_block_directive
+# --------------------
+# ::
+
+def code_block_directive(name, arguments, options, content, lineno,
+                         content_offset, block_text, state, state_machine):
+    """Parse and classify content of a code_block."""
+    if 'include' in options:
+        try:
+            if 'encoding' in options:
+                encoding = options['encoding']
+            else:
+                encoding = 'utf-8'
+            content = codecs.open(
+                options['include'], 'r', encoding).read().rstrip()
+        except (IOError, UnicodeError):  # no file or problem reading it
+            content = u''
+        line_offset = 0
+        if content:
+            # Here we define the start-at and end-at options so that the
+            # matching line is included in the extraction. This is different
+            # from the start-after option of docutils
+            # (docutils/parsers/rst/directives/misc.py L73+),
+            # which excludes the matching line.
+            # The reason is that we want to be able to give a start-at like
+            #     def mymethod(self)
+            # and have that definition included.
+
+            after_text = options.get('start-at', None)
+            if after_text:
+                # skip content in include_text before
+                # *and NOT incl.* a matching text
+                after_index = content.find(after_text)
+                if after_index < 0:
+                    raise state_machine.reporter.severe(
+                        'Problem with "start-at" option of "%s" '
+                        'code-block directive:\nText not found.' %
+                        options['start-at'])
+                line_offset = len(content[:after_index].splitlines())
+                content = content[after_index:]
+
+            after_text = options.get('start-after', None)
+            if after_text:
+                # skip content in include_text before
+                # *and incl.* a matching text
+                after_index = content.find(after_text)
+                if after_index < 0:
+                    raise state_machine.reporter.severe(
+                        'Problem with "start-after" option of "%s" '
+                        'code-block directive:\nText not found.' %
+                        options['start-after'])
+                line_offset = len(content[:after_index +
+                                          len(after_text)].splitlines())
+                content = content[after_index + len(after_text):]
+
+            # same changes here for the same reason
+            before_text = options.get('end-at', None)
+            if before_text:
+                # skip content in include_text after
+                # *and incl.* a matching text
+                before_index = content.find(before_text)
+                if before_index < 0:
+                    raise state_machine.reporter.severe(
+                        'Problem with "end-at" option of "%s" '
+                        'code-block directive:\nText not found.' %
+                        options['end-at'])
+                content = content[:before_index + len(before_text)]
+
+            before_text = options.get('end-before', None)
+            if before_text:
+                # skip content in include_text after
+                # *and NOT incl.* a matching text
+                before_index = content.find(before_text)
+                if before_index < 0:
+                    raise state_machine.reporter.severe(
+                        'Problem with "end-before" option of "%s" '
+                        'code-block directive:\nText not found.' %
+                        options['end-before'])
+                content = content[:before_index]
+
+    else:
+        content = u'\n'.join(content)
+
+    if 'tabsize' in options:
+        tabw = options['tabsize']
+    else:
+        tabw = int(options.get('tab-width', 8))
+
+    content = content.replace('\t', ' ' * tabw)
+
+    withln = "linenos" in options
+    if "linenos_offset" not in options:
+        line_offset = 0
+
+    language = arguments[0]
+    # create a literal block element and set class argument
+    code_block = nodes.literal_block(classes=["code", language])
+
+    if withln:
+        lineno = 1 + line_offset
+        total_lines = content.count('\n') + 1 + line_offset
+        lnwidth = len(str(total_lines))
+        fstr = "\n%%%dd " % lnwidth
+        code_block += nodes.inline(fstr[1:] % lineno, fstr[1:] % lineno,
+                                   classes=['linenumber'])
+
+    # parse content with pygments and add to code_block element
+    content = content.rstrip()
+    if pygments is None:
+        code_block += nodes.Text(content, content)
+    else:
+        # Drop the trailing \n token that pygments adds, since it looks bad.
+        tokens = list(DocutilsInterface(content, language, options))
+        if tokens[-1] == ('', u'\n'):
+            tokens = tokens[:-1]
+        for cls, value in tokens:
+            if withln and "\n" in value:
+                # Split on the "\n"s
+                values = value.split("\n")
+                # The first piece, pass as-is
+                code_block += nodes.Text(values[0], values[0])
+                # On the second and later pieces, insert \n and linenos
+                linenos = range(lineno, lineno + len(values))
+                for chunk, ln in zip(values, linenos)[1:]:
+                    if ln <= total_lines:
+                        code_block += nodes.inline(fstr % ln, fstr % ln,
+                                                   classes=['linenumber'])
+                    code_block += nodes.Text(chunk, chunk)
+                lineno += len(values) - 1
+
+            elif cls in unstyled_tokens:
+                # insert as Text to decrease the verbosity of the output.
+                code_block += nodes.Text(value, value)
+            else:
+                code_block += nodes.inline(value, value, classes=[cls])
+
+    return [code_block]
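+
+# For reference, a reSt document would use the directive registered from
+# this function roughly like so (a sketch; the full set of options is
+# defined at the end of this module)::
+#
+#     .. code-block:: python
+#        :linenos:
+#
+#        def hello():
+#            return 'world'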
+
+# Custom argument validators
+# --------------------------
+# ::
+#
+# Move to a separate module?
+
+
+def string_list(argument):
+    """
+    Converts a space- or comma-separated list of values into a Python list
+    of strings.
+    (Directive option conversion function)
+    Based on positive_int_list of docutils.parsers.rst.directives
+    """
+    if ',' in argument:
+        entries = argument.split(',')
+    else:
+        entries = argument.split()
+    return entries
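+
+# For example, both ``"gz,bz2"`` and ``"gz bz2"`` become ``['gz', 'bz2']``.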
+
+
+def string_bool(argument):
+    """
+    Converts "true" or "false" (in any capitalization) to a Python boolean.
+    """
+    if argument is None:
+        msg = 'argument required but none supplied; choose "True" or "False"'
+        raise ValueError(msg)
+
+    elif argument.lower() == 'true':
+        return True
+    elif argument.lower() == 'false':
+        return False
+    else:
+        raise ValueError('"%s" unknown; choose from "True" or "False"'
+                         % argument)
+
+
+def csharp_unicodelevel(argument):
+    return directives.choice(argument, ('none', 'basic', 'full'))
+
+
+def lhs_litstyle(argument):
+    return directives.choice(argument, ('bird', 'latex'))
+
+
+def raw_compress(argument):
+    return directives.choice(argument, ('gz', 'bz2'))
+
+
+def listings_directive(name, arguments, options, content, lineno,
+                       content_offset, block_text, state, state_machine):
+    fname = arguments[0]
+    options['include'] = os.path.join('listings', fname)
+    target = urlparse.urlunsplit(("link", 'listing', fname, '', ''))
+    generated_nodes = [core.publish_doctree('`%s <%s>`_' % (fname,
+                                                            target))[0]]
+    generated_nodes += code_block_directive(
+        name, [arguments[1]], options, content, lineno, content_offset,
+        block_text, state, state_machine)
+    return generated_nodes
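+
+# In a document, the ``listing`` directive then takes a file name (relative
+# to the ``listings`` folder) plus a language, e.g. (a sketch)::
+#
+#     .. listing:: hello.py python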
+
+code_block_directive.arguments = (1, 0, 1)
+listings_directive.arguments = (2, 0, 1)
+code_block_directive.content = 1
+listings_directive.content = 1
+code_block_directive.options = {'include': directives.unchanged_required,
+                                'start-at': directives.unchanged_required,
+                                'end-at': directives.unchanged_required,
+                                'start-after': directives.unchanged_required,
+                                'end-before': directives.unchanged_required,
+                                'linenos': directives.unchanged,
+                                'linenos_offset': directives.unchanged,
+                                'tab-width': directives.unchanged,
+                                # generic
+                                'stripnl': string_bool,
+                                'stripall': string_bool,
+                                'ensurenl': string_bool,
+                                'tabsize': directives.positive_int,
+                                'encoding': directives.encoding,
+                                # Lua
+                                'func_name_highlighting': string_bool,
+                                'disabled_modules': string_list,
+                                # Python Console
+                                'python3': string_bool,
+                                # Delphi
+                                'turbopascal': string_bool,
+                                'delphi': string_bool,
+                                'freepascal': string_bool,
+                                'units': string_list,
+                                # Modula2
+                                'pim': string_bool,
+                                'iso': string_bool,
+                                'objm2': string_bool,
+                                'gm2ext': string_bool,
+                                # CSharp
+                                'unicodelevel': csharp_unicodelevel,
+                                # Literate haskell
+                                'litstyle': lhs_litstyle,
+                                # Raw
+                                'compress': raw_compress,
+                                # Rst
+                                'handlecodeblocks': string_bool,
+                                # Php
+                                'startinline': string_bool,
+                                'funcnamehighlighting': string_bool,
+                                'disabledmodules': string_list,
+                                }
+
+listings_directive.options = copy(code_block_directive.options)
+listings_directive.options.pop('include')
+
+# .. _docutils: http://docutils.sf.net/
+# .. _pygments: http://pygments.org/
+# .. _Using Pygments in ReST documents: http://pygments.org/docs/rstdirective/
+# .. _proof of concept:
+#    http://article.gmane.org/gmane.text.docutils.user/3689
+#
+# Test output
+# -----------
+#
+# If called from the command line, call the docutils publisher to render the
+# input::
+
+if __name__ == '__main__':
+    from docutils.core import publish_cmdline, default_description
+    from docutils.parsers.rst import directives
+    directives.register_directive('code-block', code_block_directive)
+    description = "code-block directive test output" + default_description
+    try:
+        import locale
+        locale.setlocale(locale.LC_ALL, '')
+    except Exception:
+        pass
+    publish_cmdline(writer_name='html', description=description)
diff --git a/nikola/plugins/compile_rest/youtube.py b/nikola/plugins/compile_rest/youtube.py new file mode 100644 index 0000000..584160b --- /dev/null +++ b/nikola/plugins/compile_rest/youtube.py @@ -0,0 +1,33 @@ +from docutils import nodes +from docutils.parsers.rst import directives + +CODE = """\ +<iframe width="%(width)s" +height="%(height)s" +src="http://www.youtube.com/embed/%(yid)s?rel=0&hd=1&wmode=transparent"> +</iframe> +""" + + +def youtube(name, args, options, content, lineno, + contentOffset, blockText, state, stateMachine): + """ Restructured text extension for inserting youtube embedded videos """ + if len(content) == 0: + return + string_vars = { + 'yid': content[0], + 'width': 425, + 'height': 344, + 'extra': '' + } + extra_args = content[1:] # Because content[0] is ID + extra_args = [ea.strip().split("=") for ea in extra_args] # key=value + extra_args = [ea for ea in extra_args if len(ea) == 2] # drop bad lines + extra_args = dict(extra_args) + if 'width' in extra_args: + string_vars['width'] = extra_args.pop('width') + if 'height' in extra_args: + string_vars['height'] = extra_args.pop('height') + return [nodes.raw('', CODE % (string_vars), format='html')] +youtube.content = True +directives.register_directive('youtube', youtube) diff --git a/nikola/plugins/task_archive.plugin b/nikola/plugins/task_archive.plugin new file mode 100644 index 0000000..23f93ed --- /dev/null +++ b/nikola/plugins/task_archive.plugin @@ -0,0 +1,10 @@ +[Core] +Name = render_archive +Module = task_archive + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Generates the blog's archive pages. + diff --git a/nikola/plugins/task_archive.py b/nikola/plugins/task_archive.py new file mode 100644 index 0000000..4c97101 --- /dev/null +++ b/nikola/plugins/task_archive.py @@ -0,0 +1,77 @@ +import os + +from nikola.plugin_categories import Task +from nikola.utils import config_changed + + +class Archive(Task): + """Render the post archives.""" + + name = "render_archive" + + def gen_tasks(self): + kw = { + "messages": self.site.MESSAGES, + "translations": self.site.config['TRANSLATIONS'], + "output_folder": self.site.config['OUTPUT_FOLDER'], + "filters": self.site.config['FILTERS'], + } + self.site.scan_posts() + # TODO add next/prev links for years + template_name = "list.tmpl" + # TODO: posts_per_year is global, kill it + for year, posts in self.site.posts_per_year.items(): + for lang in kw["translations"]: + output_name = os.path.join( + kw['output_folder'], self.site.path("archive", year, lang)) + post_list = [self.site.global_data[post] for post in posts] + post_list.sort(cmp=lambda a, b: cmp(a.date, b.date)) + post_list.reverse() + context = {} + context["lang"] = lang + context["items"] = [("[%s] %s" % + (post.date, post.title(lang)), post.permalink(lang)) + for post in post_list] + context["permalink"] = self.site.link("archive", year, lang) + context["title"] = kw["messages"][lang]["Posts for year %s"]\ + % year + for task in self.site.generic_post_list_renderer( + lang, + post_list, + output_name, + template_name, + kw['filters'], + context, + ): + task['uptodate'] = [config_changed({ + 1: task['uptodate'][0].config, + 2: kw})] + task['basename'] = self.name + yield task + + # And global "all your years" page + years = self.site.posts_per_year.keys() + years.sort(reverse=True) + template_name = "list.tmpl" + kw['years'] = years + for lang in kw["translations"]: + context = {} + output_name = os.path.join( + kw['output_folder'], 
self.site.path("archive", None, lang)) + context["title"] = kw["messages"][lang]["Archive"] + context["items"] = [(year, self.site.link("archive", year, lang)) + for year in years] + context["permalink"] = self.site.link("archive", None, lang) + for task in self.site.generic_post_list_renderer( + lang, + [], + output_name, + template_name, + kw['filters'], + context, + ): + task['uptodate'] = [config_changed({ + 1: task['uptodate'][0].config, + 2: kw})] + task['basename'] = self.name + yield task diff --git a/nikola/plugins/task_copy_assets.plugin b/nikola/plugins/task_copy_assets.plugin new file mode 100644 index 0000000..b11133f --- /dev/null +++ b/nikola/plugins/task_copy_assets.plugin @@ -0,0 +1,10 @@ +[Core] +Name = copy_assets +Module = task_copy_assets + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Copy theme assets into output. + diff --git a/nikola/plugins/task_copy_assets.py b/nikola/plugins/task_copy_assets.py new file mode 100644 index 0000000..ac31fd7 --- /dev/null +++ b/nikola/plugins/task_copy_assets.py @@ -0,0 +1,35 @@ +import os + +from nikola.plugin_categories import Task +from nikola import utils + + +class CopyAssets(Task): + """Copy theme assets into output.""" + + name = "copy_assets" + + def gen_tasks(self): + """Create tasks to copy the assets of the whole theme chain. + + If a file is present on two themes, use the version + from the "youngest" theme. + """ + + kw = { + "themes": self.site.THEMES, + "output_folder": self.site.config['OUTPUT_FOLDER'], + "filters": self.site.config['FILTERS'], + } + + tasks = {} + for theme_name in kw['themes']: + src = os.path.join(utils.get_theme_path(theme_name), 'assets') + dst = os.path.join(kw['output_folder'], 'assets') + for task in utils.copy_tree(src, dst): + if task['name'] in tasks: + continue + tasks[task['name']] = task + task['uptodate'] = [utils.config_changed(kw)] + task['basename'] = self.name + yield utils.apply_filters(task, kw['filters']) diff --git a/nikola/plugins/task_copy_files.plugin b/nikola/plugins/task_copy_files.plugin new file mode 100644 index 0000000..0bfc5be --- /dev/null +++ b/nikola/plugins/task_copy_files.plugin @@ -0,0 +1,10 @@ +[Core] +Name = copy_files +Module = task_copy_files + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Copy static files into the output. 
+ diff --git a/nikola/plugins/task_copy_files.py b/nikola/plugins/task_copy_files.py new file mode 100644 index 0000000..a053905 --- /dev/null +++ b/nikola/plugins/task_copy_files.py @@ -0,0 +1,35 @@ +import os + +from nikola.plugin_categories import Task +from nikola import utils + + +class CopyFiles(Task): + """Copy static files into the output folder.""" + + name = "copy_files" + + def gen_tasks(self): + """Copy static files into the output folder.""" + + kw = { + 'files_folders': self.site.config['FILES_FOLDERS'], + 'output_folder': self.site.config['OUTPUT_FOLDER'], + 'filters': self.site.config['FILTERS'], + } + + flag = False + for src in kw['files_folders']: + dst = kw['output_folder'] + filters = kw['filters'] + real_dst = os.path.join(dst, kw['files_folders'][src]) + for task in utils.copy_tree(src, real_dst, link_cutoff=dst): + flag = True + task['basename'] = self.name + task['uptodate'] = [utils.config_changed(kw)] + yield utils.apply_filters(task, filters) + if not flag: + yield { + 'basename': self.name, + 'actions': (), + } diff --git a/nikola/plugins/task_create_bundles.plugin b/nikola/plugins/task_create_bundles.plugin new file mode 100644 index 0000000..5d4f6d3 --- /dev/null +++ b/nikola/plugins/task_create_bundles.plugin @@ -0,0 +1,10 @@ +[Core] +Name = create_bundles +Module = task_create_bundles + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Theme bundles using WebAssets + diff --git a/nikola/plugins/task_create_bundles.py b/nikola/plugins/task_create_bundles.py new file mode 100644 index 0000000..ebca0b7 --- /dev/null +++ b/nikola/plugins/task_create_bundles.py @@ -0,0 +1,85 @@ +import os + +try: + import webassets +except ImportError: + webassets = None # NOQA + +from nikola.plugin_categories import LateTask +from nikola import utils + + +class BuildBundles(LateTask): + """Bundle assets using WebAssets.""" + + name = "build_bundles" + + def set_site(self, site): + super(BuildBundles, self).set_site(site) + if webassets is None: + self.site.config['USE_BUNDLES'] = False + + def gen_tasks(self): + """Bundle assets using WebAssets.""" + + kw = { + 'filters': self.site.config['FILTERS'], + 'output_folder': self.site.config['OUTPUT_FOLDER'], + 'theme_bundles': get_theme_bundles(self.site.THEMES), + } + + def build_bundle(output, inputs): + out_dir = os.path.join(kw['output_folder'], os.path.dirname(output)) + inputs = [i for i in inputs if os.path.isfile( + os.path.join(out_dir, i))] + cache_dir = os.path.join('cache', 'webassets') + if not os.path.isdir(cache_dir): + os.makedirs(cache_dir) + env = webassets.Environment(out_dir, os.path.dirname(output), + cache=cache_dir) + bundle = webassets.Bundle(*inputs, + output=os.path.basename(output)) + env.register(output, bundle) + # This generates the file + env[output].urls() + + flag = False + if webassets is not None and self.site.config['USE_BUNDLES'] is not False: + for name, files in kw['theme_bundles'].items(): + output_path = os.path.join(kw['output_folder'], name) + dname = os.path.dirname(name) + file_dep = [os.path.join('output', dname, fname) + for fname in files] + task = { + 'file_dep': file_dep, + 'basename': self.name, + 'name': output_path, + 'actions': [(build_bundle, (name, files))], + 'targets': [output_path], + 'uptodate': [utils.config_changed(kw)] + } + flag = True + yield utils.apply_filters(task, kw['filters']) + if flag is False: # No page rendered, yield a dummy task + yield { + 'basename': self.name, + 'uptodate': [True], + 'name': 
'None', + 'actions': [], + } + + +def get_theme_bundles(themes): + """Given a theme chain, return the bundle definitions.""" + bundles = {} + for theme_name in themes: + bundles_path = os.path.join( + utils.get_theme_path(theme_name), 'bundles') + if os.path.isfile(bundles_path): + with open(bundles_path) as fd: + for line in fd: + name, files = line.split('=') + files = [f.strip() for f in files.split(',')] + bundles[name.strip()] = files + break + return bundles diff --git a/nikola/plugins/task_indexes.plugin b/nikola/plugins/task_indexes.plugin new file mode 100644 index 0000000..1536006 --- /dev/null +++ b/nikola/plugins/task_indexes.plugin @@ -0,0 +1,10 @@ +[Core] +Name = render_index +Module = task_indexes + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Generates the blog's index pages. + diff --git a/nikola/plugins/task_indexes.py b/nikola/plugins/task_indexes.py new file mode 100644 index 0000000..2311ef3 --- /dev/null +++ b/nikola/plugins/task_indexes.py @@ -0,0 +1,81 @@ +import os + +from nikola.plugin_categories import Task +from nikola.utils import config_changed + + +class Indexes(Task): + """Render the blog indexes.""" + + name = "render_indexes" + + def gen_tasks(self): + self.site.scan_posts() + + kw = { + "translations": self.site.config['TRANSLATIONS'], + "index_display_post_count": + self.site.config['INDEX_DISPLAY_POST_COUNT'], + "messages": self.site.MESSAGES, + "index_teasers": self.site.config['INDEX_TEASERS'], + "output_folder": self.site.config['OUTPUT_FOLDER'], + "filters": self.site.config['FILTERS'], + } + + template_name = "index.tmpl" + # TODO: timeline is global, get rid of it + posts = [x for x in self.site.timeline if x.use_in_feeds] + # Split in smaller lists + lists = [] + while posts: + lists.append(posts[:kw["index_display_post_count"]]) + posts = posts[kw["index_display_post_count"]:] + num_pages = len(lists) + if not lists: + yield { + 'basename': 'render_indexes', + 'actions': [], + } + for lang in kw["translations"]: + for i, post_list in enumerate(lists): + context = {} + if self.site.config.get("INDEXES_TITLE", ""): + indexes_title = self.site.config['INDEXES_TITLE'] + else: + indexes_title = self.site.config["BLOG_TITLE"] + if not i: + output_name = "index.html" + context["title"] = indexes_title + else: + output_name = "index-%s.html" % i + if self.site.config.get("INDEXES_PAGES", ""): + indexes_pages = self.site.config["INDEXES_PAGES"] % i + else: + indexes_pages = " (" + \ + kw["messages"][lang]["old posts page %d"] % i + ")" + context["title"] = indexes_title + indexes_pages + context["prevlink"] = None + context["nextlink"] = None + context['index_teasers'] = kw['index_teasers'] + if i > 1: + context["prevlink"] = "index-%s.html" % (i - 1) + if i == 1: + context["prevlink"] = "index.html" + if i < num_pages - 1: + context["nextlink"] = "index-%s.html" % (i + 1) + context["permalink"] = self.site.link("index", i, lang) + output_name = os.path.join( + kw['output_folder'], self.site.path("index", i, lang)) + for task in self.site.generic_post_list_renderer( + lang, + post_list, + output_name, + template_name, + kw['filters'], + context, + ): + task['uptodate'] = [config_changed({ + 1: task['uptodate'][0].config, + 2: kw})] + task['basename'] = 'render_indexes' + yield task diff --git a/nikola/plugins/task_redirect.plugin b/nikola/plugins/task_redirect.plugin new file mode 100644 index 0000000..285720b --- /dev/null +++ b/nikola/plugins/task_redirect.plugin @@ -0,0 +1,10 @@ 
+[Core] +Name = redirect +Module = task_redirect + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Create redirect pages. + diff --git a/nikola/plugins/task_redirect.py b/nikola/plugins/task_redirect.py new file mode 100644 index 0000000..7c2ccb1 --- /dev/null +++ b/nikola/plugins/task_redirect.py @@ -0,0 +1,48 @@ +import codecs +import os + +from nikola.plugin_categories import Task +from nikola import utils + + +class Redirect(Task): + """Copy theme assets into output.""" + + name = "redirect" + + def gen_tasks(self): + """Generate redirections tasks.""" + + kw = { + 'redirections': self.site.config['REDIRECTIONS'], + 'output_folder': self.site.config['OUTPUT_FOLDER'], + } + + if not kw['redirections']: + # If there are no redirections, still needs to create a + # dummy action so dependencies don't fail + yield { + 'basename': self.name, + 'name': 'None', + 'uptodate': [True], + 'actions': [], + } + + else: + for src, dst in kw["redirections"]: + src_path = os.path.join(kw["output_folder"], src) + yield { + 'basename': self.name, + 'name': src_path, + 'targets': [src_path], + 'actions': [(create_redirect, (src_path, dst))], + 'clean': True, + 'uptodate': [utils.config_changed(kw)], + } + + +def create_redirect(src, dst): + with codecs.open(src, "wb+", "utf8") as fd: + fd.write(('<head>' + + '<meta HTTP-EQUIV="REFRESH" content="0; url=%s">' + + '</head>') % dst) diff --git a/nikola/plugins/task_render_galleries.plugin b/nikola/plugins/task_render_galleries.plugin new file mode 100644 index 0000000..e0a86c0 --- /dev/null +++ b/nikola/plugins/task_render_galleries.plugin @@ -0,0 +1,10 @@ +[Core] +Name = render_galleries +Module = task_render_galleries + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Create image galleries automatically. + diff --git a/nikola/plugins/task_render_galleries.py b/nikola/plugins/task_render_galleries.py new file mode 100644 index 0000000..27e13ea --- /dev/null +++ b/nikola/plugins/task_render_galleries.py @@ -0,0 +1,305 @@ +import codecs +import datetime +import glob +import os +import uuid + +Image = None +try: + import Image as _Image + import ExifTags + Image = _Image +except ImportError: + try: + from PIL import Image, ExifTags # NOQA + except ImportError: + pass + + +from nikola.plugin_categories import Task +from nikola import utils + + +class Galleries(Task): + """Copy theme assets into output.""" + + name = "render_galleries" + dates = {} + + def gen_tasks(self): + """Render image galleries.""" + + kw = { + 'thumbnail_size': self.site.config['THUMBNAIL_SIZE'], + 'max_image_size': self.site.config['MAX_IMAGE_SIZE'], + 'output_folder': self.site.config['OUTPUT_FOLDER'], + 'default_lang': self.site.config['DEFAULT_LANG'], + 'blog_description': self.site.config['BLOG_DESCRIPTION'], + 'use_filename_as_title': self.site.config['USE_FILENAME_AS_TITLE'], + } + + # FIXME: lots of work is done even when images don't change, + # which should be moved into the task. 
+ + template_name = "gallery.tmpl" + + gallery_list = [] + for root, dirs, files in os.walk('galleries'): + gallery_list.append(root) + if not gallery_list: + yield { + 'basename': 'render_galleries', + 'actions': [], + } + return + + # gallery_path is "gallery/name" + for gallery_path in gallery_list: + # gallery_name is "name" + splitted = gallery_path.split(os.sep)[1:] + if not splitted: + gallery_name = '' + else: + gallery_name = os.path.join(*splitted) + # output_gallery is "output/GALLERY_PATH/name" + output_gallery = os.path.dirname(os.path.join(kw["output_folder"], + self.site.path("gallery", gallery_name, None))) + if not os.path.isdir(output_gallery): + yield { + 'basename': 'render_galleries', + 'name': output_gallery, + 'actions': [(os.makedirs, (output_gallery,))], + 'targets': [output_gallery], + 'clean': True, + 'uptodate': [utils.config_changed(kw)], + } + # image_list contains "gallery/name/image_name.jpg" + image_list = glob.glob(gallery_path + "/*jpg") +\ + glob.glob(gallery_path + "/*JPG") +\ + glob.glob(gallery_path + "/*PNG") +\ + glob.glob(gallery_path + "/*png") + + # Filter ignore images + try: + def add_gallery_path(index): + return "{0}/{1}".format(gallery_path, index) + + exclude_path = os.path.join(gallery_path, "exclude.meta") + try: + f = open(exclude_path, 'r') + excluded_image_name_list = f.read().split() + except IOError: + excluded_image_name_list = [] + + excluded_image_list = map(add_gallery_path, + excluded_image_name_list) + image_set = set(image_list) - set(excluded_image_list) + image_list = list(image_set) + except IOError: + pass + + # List of sub-galleries + folder_list = [x.split(os.sep)[-2] for x in + glob.glob(os.path.join(gallery_path, '*') + os.sep)] + + crumbs = gallery_path.split(os.sep)[:-1] + crumbs.append(os.path.basename(gallery_name)) + # TODO: write this in human + paths = ['/'.join(['..'] * (len(crumbs) - 1 - i)) for i in + range(len(crumbs[:-1]))] + ['#'] + crumbs = zip(paths, crumbs) + + image_list = [x for x in image_list if "thumbnail" not in x] + # Sort by date + image_list.sort(cmp=lambda a, b: cmp( + self.image_date(a), self.image_date(b))) + image_name_list = [os.path.basename(x) for x in image_list] + + thumbs = [] + # Do thumbnails and copy originals + for img, img_name in zip(image_list, image_name_list): + # img is "galleries/name/image_name.jpg" + # img_name is "image_name.jpg" + # fname, ext are "image_name", ".jpg" + fname, ext = os.path.splitext(img_name) + # thumb_path is + # "output/GALLERY_PATH/name/image_name.thumbnail.jpg" + thumb_path = os.path.join(output_gallery, + fname + ".thumbnail" + ext) + # thumb_path is "output/GALLERY_PATH/name/image_name.jpg" + orig_dest_path = os.path.join(output_gallery, img_name) + thumbs.append(os.path.basename(thumb_path)) + yield { + 'basename': 'render_galleries', + 'name': thumb_path, + 'file_dep': [img], + 'targets': [thumb_path], + 'actions': [ + (self.resize_image, + (img, thumb_path, kw['thumbnail_size'])) + ], + 'clean': True, + 'uptodate': [utils.config_changed(kw)], + } + yield { + 'basename': 'render_galleries', + 'name': orig_dest_path, + 'file_dep': [img], + 'targets': [orig_dest_path], + 'actions': [ + (self.resize_image, + (img, orig_dest_path, kw['max_image_size'])) + ], + 'clean': True, + 'uptodate': [utils.config_changed(kw)], + } + + # Remove excluded images + if excluded_image_name_list: + for img, img_name in zip(excluded_image_list, + excluded_image_name_list): + # img_name is "image_name.jpg" + # fname, ext are "image_name", ".jpg" + fname, ext = 
+                    excluded_thumb_dest_path = os.path.join(
+                        output_gallery, fname + ".thumbnail" + ext)
+                    excluded_dest_path = os.path.join(output_gallery,
+                                                      img_name)
+                    yield {
+                        'basename': 'render_galleries',
+                        'name': excluded_thumb_dest_path,
+                        'file_dep': [exclude_path],
+                        #'targets': [excluded_thumb_dest_path],
+                        'actions': [
+                            (utils.remove_file, (excluded_thumb_dest_path,))
+                        ],
+                        'clean': True,
+                        'uptodate': [utils.config_changed(kw)],
+                    }
+                    yield {
+                        'basename': 'render_galleries',
+                        'name': excluded_dest_path,
+                        'file_dep': [exclude_path],
+                        #'targets': [excluded_dest_path],
+                        'actions': [
+                            (utils.remove_file, (excluded_dest_path,))
+                        ],
+                        'clean': True,
+                        'uptodate': [utils.config_changed(kw)],
+                    }
+
+            output_name = os.path.join(output_gallery, "index.html")
+            context = {}
+            context["lang"] = kw["default_lang"]
+            context["title"] = os.path.basename(gallery_path)
+            context["description"] = kw["blog_description"]
+            if kw['use_filename_as_title']:
+                img_titles = ['title="%s"' % utils.unslugify(fn[:-4])
+                              for fn in image_name_list]
+            else:
+                img_titles = [''] * len(image_name_list)
+            context["images"] = zip(image_name_list, thumbs, img_titles)
+            context["folders"] = folder_list
+            context["crumbs"] = crumbs
+            context["permalink"] = self.site.link(
+                "gallery", gallery_name, None)
+
+            # Use galleries/name/index.txt to generate a blurb for
+            # the gallery, if it exists
+            index_path = os.path.join(gallery_path, "index.txt")
+            cache_dir = os.path.join('cache', 'galleries')
+            if not os.path.isdir(cache_dir):
+                os.makedirs(cache_dir)
+            index_dst_path = os.path.join(cache_dir,
+                                          unicode(uuid.uuid1()) + '.html')
+            if os.path.exists(index_path):
+                compile_html = self.site.get_compiler(index_path)
+                yield {
+                    'basename': 'render_galleries',
+                    'name': index_dst_path.encode('utf-8'),
+                    'file_dep': [index_path],
+                    'targets': [index_dst_path],
+                    'actions': [(compile_html,
+                                 [index_path, index_dst_path])],
+                    'clean': True,
+                    'uptodate': [utils.config_changed(kw)],
+                }
+
+            file_dep = self.site.template_system.template_deps(
+                template_name) + image_list
+
+            def render_gallery(output_name, context, index_dst_path):
+                if os.path.exists(index_dst_path):
+                    with codecs.open(index_dst_path, "rb", "utf8") as fd:
+                        context['text'] = fd.read()
+                    file_dep.append(index_dst_path)
+                else:
+                    context['text'] = ''
+                self.site.render_template(template_name, output_name, context)
+
+            yield {
+                'basename': 'render_galleries',
+                'name': output_name,
+                'file_dep': file_dep,
+                'targets': [output_name],
+                'actions': [(render_gallery,
+                             (output_name, context, index_dst_path))],
+                'clean': True,
+                'uptodate': [utils.config_changed({
+                    1: kw,
+                    2: self.site.config['GLOBAL_CONTEXT']})],
+            }
+
+    def resize_image(self, src, dst, max_size):
+        """Make a copy of the image in the requested size."""
+        if not Image:
+            utils.copy_file(src, dst)
+            return
+        im = Image.open(src)
+        w, h = im.size
+        if w > max_size or h > max_size:
+            size = max_size, max_size
+            try:
+                exif = im._getexif()
+            except Exception:
+                exif = None
+            if exif is not None:
+                for tag, value in exif.items():
+                    decoded = ExifTags.TAGS.get(tag, tag)
+
+                    if decoded == 'Orientation':
+                        if value == 3:
+                            im = im.rotate(180)
+                        elif value == 6:
+                            im = im.rotate(270)
+                        elif value == 8:
+                            im = im.rotate(90)
+
+                        break
+
+            im.thumbnail(size, Image.ANTIALIAS)
+            im.save(dst)
+
+        else:
+            utils.copy_file(src, dst)
+
+    def image_date(self, src):
+        """Try to figure out the date of the image."""
+        # Only try EXIF when an imaging library was imported; otherwise
+        # fall through to the mtime fallback below.
+        if src not in self.dates and Image is not None:
+            im = Image.open(src)
+            try:
+                exif = im._getexif()
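+            # (_getexif() is a private PIL hook that only exists for JPEG
+            # images and may raise on other inputs, hence the broad except.)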
+            except Exception:
+                exif = None
+            if exif is not None:
+                for tag, value in exif.items():
+                    decoded = ExifTags.TAGS.get(tag, tag)
+                    if decoded == 'DateTimeOriginal':
+                        try:
+                            self.dates[src] = datetime.datetime.strptime(
+                                value, r'%Y:%m:%d %H:%M:%S')
+                            break
+                        except ValueError:  # Invalid EXIF date.
+                            pass
+        if src not in self.dates:
+            self.dates[src] = datetime.datetime.fromtimestamp(
+                os.stat(src).st_mtime)
+        return self.dates[src]
diff --git a/nikola/plugins/task_render_listings.plugin b/nikola/plugins/task_render_listings.plugin
new file mode 100644
index 0000000..1f897b9
--- /dev/null
+++ b/nikola/plugins/task_render_listings.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = render_listings
+Module = task_render_listings
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Render code listings into output.
+
diff --git a/nikola/plugins/task_render_listings.py b/nikola/plugins/task_render_listings.py
new file mode 100644
index 0000000..7ec6e42
--- /dev/null
+++ b/nikola/plugins/task_render_listings.py
@@ -0,0 +1,81 @@
+import os
+
+from pygments import highlight
+from pygments.lexers import get_lexer_for_filename, TextLexer
+from pygments.formatters import HtmlFormatter
+
+from nikola.plugin_categories import Task
+from nikola import utils
+
+
+class Listings(Task):
+    """Render pretty listings."""
+
+    name = "render_listings"
+
+    def gen_tasks(self):
+        """Render pretty code listings."""
+        kw = {
+            "default_lang": self.site.config["DEFAULT_LANG"],
+            "listings_folder": self.site.config["LISTINGS_FOLDER"],
+            "output_folder": self.site.config["OUTPUT_FOLDER"],
+        }
+
+        # Things to ignore in listings
+        ignored_extensions = (".pyc",)
+
+        def render_listing(in_name, out_name):
+            with open(in_name, 'r') as fd:
+                try:
+                    lexer = get_lexer_for_filename(in_name)
+                except Exception:
+                    lexer = TextLexer()
+                # Derive the line anchors from in_name: this runs later as a
+                # doit action, so the enclosing loop variable would already
+                # have moved on by the time it executes.
+                code = highlight(fd.read(), lexer,
+                                 HtmlFormatter(cssclass='code',
+                                               linenos="table",
+                                               nowrap=False,
+                                               lineanchors=utils.slugify(
+                                                   in_name),
+                                               anchorlinenos=True))
+            title = os.path.basename(in_name)
+            crumbs = out_name.split(os.sep)[1:-1] + [title]
+            # TODO: write this in human
+            paths = ['/'.join(['..'] * (len(crumbs) - 2 - i)) for i in
+                     range(len(crumbs[:-2]))] + ['.', '#']
+            context = {
+                'code': code,
+                'title': title,
+                'crumbs': zip(paths, crumbs),
+                'lang': kw['default_lang'],
+                'description': title,
+            }
+            self.site.render_template('listing.tmpl', out_name, context)
+        flag = True
+        template_deps = self.site.template_system.template_deps('listing.tmpl')
+        for root, dirs, files in os.walk(kw['listings_folder']):
+            # Render all files
+            for f in files:
+                ext = os.path.splitext(f)[-1]
+                if ext in ignored_extensions:
+                    continue
+                flag = False
+                in_name = os.path.join(root, f)
+                out_name = os.path.join(
+                    kw['output_folder'],
+                    root,
+                    f) + '.html'
+                yield {
+                    'basename': self.name,
+                    'name': out_name.encode('utf8'),
+                    'file_dep': template_deps + [in_name],
+                    'targets': [out_name],
+                    'actions': [(render_listing, [in_name, out_name])],
+                    # This is necessary to reflect changes in blog title,
+                    # sidebar links, etc.
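+                    # (config_changed stores a checksum of the given values
+                    # in doit's run database; when it differs from the
+                    # previous run the task is re-executed.)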
+ 'uptodate': [utils.config_changed( + self.site.config['GLOBAL_CONTEXT'])] + } + if flag: + yield { + 'basename': self.name, + 'actions': [], + } diff --git a/nikola/plugins/task_render_pages.plugin b/nikola/plugins/task_render_pages.plugin new file mode 100644 index 0000000..e2a358c --- /dev/null +++ b/nikola/plugins/task_render_pages.plugin @@ -0,0 +1,10 @@ +[Core] +Name = render_pages +Module = task_render_pages + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Create pages in the output. + diff --git a/nikola/plugins/task_render_pages.py b/nikola/plugins/task_render_pages.py new file mode 100644 index 0000000..954dc47 --- /dev/null +++ b/nikola/plugins/task_render_pages.py @@ -0,0 +1,35 @@ +from nikola.plugin_categories import Task +from nikola.utils import config_changed + + +class RenderPages(Task): + """Render pages into output.""" + + name = "render_pages" + + def gen_tasks(self): + """Build final pages from metadata and HTML fragments.""" + kw = { + "post_pages": self.site.config["post_pages"], + "translations": self.site.config["TRANSLATIONS"], + "filters": self.site.config["FILTERS"], + } + self.site.scan_posts() + flag = False + for lang in kw["translations"]: + for wildcard, destination, template_name, _ in kw["post_pages"]: + for task in self.site.generic_page_renderer(lang, + wildcard, template_name, destination, kw["filters"]): + task['uptodate'] = [config_changed({ + 1: task['uptodate'][0].config, + 2: kw})] + task['basename'] = self.name + flag = True + yield task + if flag is False: # No page rendered, yield a dummy task + yield { + 'basename': self.name, + 'name': 'None', + 'uptodate': [True], + 'actions': [], + } diff --git a/nikola/plugins/task_render_posts.plugin b/nikola/plugins/task_render_posts.plugin new file mode 100644 index 0000000..0d19ea9 --- /dev/null +++ b/nikola/plugins/task_render_posts.plugin @@ -0,0 +1,10 @@ +[Core] +Name = render_posts +Module = task_render_posts + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Create HTML fragments out of posts. + diff --git a/nikola/plugins/task_render_posts.py b/nikola/plugins/task_render_posts.py new file mode 100644 index 0000000..44888f2 --- /dev/null +++ b/nikola/plugins/task_render_posts.py @@ -0,0 +1,52 @@ +from copy import copy +import os + +from nikola.plugin_categories import Task +from nikola import utils + + +class RenderPosts(Task): + """Build HTML fragments from metadata and text.""" + + name = "render_posts" + + def gen_tasks(self): + """Build HTML fragments from metadata and text.""" + self.site.scan_posts() + kw = { + "translations": self.site.config["TRANSLATIONS"], + "timeline": self.site.timeline, + "default_lang": self.site.config["DEFAULT_LANG"], + } + + flag = False + for lang in kw["translations"]: + # TODO: timeline is global, get rid of it + deps_dict = copy(kw) + deps_dict.pop('timeline') + for post in kw['timeline']: + source = post.source_path + dest = post.base_path + if lang != kw["default_lang"]: + dest += '.' + lang + source_lang = source + '.' 
+ lang + if os.path.exists(source_lang): + source = source_lang + flag = True + yield { + 'basename': self.name, + 'name': dest.encode('utf-8'), + 'file_dep': post.fragment_deps(lang), + 'targets': [dest], + 'actions': [(self.site.get_compiler(post.source_path), + [source, dest])], + 'clean': True, + 'uptodate': [utils.config_changed(deps_dict)], + } + if flag is False: # Return a dummy task + yield { + 'basename': self.name, + 'name': 'None', + 'uptodate': [True], + 'actions': [], + } diff --git a/nikola/plugins/task_render_rss.plugin b/nikola/plugins/task_render_rss.plugin new file mode 100644 index 0000000..20caf15 --- /dev/null +++ b/nikola/plugins/task_render_rss.plugin @@ -0,0 +1,10 @@ +[Core] +Name = render_rss +Module = task_render_rss + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Generate RSS feeds. + diff --git a/nikola/plugins/task_render_rss.py b/nikola/plugins/task_render_rss.py new file mode 100644 index 0000000..bee1192 --- /dev/null +++ b/nikola/plugins/task_render_rss.py @@ -0,0 +1,41 @@ +import os + +from nikola import utils +from nikola.plugin_categories import Task + + +class RenderRSS(Task): + """Generate RSS feeds.""" + + name = "render_rss" + + def gen_tasks(self): + """Generate RSS feeds.""" + kw = { + "translations": self.site.config["TRANSLATIONS"], + "filters": self.site.config["FILTERS"], + "blog_title": self.site.config["BLOG_TITLE"], + "blog_url": self.site.config["BLOG_URL"], + "blog_description": self.site.config["BLOG_DESCRIPTION"], + "output_folder": self.site.config["OUTPUT_FOLDER"], + } + self.site.scan_posts() + # TODO: timeline is global, kill it + for lang in kw["translations"]: + output_name = os.path.join(kw['output_folder'], + self.site.path("rss", None, lang)) + deps = [] + posts = [x for x in self.site.timeline if x.use_in_feeds][:10] + for post in posts: + deps += post.deps(lang) + yield { + 'basename': 'render_rss', + 'name': output_name, + 'file_dep': deps, + 'targets': [output_name], + 'actions': [(utils.generic_rss_renderer, + (lang, kw["blog_title"], kw["blog_url"], + kw["blog_description"], posts, output_name))], + 'clean': True, + 'uptodate': [utils.config_changed(kw)], + } diff --git a/nikola/plugins/task_render_sources.plugin b/nikola/plugins/task_render_sources.plugin new file mode 100644 index 0000000..5b59598 --- /dev/null +++ b/nikola/plugins/task_render_sources.plugin @@ -0,0 +1,10 @@ +[Core] +Name = render_sources +Module = task_render_sources + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Copy page sources into the output. + diff --git a/nikola/plugins/task_render_sources.py b/nikola/plugins/task_render_sources.py new file mode 100644 index 0000000..ae5ce23 --- /dev/null +++ b/nikola/plugins/task_render_sources.py @@ -0,0 +1,54 @@ +import os + +from nikola.plugin_categories import Task +from nikola import utils + + +class Sources(Task): + """Copy page sources into the output.""" + + name = "render_sources" + + def gen_tasks(self): + """Publish the page sources into the output. 
+
+        Required keyword arguments:
+
+        translations
+        default_lang
+        output_folder
+        """
+        kw = {
+            "translations": self.site.config["TRANSLATIONS"],
+            "output_folder": self.site.config["OUTPUT_FOLDER"],
+            "default_lang": self.site.config["DEFAULT_LANG"],
+        }
+
+        self.site.scan_posts()
+        flag = False
+        for lang in kw["translations"]:
+            for post in self.site.timeline:
+                output_name = os.path.join(
+                    kw['output_folder'],
+                    post.destination_path(lang, post.source_ext()))
+                source = post.source_path
+                if lang != kw["default_lang"]:
+                    source_lang = source + '.' + lang
+                    if os.path.exists(source_lang):
+                        source = source_lang
+                flag = True
+                yield {
+                    'basename': 'render_sources',
+                    'name': output_name.encode('utf8'),
+                    'file_dep': [source],
+                    'targets': [output_name],
+                    'actions': [(utils.copy_file, (source, output_name))],
+                    'clean': True,
+                    'uptodate': [utils.config_changed(kw)],
+                }
+        if flag is False:  # No page rendered, yield a dummy task
+            yield {
+                'basename': 'render_sources',
+                'name': 'None',
+                'uptodate': [True],
+                'actions': [],
+            }
diff --git a/nikola/plugins/task_render_tags.plugin b/nikola/plugins/task_render_tags.plugin
new file mode 100644
index 0000000..b826e87
--- /dev/null
+++ b/nikola/plugins/task_render_tags.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = render_tags
+Module = task_render_tags
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Render the tag pages and feeds.
+
diff --git a/nikola/plugins/task_render_tags.py b/nikola/plugins/task_render_tags.py
new file mode 100644
index 0000000..61629ec
--- /dev/null
+++ b/nikola/plugins/task_render_tags.py
@@ -0,0 +1,180 @@
+import os
+
+from nikola.plugin_categories import Task
+from nikola import utils
+
+
+class RenderTags(Task):
+    """Render the tag pages and feeds."""
+
+    name = "render_tags"
+
+    def gen_tasks(self):
+        """Render the tag pages and feeds."""
+
+        kw = {
+            "translations": self.site.config["TRANSLATIONS"],
+            "blog_title": self.site.config["BLOG_TITLE"],
+            "blog_url": self.site.config["BLOG_URL"],
+            "blog_description": self.site.config["BLOG_DESCRIPTION"],
+            "messages": self.site.MESSAGES,
+            "output_folder": self.site.config['OUTPUT_FOLDER'],
+            "filters": self.site.config['FILTERS'],
+            "tag_pages_are_indexes": self.site.config['TAG_PAGES_ARE_INDEXES'],
+            "index_display_post_count":
+                self.site.config['INDEX_DISPLAY_POST_COUNT'],
+            "index_teasers": self.site.config['INDEX_TEASERS'],
+        }
+
+        self.site.scan_posts()
+
+        if not self.site.posts_per_tag:
+            yield {
+                'basename': self.name,
+                'actions': [],
+            }
+            return
+
+        def page_name(tagname, i, lang):
+            """Given a tag and a page number, return a page name."""
+            # Use the tagname argument, not the enclosing loop variable
+            name = self.site.path("tag", tagname, lang)
+            if i:
+                name = name.replace('.html', '-%s.html' % i)
+            return name
+
+        for tag, posts in self.site.posts_per_tag.items():
+            post_list = [self.site.global_data[post] for post in posts]
+            post_list.sort(cmp=lambda a, b: cmp(a.date, b.date))
+            post_list.reverse()
+            for lang in kw["translations"]:
+                # Render RSS
+                output_name = os.path.join(
+                    kw['output_folder'], self.site.path("tag_rss", tag, lang))
+                deps = []
+                post_list = [self.site.global_data[post] for post in posts
+                             if self.site.global_data[post].use_in_feeds]
+                post_list.sort(cmp=lambda a, b: cmp(a.date, b.date))
+                post_list.reverse()
+                for post in post_list:
+                    deps += post.deps(lang)
+                yield {
+                    'name': output_name.encode('utf8'),
+                    'file_dep': deps,
+                    'targets': [output_name],
+                    'actions': [(utils.generic_rss_renderer,
+                                 (lang, "%s (%s)" % (kw["blog_title"], tag),
kw["blog_url"], kw["blog_description"], + post_list, output_name))], + 'clean': True, + 'uptodate': [utils.config_changed(kw)], + 'basename': self.name + } + + # Render HTML + if kw['tag_pages_are_indexes']: + # We render a sort of index page collection using only + # this tag's posts. + + # FIXME: deduplicate this with render_indexes + template_name = "index.tmpl" + # Split in smaller lists + lists = [] + while post_list: + lists.append(post_list[ + :kw["index_display_post_count"]]) + post_list = post_list[ + kw["index_display_post_count"]:] + num_pages = len(lists) + for i, post_list in enumerate(lists): + context = {} + # On a tag page, the feeds include the tag's feeds + rss_link = \ + """<link rel="alternate" type="application/rss+xml" """\ + """type="application/rss+xml" title="RSS for tag """\ + """%s (%s)" href="%s">""" % \ + (tag, lang, self.site.link("tag_rss", tag, lang)) + context['rss_link'] = rss_link + output_name = os.path.join(kw['output_folder'], + page_name(tag, i, lang)) + context["title"] = kw["messages"][lang][ + u"Posts about %s"] % tag + context["prevlink"] = None + context["nextlink"] = None + context['index_teasers'] = kw['index_teasers'] + if i > 1: + context["prevlink"] = os.path.basename( + page_name(tag, i - 1, lang)) + if i == 1: + context["prevlink"] = os.path.basename( + page_name(tag, 0, lang)) + if i < num_pages - 1: + context["nextlink"] = os.path.basename( + page_name(tag, i + 1, lang)) + context["permalink"] = self.site.link("tag", tag, lang) + context["tag"] = tag + for task in self.site.generic_post_list_renderer( + lang, + post_list, + output_name, + template_name, + kw['filters'], + context, + ): + task['uptodate'] = [utils.config_changed({ + 1: task['uptodate'][0].config, + 2: kw})] + task['basename'] = self.name + yield task + else: + # We render a single flat link list with this tag's posts + template_name = "tag.tmpl" + output_name = os.path.join(kw['output_folder'], + self.site.path("tag", tag, lang)) + context = {} + context["lang"] = lang + context["title"] = kw["messages"][lang][ + u"Posts about %s"] % tag + context["items"] = [("[%s] %s" % (post.date, + post.title(lang)), + post.permalink(lang)) for post in post_list] + context["permalink"] = self.site.link("tag", tag, lang) + context["tag"] = tag + for task in self.site.generic_post_list_renderer( + lang, + post_list, + output_name, + template_name, + kw['filters'], + context, + ): + task['uptodate'] = [utils.config_changed({ + 1: task['uptodate'][0].config, + 2: kw})] + task['basename'] = self.name + yield task + + # And global "all your tags" page + tags = self.site.posts_per_tag.keys() + tags.sort() + template_name = "tags.tmpl" + kw['tags'] = tags + for lang in kw["translations"]: + output_name = os.path.join( + kw['output_folder'], self.site.path('tag_index', None, lang)) + context = {} + context["title"] = kw["messages"][lang][u"Tags"] + context["items"] = [(tag, self.site.link("tag", tag, lang)) + for tag in tags] + context["permalink"] = self.site.link("tag_index", None, lang) + for task in self.site.generic_post_list_renderer( + lang, + [], + output_name, + template_name, + kw['filters'], + context, + ): + task['uptodate'] = [utils.config_changed({ + 1: task['uptodate'][0].config, + 2: kw})] + yield task diff --git a/nikola/plugins/task_sitemap.plugin b/nikola/plugins/task_sitemap.plugin new file mode 100644 index 0000000..f6b01d7 --- /dev/null +++ b/nikola/plugins/task_sitemap.plugin @@ -0,0 +1,10 @@ +[Core] +Name = sitemap +Module = task_sitemap + +[Documentation] +Author = 
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Generate Google sitemap.
+
diff --git a/nikola/plugins/task_sitemap/__init__.py b/nikola/plugins/task_sitemap/__init__.py
new file mode 100644
index 0000000..87b72bf
--- /dev/null
+++ b/nikola/plugins/task_sitemap/__init__.py
@@ -0,0 +1,62 @@
+import os
+import tempfile
+
+from nikola.plugin_categories import LateTask
+from nikola.utils import config_changed
+
+import sitemap_gen as smap
+
+
+class Sitemap(LateTask):
+    """Generate Google sitemap."""
+
+    name = "sitemap"
+
+    def gen_tasks(self):
+        """Generate Google sitemap."""
+        kw = {
+            "blog_url": self.site.config["BLOG_URL"],
+            "output_folder": self.site.config["OUTPUT_FOLDER"],
+        }
+        output_path = os.path.abspath(kw['output_folder'])
+        sitemap_path = os.path.join(output_path, "sitemap.xml.gz")
+
+        def sitemap():
+            # Generate config
+            config_data = """<?xml version="1.0" encoding="UTF-8"?>
+            <site
+                base_url="%s"
+                store_into="%s"
+                verbose="1" >
+                <directory path="%s" url="%s" />
+                <filter action="drop" type="wildcard" pattern="*~" />
+                <filter action="drop" type="regexp" pattern="/\.[^/]*" />
+            </site>""" % (
+                kw["blog_url"],
+                sitemap_path,
+                output_path,
+                kw["blog_url"],
+            )
+            config_file = tempfile.NamedTemporaryFile(delete=False)
+            config_file.write(config_data)
+            config_file.close()
+
+            # Generate sitemap
+            sitemap = smap.CreateSitemapFromFile(config_file.name, True)
+            if not sitemap:
+                smap.output.Log('Configuration file errors -- exiting.', 0)
+            else:
+                sitemap.Generate()
+                smap.output.Log('Number of errors: %d' %
+                                smap.output.num_errors, 1)
+                smap.output.Log('Number of warnings: %d' %
+                                smap.output.num_warns, 1)
+            os.unlink(config_file.name)
+
+        yield {
+            "basename": "sitemap",
+            "targets": [sitemap_path],
+            "actions": [(sitemap,)],
+            "uptodate": [config_changed(kw)],
+            "clean": True,
+        }
diff --git a/nikola/plugins/task_sitemap/sitemap_gen.py b/nikola/plugins/task_sitemap/sitemap_gen.py
new file mode 100755
index 0000000..43e7c32
--- /dev/null
+++ b/nikola/plugins/task_sitemap/sitemap_gen.py
@@ -0,0 +1,2241 @@
+#!/usr/bin/env python
+# flake8: noqa
+#
+# Copyright (c) 2004, 2005 Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+#
+# * Neither the name of Google nor the names of its contributors may
+#   be used to endorse or promote products derived from this software
+#   without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+#
+# The sitemap_gen.py script is written in Python 2.2 and released to
+# the open source community for continuous improvements under the BSD
+# 2.0 new license, which can be found at:
+#
+#   http://www.opensource.org/licenses/bsd-license.php
+#
+
+__usage__ = \
+"""A simple script to automatically produce sitemaps for a webserver,
+in the Google Sitemap Protocol (GSP).
+
+Usage: python sitemap_gen.py --config=config.xml [--help] [--testing]
+            --config=config.xml, specifies config file location
+            --help, displays usage message
+            --testing, specified when user is experimenting
+"""
+
+# Please be careful that all syntax used in this file can be parsed on
+# Python 1.5 -- this version check is not evaluated until after the
+# entire file has been parsed.
+import sys
+if sys.hexversion < 0x02020000:
+  print 'This script requires Python 2.2 or later.'
+  print 'Currently run with version: %s' % sys.version
+  sys.exit(1)
+
+import fnmatch
+import glob
+import gzip
+import hashlib
+import os
+import re
+import stat
+import time
+import types
+import urllib
+import urlparse
+import xml.sax
+
+# True and False were introduced in Python2.2.2
+try:
+  testTrue = True
+  del testTrue
+except NameError:
+  True = 1
+  False = 0
+
+# Text encodings
+ENC_ASCII = 'ASCII'
+ENC_UTF8 = 'UTF-8'
+ENC_IDNA = 'IDNA'
+ENC_ASCII_LIST = ['ASCII', 'US-ASCII', 'US', 'IBM367', 'CP367', 'ISO646-US',
+                  'ISO_646.IRV:1991', 'ISO-IR-6', 'ANSI_X3.4-1968',
+                  'ANSI_X3.4-1986', 'CPASCII']
+ENC_DEFAULT_LIST = ['ISO-8859-1', 'ISO-8859-2', 'ISO-8859-5']
+
+# Maximum number of urls in each sitemap, before next Sitemap is created
+MAXURLS_PER_SITEMAP = 50000
+
+# Suffix on a Sitemap index file
+SITEINDEX_SUFFIX = '_index.xml'
+
+# Regular expressions tried for extracting URLs from access logs.
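+#
+# For reference, a made-up Common Logfile Format line that the pattern
+# below is meant to match (group 1 is the request method, group 2 the
+# path; only status-200 entries are considered):
+#
+#   127.0.0.1 - - [10/Oct/2004:13:55:36 -0700] "GET /index.html HTTP/1.0" 200 2326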
+ACCESSLOG_CLF_PATTERN = re.compile(
+  r'.+\s+"([^\s]+)\s+([^\s]+)\s+HTTP/\d+\.\d+"\s+200\s+.*'
+  )
+
+# Match patterns for lastmod attributes
+LASTMOD_PATTERNS = map(re.compile, [
+  r'^\d\d\d\d$',
+  r'^\d\d\d\d-\d\d$',
+  r'^\d\d\d\d-\d\d-\d\d$',
+  r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\dZ$',
+  r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d[+-]\d\d:\d\d$',
+  r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?Z$',
+  r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?[+-]\d\d:\d\d$',
+  ])
+
+# Match patterns for changefreq attributes
+CHANGEFREQ_PATTERNS = [
+  'always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never'
+  ]
+
+# XML formats
+SITEINDEX_HEADER = \
+  '<?xml version="1.0" encoding="UTF-8"?>\n' \
+  '<?xml-stylesheet type="text/xsl" href="gss.xsl"?>\n' \
+  '<sitemapindex\n' \
+  '  xmlns="http://www.google.com/schemas/sitemap/0.84"\n' \
+  '  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' \
+  '  xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84\n' \
+  '                      http://www.google.com/schemas/sitemap/0.84/' \
+  'siteindex.xsd">\n'
+SITEINDEX_FOOTER = '</sitemapindex>\n'
+SITEINDEX_ENTRY = \
+  ' <sitemap>\n' \
+  '  <loc>%(loc)s</loc>\n' \
+  '  <lastmod>%(lastmod)s</lastmod>\n' \
+  ' </sitemap>\n'
+SITEMAP_HEADER = \
+  '<?xml version="1.0" encoding="UTF-8"?>\n' \
+  '<urlset\n' \
+  '  xmlns="http://www.google.com/schemas/sitemap/0.84"\n' \
+  '  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' \
+  '  xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84\n' \
+  '                      http://www.google.com/schemas/sitemap/0.84/' \
+  'sitemap.xsd">\n'
+SITEMAP_FOOTER = '</urlset>\n'
+SITEURL_XML_PREFIX = ' <url>\n'
+SITEURL_XML_SUFFIX = ' </url>\n'
+
+# Search engines to notify with the updated sitemaps
+#
+# This list is very non-obvious in what's going on.  Here's the gist:
+# Each item in the list is a 6-tuple of items.  The first 5 are "almost"
+# the same as the input arguments to urlparse.urlunsplit():
+#   0 - schema
+#   1 - netloc
+#   2 - path
+#   3 - query    <-- EXCEPTION: specify a query map rather than a string
+#   4 - fragment
+# Additionally, add item 5:
+#   5 - query attribute that should be set to the new Sitemap URL
+# Clear as mud, I know.
+NOTIFICATION_SITES = [
+  ('http', 'www.google.com', 'webmasters/sitemaps/ping', {}, '', 'sitemap')
+  ]
+
+
+class Error(Exception):
+  """
+  Base exception class.  In this module we tend not to use our own exception
+  types for very much, but they come in very handy on XML parsing with SAX.
+  """
+  pass
+#end class Error
+
+
+class SchemaError(Error):
+  """Failure to process an XML file according to the schema we know."""
+  pass
+#end class SchemaError
+
+
+class Encoder:
+  """
+  Manages wide-character/narrow-character conversions for just about all
+  text that flows into or out of the script.
+
+  You should always use this class for string coercion, as opposed to
+  letting Python handle coercions automatically.  Reason: Python
+  usually assumes ASCII (7-bit) as a default narrow character encoding,
+  which is not the kind of data we generally deal with.
+
+  General high-level methodologies used in sitemap_gen:
+
+  [PATHS]
+  File system paths may be wide or narrow, depending on platform.
+  This works fine, just be aware of it and be very careful to not
+  mix them.  That is, if you have to pass several file path arguments
+  into a library call, make sure they are all narrow or all wide.
+  This class has MaybeNarrowPath() which should be called on every
+  file system path you deal with.
+
+  [URLS]
+  URL locations are stored in Narrow form, already escaped.
This has the + benefit of keeping escaping and encoding as close as possible to the format + we read them in. The downside is we may end up with URLs that have + intermingled encodings -- the root path may be encoded in one way + while the filename is encoded in another. This is obviously wrong, but + it should hopefully be an issue hit by very few users. The workaround + from the user level (assuming they notice) is to specify a default_encoding + parameter in their config file. + + [OTHER] + Other text, such as attributes of the URL class, configuration options, + etc, are generally stored in Unicode for simplicity. + """ + + def __init__(self): + self._user = None # User-specified default encoding + self._learned = [] # Learned default encodings + self._widefiles = False # File system can be wide + + # Can the file system be Unicode? + try: + self._widefiles = os.path.supports_unicode_filenames + except AttributeError: + try: + self._widefiles = sys.getwindowsversion() == os.VER_PLATFORM_WIN32_NT + except AttributeError: + pass + + # Try to guess a working default + try: + encoding = sys.getfilesystemencoding() + if encoding and not (encoding.upper() in ENC_ASCII_LIST): + self._learned = [ encoding ] + except AttributeError: + pass + + if not self._learned: + encoding = sys.getdefaultencoding() + if encoding and not (encoding.upper() in ENC_ASCII_LIST): + self._learned = [ encoding ] + + # If we had no guesses, start with some European defaults + if not self._learned: + self._learned = ENC_DEFAULT_LIST + #end def __init__ + + def SetUserEncoding(self, encoding): + self._user = encoding + #end def SetUserEncoding + + def NarrowText(self, text, encoding): + """ Narrow a piece of arbitrary text """ + if type(text) != types.UnicodeType: + return text + + # Try the passed in preference + if encoding: + try: + result = text.encode(encoding) + if not encoding in self._learned: + self._learned.append(encoding) + return result + except UnicodeError: + pass + except LookupError: + output.Warn('Unknown encoding: %s' % encoding) + + # Try the user preference + if self._user: + try: + return text.encode(self._user) + except UnicodeError: + pass + except LookupError: + temp = self._user + self._user = None + output.Warn('Unknown default_encoding: %s' % temp) + + # Look through learned defaults, knock any failing ones out of the list + while self._learned: + try: + return text.encode(self._learned[0]) + except: + del self._learned[0] + + # When all other defaults are exhausted, use UTF-8 + try: + return text.encode(ENC_UTF8) + except UnicodeError: + pass + + # Something is seriously wrong if we get to here + return text.encode(ENC_ASCII, 'ignore') + #end def NarrowText + + def MaybeNarrowPath(self, text): + """ Paths may be allowed to stay wide """ + if self._widefiles: + return text + return self.NarrowText(text, None) + #end def MaybeNarrowPath + + def WidenText(self, text, encoding): + """ Widen a piece of arbitrary text """ + if type(text) != types.StringType: + return text + + # Try the passed in preference + if encoding: + try: + result = unicode(text, encoding) + if not encoding in self._learned: + self._learned.append(encoding) + return result + except UnicodeError: + pass + except LookupError: + output.Warn('Unknown encoding: %s' % encoding) + + # Try the user preference + if self._user: + try: + return unicode(text, self._user) + except UnicodeError: + pass + except LookupError: + temp = self._user + self._user = None + output.Warn('Unknown default_encoding: %s' % temp) + + # Look through 
learned defaults, knock any failing ones out of the list + while self._learned: + try: + return unicode(text, self._learned[0]) + except: + del self._learned[0] + + # When all other defaults are exhausted, use UTF-8 + try: + return unicode(text, ENC_UTF8) + except UnicodeError: + pass + + # Getting here means it wasn't UTF-8 and we had no working default. + # We really don't have anything "right" we can do anymore. + output.Warn('Unrecognized encoding in text: %s' % text) + if not self._user: + output.Warn('You may need to set a default_encoding in your ' + 'configuration file.') + return text.decode(ENC_ASCII, 'ignore') + #end def WidenText +#end class Encoder +encoder = Encoder() + + +class Output: + """ + Exposes logging functionality, and tracks how many errors + we have thus output. + + Logging levels should be used as thus: + Fatal -- extremely sparingly + Error -- config errors, entire blocks of user 'intention' lost + Warn -- individual URLs lost + Log(,0) -- Un-suppressable text that's not an error + Log(,1) -- touched files, major actions + Log(,2) -- parsing notes, filtered or duplicated URLs + Log(,3) -- each accepted URL + """ + + def __init__(self): + self.num_errors = 0 # Count of errors + self.num_warns = 0 # Count of warnings + + self._errors_shown = {} # Shown errors + self._warns_shown = {} # Shown warnings + self._verbose = 0 # Level of verbosity + #end def __init__ + + def Log(self, text, level): + """ Output a blurb of diagnostic text, if the verbose level allows it """ + if text: + text = encoder.NarrowText(text, None) + if self._verbose >= level: + print text + #end def Log + + def Warn(self, text): + """ Output and count a warning. Suppress duplicate warnings. """ + if text: + text = encoder.NarrowText(text, None) + hash = hashlib.md5(text).digest() + if not self._warns_shown.has_key(hash): + self._warns_shown[hash] = 1 + print '[WARNING] ' + text + else: + self.Log('(suppressed) [WARNING] ' + text, 3) + self.num_warns = self.num_warns + 1 + #end def Warn + + def Error(self, text): + """ Output and count an error. Suppress duplicate errors. """ + if text: + text = encoder.NarrowText(text, None) + hash = hashlib.md5(text).digest() + if not self._errors_shown.has_key(hash): + self._errors_shown[hash] = 1 + print '[ERROR] ' + text + else: + self.Log('(suppressed) [ERROR] ' + text, 3) + self.num_errors = self.num_errors + 1 + #end def Error + + def Fatal(self, text): + """ Output an error and terminate the program. """ + if text: + text = encoder.NarrowText(text, None) + print '[FATAL] ' + text + else: + print 'Fatal error.' + sys.exit(1) + #end def Fatal + + def SetVerbose(self, level): + """ Sets the verbose level. """ + try: + if type(level) != types.IntType: + level = int(level) + if (level >= 0) and (level <= 3): + self._verbose = level + return + except ValueError: + pass + self.Error('Verbose level (%s) must be between 0 and 3 inclusive.' % level) + #end def SetVerbose +#end class Output +output = Output() + + +class URL(object): + """ URL is a smart structure grouping together the properties we + care about for a single web reference. 
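+
+  For illustration (example values only), a populated instance might hold
+  loc='http://example.com/', lastmod='2004-11-14T01:00-08:00',
+  changefreq='monthly' and priority=0.5.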
""" + __slots__ = 'loc', 'lastmod', 'changefreq', 'priority' + + def __init__(self): + self.loc = None # URL -- in Narrow characters + self.lastmod = None # ISO8601 timestamp of last modify + self.changefreq = None # Text term for update frequency + self.priority = None # Float between 0 and 1 (inc) + #end def __init__ + + def __cmp__(self, other): + if self.loc < other.loc: + return -1 + if self.loc > other.loc: + return 1 + return 0 + #end def __cmp__ + + def TrySetAttribute(self, attribute, value): + """ Attempt to set the attribute to the value, with a pretty try + block around it. """ + if attribute == 'loc': + self.loc = self.Canonicalize(value) + else: + try: + setattr(self, attribute, value) + except AttributeError: + output.Warn('Unknown URL attribute: %s' % attribute) + #end def TrySetAttribute + + def IsAbsolute(loc): + """ Decide if the URL is absolute or not """ + if not loc: + return False + narrow = encoder.NarrowText(loc, None) + (scheme, netloc, path, query, frag) = urlparse.urlsplit(narrow) + if (not scheme) or (not netloc): + return False + return True + #end def IsAbsolute + IsAbsolute = staticmethod(IsAbsolute) + + def Canonicalize(loc): + """ Do encoding and canonicalization on a URL string """ + if not loc: + return loc + + # Let the encoder try to narrow it + narrow = encoder.NarrowText(loc, None) + + # Escape components individually + (scheme, netloc, path, query, frag) = urlparse.urlsplit(narrow) + unr = '-._~' + sub = '!$&\'()*+,;=' + netloc = urllib.quote(netloc, unr + sub + '%:@/[]') + path = urllib.quote(path, unr + sub + '%:@/') + query = urllib.quote(query, unr + sub + '%:@/?') + frag = urllib.quote(frag, unr + sub + '%:@/?') + + # Try built-in IDNA encoding on the netloc + try: + (ignore, widenetloc, ignore, ignore, ignore) = urlparse.urlsplit(loc) + for c in widenetloc: + if c >= unichr(128): + netloc = widenetloc.encode(ENC_IDNA) + netloc = urllib.quote(netloc, unr + sub + '%:@/[]') + break + except UnicodeError: + # urlsplit must have failed, based on implementation differences in the + # library. There is not much we can do here, except ignore it. + pass + except LookupError: + output.Warn('An International Domain Name (IDN) is being used, but this ' + 'version of Python does not have support for IDNA encoding. ' + ' (IDNA support was introduced in Python 2.3) The encoding ' + 'we have used instead is wrong and will probably not yield ' + 'valid URLs.') + bad_netloc = False + if '%' in netloc: + bad_netloc = True + + # Put it all back together + narrow = urlparse.urlunsplit((scheme, netloc, path, query, frag)) + + # I let '%' through. Fix any that aren't pre-existing escapes. + HEXDIG = '0123456789abcdefABCDEF' + list = narrow.split('%') + narrow = list[0] + del list[0] + for item in list: + if (len(item) >= 2) and (item[0] in HEXDIG) and (item[1] in HEXDIG): + narrow = narrow + '%' + item + else: + narrow = narrow + '%25' + item + + # Issue a warning if this is a bad URL + if bad_netloc: + output.Warn('Invalid characters in the host or domain portion of a URL: ' + + narrow) + + return narrow + #end def Canonicalize + Canonicalize = staticmethod(Canonicalize) + + def Validate(self, base_url, allow_fragment): + """ Verify the data in this URL is well-formed, and override if not. 
""" + assert type(base_url) == types.StringType + + # Test (and normalize) the ref + if not self.loc: + output.Warn('Empty URL') + return False + if allow_fragment: + self.loc = urlparse.urljoin(base_url, self.loc) + if not self.loc.startswith(base_url): + output.Warn('Discarded URL for not starting with the base_url: %s' % + self.loc) + self.loc = None + return False + + # Test the lastmod + if self.lastmod: + match = False + self.lastmod = self.lastmod.upper() + for pattern in LASTMOD_PATTERNS: + match = pattern.match(self.lastmod) + if match: + break + if not match: + output.Warn('Lastmod "%s" does not appear to be in ISO8601 format on ' + 'URL: %s' % (self.lastmod, self.loc)) + self.lastmod = None + + # Test the changefreq + if self.changefreq: + match = False + self.changefreq = self.changefreq.lower() + for pattern in CHANGEFREQ_PATTERNS: + if self.changefreq == pattern: + match = True + break + if not match: + output.Warn('Changefreq "%s" is not a valid change frequency on URL ' + ': %s' % (self.changefreq, self.loc)) + self.changefreq = None + + # Test the priority + if self.priority: + priority = -1.0 + try: + priority = float(self.priority) + except ValueError: + pass + if (priority < 0.0) or (priority > 1.0): + output.Warn('Priority "%s" is not a number between 0 and 1 inclusive ' + 'on URL: %s' % (self.priority, self.loc)) + self.priority = None + + return True + #end def Validate + + def MakeHash(self): + """ Provides a uniform way of hashing URLs """ + if not self.loc: + return None + if self.loc.endswith('/'): + return hashlib.md5(self.loc[:-1]).digest() + return hashlib.md5(self.loc).digest() + #end def MakeHash + + def Log(self, prefix='URL', level=3): + """ Dump the contents, empty or not, to the log. """ + out = prefix + ':' + + for attribute in self.__slots__: + value = getattr(self, attribute) + if not value: + value = '' + out = out + (' %s=[%s]' % (attribute, value)) + + output.Log('%s' % encoder.NarrowText(out, None), level) + #end def Log + + def WriteXML(self, file): + """ Dump non-empty contents to the output file, in XML format. """ + if not self.loc: + return + out = SITEURL_XML_PREFIX + + for attribute in self.__slots__: + value = getattr(self, attribute) + if value: + if type(value) == types.UnicodeType: + value = encoder.NarrowText(value, None) + elif type(value) != types.StringType: + value = str(value) + value = xml.sax.saxutils.escape(value) + out = out + (' <%s>%s</%s>\n' % (attribute, value, attribute)) + + out = out + SITEURL_XML_SUFFIX + file.write(out) + #end def WriteXML +#end class URL + + +class Filter: + """ + A filter on the stream of URLs we find. A filter is, in essence, + a wildcard applied to the stream. You can think of this as an + operator that returns a tri-state when given a URL: + + True -- this URL is to be included in the sitemap + None -- this URL is undecided + False -- this URL is to be dropped from the sitemap + """ + + def __init__(self, attributes): + self._wildcard = None # Pattern for wildcard match + self._regexp = None # Pattern for regexp match + self._pass = False # "Drop" filter vs. 
"Pass" filter + + if not ValidateAttributes('FILTER', attributes, + ('pattern', 'type', 'action')): + return + + # Check error count on the way in + num_errors = output.num_errors + + # Fetch the attributes + pattern = attributes.get('pattern') + type = attributes.get('type', 'wildcard') + action = attributes.get('action', 'drop') + if type: + type = type.lower() + if action: + action = action.lower() + + # Verify the attributes + if not pattern: + output.Error('On a filter you must specify a "pattern" to match') + elif (not type) or ((type != 'wildcard') and (type != 'regexp')): + output.Error('On a filter you must specify either \'type="wildcard"\' ' + 'or \'type="regexp"\'') + elif (action != 'pass') and (action != 'drop'): + output.Error('If you specify a filter action, it must be either ' + '\'action="pass"\' or \'action="drop"\'') + + # Set the rule + if action == 'drop': + self._pass = False + elif action == 'pass': + self._pass = True + + if type == 'wildcard': + self._wildcard = pattern + elif type == 'regexp': + try: + self._regexp = re.compile(pattern) + except re.error: + output.Error('Bad regular expression: %s' % pattern) + + # Log the final results iff we didn't add any errors + if num_errors == output.num_errors: + output.Log('Filter: %s any URL that matches %s "%s"' % + (action, type, pattern), 2) + #end def __init__ + + def Apply(self, url): + """ Process the URL, as above. """ + if (not url) or (not url.loc): + return None + + if self._wildcard: + if fnmatch.fnmatchcase(url.loc, self._wildcard): + return self._pass + return None + + if self._regexp: + if self._regexp.search(url.loc): + return self._pass + return None + + assert False # unreachable + #end def Apply +#end class Filter + + +class InputURL: + """ + Each Input class knows how to yield a set of URLs from a data source. + + This one handles a single URL, manually specified in the config file. + """ + + def __init__(self, attributes): + self._url = None # The lonely URL + + if not ValidateAttributes('URL', attributes, + ('href', 'lastmod', 'changefreq', 'priority')): + return + + url = URL() + for attr in attributes.keys(): + if attr == 'href': + url.TrySetAttribute('loc', attributes[attr]) + else: + url.TrySetAttribute(attr, attributes[attr]) + + if not url.loc: + output.Error('Url entries must have an href attribute.') + return + + self._url = url + output.Log('Input: From URL "%s"' % self._url.loc, 2) + #end def __init__ + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. """ + if self._url: + consumer(self._url, True) + #end def ProduceURLs +#end class InputURL + + +class InputURLList: + """ + Each Input class knows how to yield a set of URLs from a data source. + + This one handles a text file with a list of URLs + """ + + def __init__(self, attributes): + self._path = None # The file path + self._encoding = None # Encoding of that file + + if not ValidateAttributes('URLLIST', attributes, ('path', 'encoding')): + return + + self._path = attributes.get('path') + self._encoding = attributes.get('encoding', ENC_UTF8) + if self._path: + self._path = encoder.MaybeNarrowPath(self._path) + if os.path.isfile(self._path): + output.Log('Input: From URLLIST "%s"' % self._path, 2) + else: + output.Error('Can not locate file: %s' % self._path) + self._path = None + else: + output.Error('Urllist entries must have a "path" attribute.') + #end def __init__ + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. 
""" + + # Open the file + (frame, file) = OpenFileForRead(self._path, 'URLLIST') + if not file: + return + + # Iterate lines + linenum = 0 + for line in file.readlines(): + linenum = linenum + 1 + + # Strip comments and empty lines + if self._encoding: + line = encoder.WidenText(line, self._encoding) + line = line.strip() + if (not line) or line[0] == '#': + continue + + # Split the line on space + url = URL() + cols = line.split(' ') + for i in range(0,len(cols)): + cols[i] = cols[i].strip() + url.TrySetAttribute('loc', cols[0]) + + # Extract attributes from the other columns + for i in range(1,len(cols)): + if cols[i]: + try: + (attr_name, attr_val) = cols[i].split('=', 1) + url.TrySetAttribute(attr_name, attr_val) + except ValueError: + output.Warn('Line %d: Unable to parse attribute: %s' % + (linenum, cols[i])) + + # Pass it on + consumer(url, False) + + file.close() + if frame: + frame.close() + #end def ProduceURLs +#end class InputURLList + + +class InputDirectory: + """ + Each Input class knows how to yield a set of URLs from a data source. + + This one handles a directory that acts as base for walking the filesystem. + """ + + def __init__(self, attributes, base_url): + self._path = None # The directory + self._url = None # The URL equivelant + self._default_file = None + + if not ValidateAttributes('DIRECTORY', attributes, ('path', 'url', + 'default_file')): + return + + # Prep the path -- it MUST end in a sep + path = attributes.get('path') + if not path: + output.Error('Directory entries must have both "path" and "url" ' + 'attributes') + return + path = encoder.MaybeNarrowPath(path) + if not path.endswith(os.sep): + path = path + os.sep + if not os.path.isdir(path): + output.Error('Can not locate directory: %s' % path) + return + + # Prep the URL -- it MUST end in a sep + url = attributes.get('url') + if not url: + output.Error('Directory entries must have both "path" and "url" ' + 'attributes') + return + url = URL.Canonicalize(url) + if not url.endswith('/'): + url = url + '/' + if not url.startswith(base_url): + url = urlparse.urljoin(base_url, url) + if not url.startswith(base_url): + output.Error('The directory URL "%s" is not relative to the ' + 'base_url: %s' % (url, base_url)) + return + + # Prep the default file -- it MUST be just a filename + file = attributes.get('default_file') + if file: + file = encoder.MaybeNarrowPath(file) + if os.sep in file: + output.Error('The default_file "%s" can not include path information.' + % file) + file = None + + self._path = path + self._url = url + self._default_file = file + if file: + output.Log('Input: From DIRECTORY "%s" (%s) with default file "%s"' + % (path, url, file), 2) + else: + output.Log('Input: From DIRECTORY "%s" (%s) with no default file' + % (path, url), 2) + #end def __init__ + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. """ + if not self._path: + return + + root_path = self._path + root_URL = self._url + root_file = "index.html" + + def DecideFilename(name): + assert "/" not in name + + if name in ( "robots.txt, " ): + return False + + if ".thumbnail." 
+        return False
+
+      if re.match(r"google[a-f0-9]+\.html", name):
+        return False
+
+      return not re.match(r"^index(\-\d+)?\.html$", name)
+
+    def DecideDirectory(dirpath):
+      subpath = dirpath[len(root_path):]
+
+      assert not subpath.startswith("/"), subpath
+
+      for remove in ("assets",):
+        if subpath == remove or subpath.startswith(remove + os.path.sep):
+          return False
+      else:
+        return True
+
+    def PerFile(dirpath, name):
+      """
+      Called once per file.
+      Note that 'name' will occasionally be None -- for a directory itself
+      """
+      if not DecideDirectory(dirpath):
+        return
+
+      if name is not None and not DecideFilename(name):
+        return
+
+      # Pull a timestamp
+      url = URL()
+      isdir = False
+      try:
+        if name:
+          path = os.path.join(dirpath, name)
+        else:
+          path = dirpath
+        isdir = os.path.isdir(path)
+        time = None
+        if isdir and root_file:
+          file = os.path.join(path, root_file)
+          try:
+            time = os.stat(file)[stat.ST_MTIME]
+          except OSError:
+            pass
+        if not time:
+          time = os.stat(path)[stat.ST_MTIME]
+        url.lastmod = TimestampISO8601(time)
+      except OSError:
+        pass
+      except ValueError:
+        pass
+
+      # Build a URL
+      middle = dirpath[len(root_path):]
+      if os.sep != '/':
+        middle = middle.replace(os.sep, '/')
+      if middle:
+        middle = middle + '/'
+      if name:
+        middle = middle + name
+        if isdir:
+          middle = middle + '/'
+      url.TrySetAttribute('loc', root_URL + encoder.WidenText(middle, None))
+
+      # Suppress default files.  (All the way down here so we can log it.)
+      if name and (root_file == name):
+        url.Log(prefix='IGNORED (default file)', level=2)
+        return
+
+      consumer(url, False)
+    #end def PerFile
+
+    def PerDirectory(ignore, dirpath, namelist):
+      """
+      Called once per directory with a list of all the contained files/dirs.
+      """
+      ignore = ignore  # Avoid warnings of an unused parameter
+
+      if not dirpath.startswith(root_path):
+        output.Warn('Unable to decide what the root path is for directory: '
+                    '%s' % dirpath)
+        return
+
+      if not DecideDirectory(dirpath):
+        return
+
+      for name in namelist:
+        PerFile(dirpath, name)
+    #end def PerDirectory
+
+    output.Log('Walking DIRECTORY "%s"' % self._path, 1)
+    PerFile(self._path, None)
+    os.path.walk(self._path, PerDirectory, None)
+  #end def ProduceURLs
+#end class InputDirectory
+
+
+class InputAccessLog:
+  """
+  Each Input class knows how to yield a set of URLs from a data source.
+
+  This one handles access logs.  It's non-trivial in that we want to
+  auto-detect log files in the Common Logfile Format (as used by Apache,
+  for instance) and the Extended Log File Format (as used by IIS, for
+  instance).
+  """
+
+  def __init__(self, attributes):
+    self._path = None        # The file path
+    self._encoding = None    # Encoding of that file
+    self._is_elf = False     # Extended Log File Format?
+    self._is_clf = False     # Common Logfile Format?
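+    # Column indices of the ELF fields we care about, learned from the
+    # log's '#Fields:' directive; -1 means the field has not been seen.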
+ self._elf_status = -1 # ELF field: '200' + self._elf_method = -1 # ELF field: 'HEAD' + self._elf_uri = -1 # ELF field: '/foo?bar=1' + self._elf_urifrag1 = -1 # ELF field: '/foo' + self._elf_urifrag2 = -1 # ELF field: 'bar=1' + + if not ValidateAttributes('ACCESSLOG', attributes, ('path', 'encoding')): + return + + self._path = attributes.get('path') + self._encoding = attributes.get('encoding', ENC_UTF8) + if self._path: + self._path = encoder.MaybeNarrowPath(self._path) + if os.path.isfile(self._path): + output.Log('Input: From ACCESSLOG "%s"' % self._path, 2) + else: + output.Error('Can not locate file: %s' % self._path) + self._path = None + else: + output.Error('Accesslog entries must have a "path" attribute.') + #end def __init__ + + def RecognizeELFLine(self, line): + """ Recognize the Fields directive that heads an ELF file """ + if not line.startswith('#Fields:'): + return False + fields = line.split(' ') + del fields[0] + for i in range(0, len(fields)): + field = fields[i].strip() + if field == 'sc-status': + self._elf_status = i + elif field == 'cs-method': + self._elf_method = i + elif field == 'cs-uri': + self._elf_uri = i + elif field == 'cs-uri-stem': + self._elf_urifrag1 = i + elif field == 'cs-uri-query': + self._elf_urifrag2 = i + output.Log('Recognized an Extended Log File Format file.', 2) + return True + #end def RecognizeELFLine + + def GetELFLine(self, line): + """ Fetch the requested URL from an ELF line """ + fields = line.split(' ') + count = len(fields) + + # Verify status was Ok + if self._elf_status >= 0: + if self._elf_status >= count: + return None + if not fields[self._elf_status].strip() == '200': + return None + + # Verify method was HEAD or GET + if self._elf_method >= 0: + if self._elf_method >= count: + return None + if not fields[self._elf_method].strip() in ('HEAD', 'GET'): + return None + + # Pull the full URL if we can + if self._elf_uri >= 0: + if self._elf_uri >= count: + return None + url = fields[self._elf_uri].strip() + if url != '-': + return url + + # Put together a fragmentary URL + if self._elf_urifrag1 >= 0: + if self._elf_urifrag1 >= count or self._elf_urifrag2 >= count: + return None + urlfrag1 = fields[self._elf_urifrag1].strip() + urlfrag2 = None + if self._elf_urifrag2 >= 0: + urlfrag2 = fields[self._elf_urifrag2] + if urlfrag1 and (urlfrag1 != '-'): + if urlfrag2 and (urlfrag2 != '-'): + urlfrag1 = urlfrag1 + '?' + urlfrag2 + return urlfrag1 + + return None + #end def GetELFLine + + def RecognizeCLFLine(self, line): + """ Try to tokenize a logfile line according to CLF pattern and see if + it works. """ + match = ACCESSLOG_CLF_PATTERN.match(line) + recognize = match and (match.group(1) in ('HEAD', 'GET')) + if recognize: + output.Log('Recognized a Common Logfile Format file.', 2) + return recognize + #end def RecognizeCLFLine + + def GetCLFLine(self, line): + """ Fetch the requested URL from a CLF line """ + match = ACCESSLOG_CLF_PATTERN.match(line) + if match: + request = match.group(1) + if request in ('HEAD', 'GET'): + return match.group(2) + return None + #end def GetCLFLine + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. 
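+
+    The log format is auto-detected from the first recognizable line: a
+    '#Fields:' directive marks an Extended Log File Format log, while a
+    line matching ACCESSLOG_CLF_PATTERN marks a Common Logfile Format log.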
""" + + # Open the file + (frame, file) = OpenFileForRead(self._path, 'ACCESSLOG') + if not file: + return + + # Iterate lines + for line in file.readlines(): + if self._encoding: + line = encoder.WidenText(line, self._encoding) + line = line.strip() + + # If we don't know the format yet, try them both + if (not self._is_clf) and (not self._is_elf): + self._is_elf = self.RecognizeELFLine(line) + self._is_clf = self.RecognizeCLFLine(line) + + # Digest the line + match = None + if self._is_elf: + match = self.GetELFLine(line) + elif self._is_clf: + match = self.GetCLFLine(line) + if not match: + continue + + # Pass it on + url = URL() + url.TrySetAttribute('loc', match) + consumer(url, True) + + file.close() + if frame: + frame.close() + #end def ProduceURLs +#end class InputAccessLog + + +class InputSitemap(xml.sax.handler.ContentHandler): + + """ + Each Input class knows how to yield a set of URLs from a data source. + + This one handles Sitemap files and Sitemap index files. For the sake + of simplicity in design (and simplicity in interfacing with the SAX + package), we do not handle these at the same time, recursively. Instead + we read an index file completely and make a list of Sitemap files, then + go back and process each Sitemap. + """ + + class _ContextBase(object): + + """Base class for context handlers in our SAX processing. A context + handler is a class that is responsible for understanding one level of + depth in the XML schema. The class knows what sub-tags are allowed, + and doing any processing specific for the tag we're in. + + This base class is the API filled in by specific context handlers, + all defined below. + """ + + def __init__(self, subtags): + """Initialize with a sequence of the sub-tags that would be valid in + this context.""" + self._allowed_tags = subtags # Sequence of sub-tags we can have + self._last_tag = None # Most recent seen sub-tag + #end def __init__ + + def AcceptTag(self, tag): + """Returns True iff opening a sub-tag is valid in this context.""" + valid = tag in self._allowed_tags + if valid: + self._last_tag = tag + else: + self._last_tag = None + return valid + #end def AcceptTag + + def AcceptText(self, text): + """Returns True iff a blurb of text is valid in this context.""" + return False + #end def AcceptText + + def Open(self): + """The context is opening. Do initialization.""" + pass + #end def Open + + def Close(self): + """The context is closing. Return our result, if any.""" + pass + #end def Close + + def Return(self, result): + """We're returning to this context after handling a sub-tag. This + method is called with the result data from the sub-tag that just + closed. 
Here in _ContextBase, if we ever see a result it means + the derived child class forgot to override this method.""" + if result: + raise NotImplementedError + #end def Return + #end class _ContextBase + + class _ContextUrlSet(_ContextBase): + + """Context handler for the document node in a Sitemap.""" + + def __init__(self): + InputSitemap._ContextBase.__init__(self, ('url',)) + #end def __init__ + #end class _ContextUrlSet + + class _ContextUrl(_ContextBase): + + """Context handler for a URL node in a Sitemap.""" + + def __init__(self, consumer): + """Initialize this context handler with the callable consumer that + wants our URLs.""" + InputSitemap._ContextBase.__init__(self, URL.__slots__) + self._url = None # The URL object we're building + self._consumer = consumer # Who wants to consume it + #end def __init__ + + def Open(self): + """Initialize the URL.""" + assert not self._url + self._url = URL() + #end def Open + + def Close(self): + """Pass the URL to the consumer and reset it to None.""" + assert self._url + self._consumer(self._url, False) + self._url = None + #end def Close + + def Return(self, result): + """A value context has closed, absorb the data it gave us.""" + assert self._url + if result: + self._url.TrySetAttribute(self._last_tag, result) + #end def Return + #end class _ContextUrl + + class _ContextSitemapIndex(_ContextBase): + + """Context handler for the document node in an index file.""" + + def __init__(self): + InputSitemap._ContextBase.__init__(self, ('sitemap',)) + self._loclist = [] # List of accumulated Sitemap URLs + #end def __init__ + + def Open(self): + """Just a quick verify of state.""" + assert not self._loclist + #end def Open + + def Close(self): + """Return our list of accumulated URLs.""" + if self._loclist: + temp = self._loclist + self._loclist = [] + return temp + #end def Close + + def Return(self, result): + """Getting a new loc URL, add it to the collection.""" + if result: + self._loclist.append(result) + #end def Return + #end class _ContextSitemapIndex + + class _ContextSitemap(_ContextBase): + + """Context handler for a Sitemap entry in an index file.""" + + def __init__(self): + InputSitemap._ContextBase.__init__(self, ('loc', 'lastmod')) + self._loc = None # The URL to the Sitemap + #end def __init__ + + def Open(self): + """Just a quick verify of state.""" + assert not self._loc + #end def Open + + def Close(self): + """Return our URL to our parent.""" + if self._loc: + temp = self._loc + self._loc = None + return temp + output.Warn('In the Sitemap index file, a "sitemap" entry had no "loc".') + #end def Close + + def Return(self, result): + """A value has closed. If it was a 'loc', absorb it.""" + if result and (self._last_tag == 'loc'): + self._loc = result + #end def Return + #end class _ContextSitemap + + class _ContextValue(_ContextBase): + + """Context handler for a single value. We return just the value. 
The higher level context has to remember what tag led into us."""

    def __init__(self):
      InputSitemap._ContextBase.__init__(self, ())
      self._text = None
    #end def __init__

    def AcceptText(self, text):
      """Allow all text, adding it to our buffer."""
      if self._text:
        self._text = self._text + text
      else:
        self._text = text
      return True
    #end def AcceptText

    def Open(self):
      """Initialize our buffer."""
      self._text = None
    #end def Open

    def Close(self):
      """Return what's in our buffer."""
      text = self._text
      self._text = None
      if text:
        text = text.strip()
      return text
    #end def Close
  #end class _ContextValue

  def __init__(self, attributes):
    """Initialize with a dictionary of attributes from our entry in the
    config file."""
    xml.sax.handler.ContentHandler.__init__(self)
    self._pathlist = None      # A list of files
    self._current = -1         # Current context in _contexts
    self._contexts = None      # The stack of contexts we allow
    self._contexts_idx = None  # ...contexts for index files
    self._contexts_stm = None  # ...contexts for Sitemap files

    if not ValidateAttributes('SITEMAP', attributes, ['path']):
      return

    # Init the first file path
    path = attributes.get('path')
    if path:
      path = encoder.MaybeNarrowPath(path)
      if os.path.isfile(path):
        output.Log('Input: From SITEMAP "%s"' % path, 2)
        self._pathlist = [path]
      else:
        output.Error('Can not locate file "%s"' % path)
    else:
      output.Error('Sitemap entries must have a "path" attribute.')
  #end def __init__

  def ProduceURLs(self, consumer):
    """In general: Produces URLs from our data source, hands them to the
    callable consumer.

    In specific: Iterate over our list of paths and delegate the actual
    processing to helper methods. This is a complexity no other data source
    needs to suffer. We are unique in that we can have files that tell us
    to bring in other files.

    Note the decision to allow an index file or not is made in this method.
    If we call our parser with (self._contexts == None) the parser will
    grab whichever context stack can handle the file. IE: index is allowed.
    If instead we set (self._contexts = ...) before parsing, the parser
    will only use the stack we specify. IE: index not allowed.
    """
    # Set up two stacks of contexts
    self._contexts_idx = [InputSitemap._ContextSitemapIndex(),
                          InputSitemap._ContextSitemap(),
                          InputSitemap._ContextValue()]

    self._contexts_stm = [InputSitemap._ContextUrlSet(),
                          InputSitemap._ContextUrl(consumer),
                          InputSitemap._ContextValue()]

    # Process the first file
    assert self._pathlist
    path = self._pathlist[0]
    self._contexts = None  # We allow an index file here
    self._ProcessFile(path)

    # Iterate over remaining files
    self._contexts = self._contexts_stm  # No index files allowed
    for path in self._pathlist[1:]:
      self._ProcessFile(path)
  #end def ProduceURLs

  def _ProcessFile(self, path):
    """Do per-file reading/parsing/consuming for the file path passed in."""
    assert path

    # Open our file
    (frame, file) = OpenFileForRead(path, 'SITEMAP')
    if not file:
      return

    # Rev up the SAX engine
    try:
      self._current = -1
      xml.sax.parse(file, self)
    except SchemaError:
      output.Error('An error in file "%s" made us abort reading the Sitemap.'
+ % path) + except IOError: + output.Error('Cannot read from file "%s"' % path) + except xml.sax._exceptions.SAXParseException, e: + output.Error('XML error in the file "%s" (line %d, column %d): %s' % + (path, e._linenum, e._colnum, e.getMessage())) + + # Clean up + file.close() + if frame: + frame.close() + #end def _ProcessFile + + def _MungeLocationListIntoFiles(self, urllist): + """Given a list of URLs, munge them into our self._pathlist property. + We do this by assuming all the files live in the same directory as + the first file in the existing pathlist. That is, we assume a + Sitemap index points to Sitemaps only in the same directory. This + is not true in general, but will be true for any output produced + by this script. + """ + assert self._pathlist + path = self._pathlist[0] + path = os.path.normpath(path) + dir = os.path.dirname(path) + wide = False + if type(path) == types.UnicodeType: + wide = True + + for url in urllist: + url = URL.Canonicalize(url) + output.Log('Index points to Sitemap file at: %s' % url, 2) + (scheme, netloc, path, query, frag) = urlparse.urlsplit(url) + file = os.path.basename(path) + file = urllib.unquote(file) + if wide: + file = encoder.WidenText(file) + if dir: + file = dir + os.sep + file + if file: + self._pathlist.append(file) + output.Log('Will attempt to read Sitemap file: %s' % file, 1) + #end def _MungeLocationListIntoFiles + + def startElement(self, tag, attributes): + """SAX processing, called per node in the config stream. + As long as the new tag is legal in our current context, this + becomes an Open call on one context deeper. + """ + # If this is the document node, we may have to look for a context stack + if (self._current < 0) and not self._contexts: + assert self._contexts_idx and self._contexts_stm + if tag == 'urlset': + self._contexts = self._contexts_stm + elif tag == 'sitemapindex': + self._contexts = self._contexts_idx + output.Log('File is a Sitemap index.', 2) + else: + output.Error('The document appears to be neither a Sitemap nor a ' + 'Sitemap index.') + raise SchemaError + + # Display a kinder error on a common mistake + if (self._current < 0) and (self._contexts == self._contexts_stm) and ( + tag == 'sitemapindex'): + output.Error('A Sitemap index can not refer to another Sitemap index.') + raise SchemaError + + # Verify no unexpected attributes + if attributes: + text = '' + for attr in attributes.keys(): + # The document node will probably have namespaces + if self._current < 0: + if attr.find('xmlns') >= 0: + continue + if attr.find('xsi') >= 0: + continue + if text: + text = text + ', ' + text = text + attr + if text: + output.Warn('Did not expect any attributes on any tag, instead tag ' + '"%s" had attributes: %s' % (tag, text)) + + # Switch contexts + if (self._current < 0) or (self._contexts[self._current].AcceptTag(tag)): + self._current = self._current + 1 + assert self._current < len(self._contexts) + self._contexts[self._current].Open() + else: + output.Error('Can not accept tag "%s" where it appears.' % tag) + raise SchemaError + #end def startElement + + def endElement(self, tag): + """SAX processing, called per node in the config stream. + This becomes a call to Close on one context followed by a call + to Return on the previous. 
+ """ + tag = tag # Avoid warning on unused argument + assert self._current >= 0 + retval = self._contexts[self._current].Close() + self._current = self._current - 1 + if self._current >= 0: + self._contexts[self._current].Return(retval) + elif retval and (self._contexts == self._contexts_idx): + self._MungeLocationListIntoFiles(retval) + #end def endElement + + def characters(self, text): + """SAX processing, called when text values are read. Important to + note that one single text value may be split across multiple calls + of this method. + """ + if (self._current < 0) or ( + not self._contexts[self._current].AcceptText(text)): + if text.strip(): + output.Error('Can not accept text "%s" where it appears.' % text) + raise SchemaError + #end def characters +#end class InputSitemap + + +class FilePathGenerator: + """ + This class generates filenames in a series, upon request. + You can request any iteration number at any time, you don't + have to go in order. + + Example of iterations for '/path/foo.xml.gz': + 0 --> /path/foo.xml.gz + 1 --> /path/foo1.xml.gz + 2 --> /path/foo2.xml.gz + _index.xml --> /path/foo_index.xml + """ + + def __init__(self): + self.is_gzip = False # Is this a GZIP file? + + self._path = None # '/path/' + self._prefix = None # 'foo' + self._suffix = None # '.xml.gz' + #end def __init__ + + def Preload(self, path): + """ Splits up a path into forms ready for recombination. """ + path = encoder.MaybeNarrowPath(path) + + # Get down to a base name + path = os.path.normpath(path) + base = os.path.basename(path).lower() + if not base: + output.Error('Couldn\'t parse the file path: %s' % path) + return False + lenbase = len(base) + + # Recognize extension + lensuffix = 0 + compare_suffix = ['.xml', '.xml.gz', '.gz'] + for suffix in compare_suffix: + if base.endswith(suffix): + lensuffix = len(suffix) + break + if not lensuffix: + output.Error('The path "%s" doesn\'t end in a supported file ' + 'extension.' % path) + return False + self.is_gzip = suffix.endswith('.gz') + + # Split the original path + lenpath = len(path) + self._path = path[:lenpath-lenbase] + self._prefix = path[lenpath-lenbase:lenpath-lensuffix] + self._suffix = path[lenpath-lensuffix:] + + return True + #end def Preload + + def GeneratePath(self, instance): + """ Generates the iterations, as described above. """ + prefix = self._path + self._prefix + if type(instance) == types.IntType: + if instance: + return '%s%d%s' % (prefix, instance, self._suffix) + return prefix + self._suffix + return prefix + instance + #end def GeneratePath + + def GenerateURL(self, instance, root_url): + """ Generates iterations, but as a URL instead of a path. """ + prefix = root_url + self._prefix + retval = None + if type(instance) == types.IntType: + if instance: + retval = '%s%d%s' % (prefix, instance, self._suffix) + else: + retval = prefix + self._suffix + else: + retval = prefix + instance + return URL.Canonicalize(retval) + #end def GenerateURL + + def GenerateWildURL(self, root_url): + """ Generates a wildcard that should match all our iterations """ + prefix = URL.Canonicalize(root_url + self._prefix) + temp = URL.Canonicalize(prefix + self._suffix) + suffix = temp[len(prefix):] + return prefix + '*' + suffix + #end def GenerateURL +#end class FilePathGenerator + + +class PerURLStatistics: + """ Keep track of some simple per-URL statistics, like file extension. 
""" + + def __init__(self): + self._extensions = {} # Count of extension instances + #end def __init__ + + def Consume(self, url): + """ Log some stats for the URL. At the moment, that means extension. """ + if url and url.loc: + (scheme, netloc, path, query, frag) = urlparse.urlsplit(url.loc) + if not path: + return + + # Recognize directories + if path.endswith('/'): + if self._extensions.has_key('/'): + self._extensions['/'] = self._extensions['/'] + 1 + else: + self._extensions['/'] = 1 + return + + # Strip to a filename + i = path.rfind('/') + if i >= 0: + assert i < len(path) + path = path[i:] + + # Find extension + i = path.rfind('.') + if i > 0: + assert i < len(path) + ext = path[i:].lower() + if self._extensions.has_key(ext): + self._extensions[ext] = self._extensions[ext] + 1 + else: + self._extensions[ext] = 1 + else: + if self._extensions.has_key('(no extension)'): + self._extensions['(no extension)'] = self._extensions[ + '(no extension)'] + 1 + else: + self._extensions['(no extension)'] = 1 + #end def Consume + + def Log(self): + """ Dump out stats to the output. """ + if len(self._extensions): + output.Log('Count of file extensions on URLs:', 1) + set = self._extensions.keys() + set.sort() + for ext in set: + output.Log(' %7d %s' % (self._extensions[ext], ext), 1) + #end def Log + +class Sitemap(xml.sax.handler.ContentHandler): + """ + This is the big workhorse class that processes your inputs and spits + out sitemap files. It is built as a SAX handler for set up purposes. + That is, it processes an XML stream to bring itself up. + """ + + def __init__(self, suppress_notify): + xml.sax.handler.ContentHandler.__init__(self) + self._filters = [] # Filter objects + self._inputs = [] # Input objects + self._urls = {} # Maps URLs to count of dups + self._set = [] # Current set of URLs + self._filegen = None # Path generator for output files + self._wildurl1 = None # Sitemap URLs to filter out + self._wildurl2 = None # Sitemap URLs to filter out + self._sitemaps = 0 # Number of output files + # We init _dup_max to 2 so the default priority is 0.5 instead of 1.0 + self._dup_max = 2 # Max number of duplicate URLs + self._stat = PerURLStatistics() # Some simple stats + self._in_site = False # SAX: are we in a Site node? + self._in_Site_ever = False # SAX: were we ever in a Site? + + self._default_enc = None # Best encoding to try on URLs + self._base_url = None # Prefix to all valid URLs + self._store_into = None # Output filepath + self._suppress = suppress_notify # Suppress notify of servers + #end def __init__ + + def ValidateBasicConfig(self): + """ Verifies (and cleans up) the basic user-configurable options. 
""" + all_good = True + + if self._default_enc: + encoder.SetUserEncoding(self._default_enc) + + # Canonicalize the base_url + if all_good and not self._base_url: + output.Error('A site needs a "base_url" attribute.') + all_good = False + if all_good and not URL.IsAbsolute(self._base_url): + output.Error('The "base_url" must be absolute, not relative: %s' % + self._base_url) + all_good = False + if all_good: + self._base_url = URL.Canonicalize(self._base_url) + if not self._base_url.endswith('/'): + self._base_url = self._base_url + '/' + output.Log('BaseURL is set to: %s' % self._base_url, 2) + + # Load store_into into a generator + if all_good: + if self._store_into: + self._filegen = FilePathGenerator() + if not self._filegen.Preload(self._store_into): + all_good = False + else: + output.Error('A site needs a "store_into" attribute.') + all_good = False + + # Ask the generator for patterns on what its output will look like + if all_good: + self._wildurl1 = self._filegen.GenerateWildURL(self._base_url) + self._wildurl2 = self._filegen.GenerateURL(SITEINDEX_SUFFIX, + self._base_url) + + # Unify various forms of False + if all_good: + if self._suppress: + if (type(self._suppress) == types.StringType) or (type(self._suppress) + == types.UnicodeType): + if (self._suppress == '0') or (self._suppress.lower() == 'false'): + self._suppress = False + + # Done + if not all_good: + output.Log('See "example_config.xml" for more information.', 0) + return all_good + #end def ValidateBasicConfig + + def Generate(self): + """ Run over all the Inputs and ask them to Produce """ + # Run the inputs + for input in self._inputs: + input.ProduceURLs(self.ConsumeURL) + + # Do last flushes + if len(self._set): + self.FlushSet() + if not self._sitemaps: + output.Warn('No URLs were recorded, writing an empty sitemap.') + self.FlushSet() + + # Write an index as needed + if self._sitemaps > 1: + self.WriteIndex() + + # Notify + self.NotifySearch() + + # Dump stats + self._stat.Log() + #end def Generate + + def ConsumeURL(self, url, allow_fragment): + """ + All per-URL processing comes together here, regardless of Input. + Here we run filters, remove duplicates, spill to disk as needed, etc. + """ + if not url: + return + + # Validate + if not url.Validate(self._base_url, allow_fragment): + return + + # Run filters + accept = None + for filter in self._filters: + accept = filter.Apply(url) + if accept != None: + break + if not (accept or (accept == None)): + url.Log(prefix='FILTERED', level=2) + return + + # Ignore our out output URLs + if fnmatch.fnmatchcase(url.loc, self._wildurl1) or fnmatch.fnmatchcase( + url.loc, self._wildurl2): + url.Log(prefix='IGNORED (output file)', level=2) + return + + # Note the sighting + hash = url.MakeHash() + if self._urls.has_key(hash): + dup = self._urls[hash] + if dup > 0: + dup = dup + 1 + self._urls[hash] = dup + if self._dup_max < dup: + self._dup_max = dup + url.Log(prefix='DUPLICATE') + return + + # Acceptance -- add to set + self._urls[hash] = 1 + self._set.append(url) + self._stat.Consume(url) + url.Log() + + # Flush the set if needed + if len(self._set) >= MAXURLS_PER_SITEMAP: + self.FlushSet() + #end def ConsumeURL + + def FlushSet(self): + """ + Flush the current set of URLs to the output. This is a little + slow because we like to sort them all and normalize the priorities + before dumping. 
+ """ + + # Sort and normalize + output.Log('Sorting and normalizing collected URLs.', 1) + self._set.sort() + for url in self._set: + hash = url.MakeHash() + dup = self._urls[hash] + if dup > 0: + self._urls[hash] = -1 + if not url.priority: + url.priority = '%.4f' % (float(dup) / float(self._dup_max)) + + # Get the filename we're going to write to + filename = self._filegen.GeneratePath(self._sitemaps) + if not filename: + output.Fatal('Unexpected: Couldn\'t generate output filename.') + self._sitemaps = self._sitemaps + 1 + output.Log('Writing Sitemap file "%s" with %d URLs' % + (filename, len(self._set)), 1) + + # Write to it + frame = None + file = None + + try: + if self._filegen.is_gzip: + basename = os.path.basename(filename); + frame = open(filename, 'wb') + file = gzip.GzipFile(fileobj=frame, filename=basename, mode='wt') + else: + file = open(filename, 'wt') + + file.write(SITEMAP_HEADER) + for url in self._set: + url.WriteXML(file) + file.write(SITEMAP_FOOTER) + + file.close() + if frame: + frame.close() + + frame = None + file = None + except IOError: + output.Fatal('Couldn\'t write out to file: %s' % filename) + os.chmod(filename, 0644) + + # Flush + self._set = [] + #end def FlushSet + + def WriteIndex(self): + """ Write the master index of all Sitemap files """ + # Make a filename + filename = self._filegen.GeneratePath(SITEINDEX_SUFFIX) + if not filename: + output.Fatal('Unexpected: Couldn\'t generate output index filename.') + output.Log('Writing index file "%s" with %d Sitemaps' % + (filename, self._sitemaps), 1) + + # Make a lastmod time + lastmod = TimestampISO8601(time.time()) + + # Write to it + try: + fd = open(filename, 'wt') + fd.write(SITEINDEX_HEADER) + + for mapnumber in range(0,self._sitemaps): + # Write the entry + mapurl = self._filegen.GenerateURL(mapnumber, self._base_url) + mapattributes = { 'loc' : mapurl, 'lastmod' : lastmod } + fd.write(SITEINDEX_ENTRY % mapattributes) + + fd.write(SITEINDEX_FOOTER) + + fd.close() + fd = None + except IOError: + output.Fatal('Couldn\'t write out to file: %s' % filename) + os.chmod(filename, 0644) + #end def WriteIndex + + def NotifySearch(self): + """ Send notification of the new Sitemap(s) to the search engines. """ + if self._suppress: + output.Log('Search engine notification is suppressed.', 1) + return + + output.Log('Notifying search engines.', 1) + + # Override the urllib's opener class with one that doesn't ignore 404s + class ExceptionURLopener(urllib.FancyURLopener): + def http_error_default(self, url, fp, errcode, errmsg, headers): + output.Log('HTTP error %d: %s' % (errcode, errmsg), 2) + raise IOError + #end def http_error_default + #end class ExceptionURLOpener + old_opener = urllib._urlopener + urllib._urlopener = ExceptionURLopener() + + # Build the URL we want to send in + if self._sitemaps > 1: + url = self._filegen.GenerateURL(SITEINDEX_SUFFIX, self._base_url) + else: + url = self._filegen.GenerateURL(0, self._base_url) + + # Test if we can hit it ourselves + try: + u = urllib.urlopen(url) + u.close() + except IOError: + output.Error('When attempting to access our generated Sitemap at the ' + 'following URL:\n %s\n we failed to read it. Please ' + 'verify the store_into path you specified in\n' + ' your configuration file is web-accessable. Consult ' + 'the FAQ for more\n information.' 
+      output.Warn('Proceeding to notify with an unverifiable URL.')
+
+    # Cycle through notifications
+    # To understand this, see the comment near the NOTIFICATION_SITES definition
+    for ping in NOTIFICATION_SITES:
+      query_map = ping[3]
+      query_attr = ping[5]
+      query_map[query_attr] = url
+      query = urllib.urlencode(query_map)
+      notify = urlparse.urlunsplit((ping[0], ping[1], ping[2], query, ping[4]))
+
+      # Send the notification
+      output.Log('Notifying: %s' % ping[1], 1)
+      output.Log('Notification URL: %s' % notify, 2)
+      try:
+        u = urllib.urlopen(notify)
+        u.read()
+        u.close()
+      except IOError:
+        output.Warn('Cannot contact: %s' % ping[1])
+
+    if old_opener:
+      urllib._urlopener = old_opener
+  #end def NotifySearch
+
+  def startElement(self, tag, attributes):
+    """ SAX processing, called per node in the config stream. """
+
+    if tag == 'site':
+      if self._in_site:
+        output.Error('Can not nest Site entries in the configuration.')
+      else:
+        self._in_site = True
+
+        if not ValidateAttributes('SITE', attributes,
+          ('verbose', 'default_encoding', 'base_url', 'store_into',
+           'suppress_search_engine_notify')):
+          return
+
+        verbose = attributes.get('verbose', 0)
+        if verbose:
+          output.SetVerbose(verbose)
+
+        self._default_enc = attributes.get('default_encoding')
+        self._base_url = attributes.get('base_url')
+        self._store_into = attributes.get('store_into')
+        if not self._suppress:
+          self._suppress = attributes.get('suppress_search_engine_notify',
+                                          False)
+        self.ValidateBasicConfig()
+
+    elif tag == 'filter':
+      self._filters.append(Filter(attributes))
+
+    elif tag == 'url':
+      self._inputs.append(InputURL(attributes))
+
+    elif tag == 'urllist':
+      for attributeset in ExpandPathAttribute(attributes, 'path'):
+        self._inputs.append(InputURLList(attributeset))
+
+    elif tag == 'directory':
+      self._inputs.append(InputDirectory(attributes, self._base_url))
+
+    elif tag == 'accesslog':
+      for attributeset in ExpandPathAttribute(attributes, 'path'):
+        self._inputs.append(InputAccessLog(attributeset))
+
+    elif tag == 'sitemap':
+      for attributeset in ExpandPathAttribute(attributes, 'path'):
+        self._inputs.append(InputSitemap(attributeset))
+
+    else:
+      output.Error('Unrecognized tag in the configuration: %s' % tag)
+  #end def startElement
+
+  def endElement(self, tag):
+    """ SAX processing, called per node in the config stream. """
+    if tag == 'site':
+      assert self._in_site
+      self._in_site = False
+      self._in_site_ever = True
+  #end def endElement
+
+  def endDocument(self):
+    """ End of SAX, verify we can proceed. """
+    if not self._in_site_ever:
+      output.Error('The configuration must specify a "site" element.')
+    else:
+      if not self._inputs:
+        output.Warn('There were no inputs to generate a sitemap from.')
+  #end def endDocument
+#end class Sitemap
+
+
+def ValidateAttributes(tag, attributes, goodattributes):
+  """ Makes sure 'attributes' does not contain any attribute not
+  listed in 'goodattributes' """
+  all_good = True
+  for attr in attributes.keys():
+    if not attr in goodattributes:
+      output.Error('Unknown %s attribute: %s' % (tag, attr))
+      all_good = False
+  return all_good
+#end def ValidateAttributes
+
+def ExpandPathAttribute(src, attrib):
+  """ Given a dictionary of attributes, return a list of dictionaries
+  with all the same attributes except for the one named attrib.
+  That one, we treat as a file path and expand into all its possible
+  variations. """
+  # Do the path expansion. On any error, just return the source dictionary.
+  path = src.get(attrib)
+  if not path:
+    return [src]
+  path = encoder.MaybeNarrowPath(path)
+  pathlist = glob.glob(path)
+  if not pathlist:
+    return [src]
+
+  # If this isn't actually a dictionary, make it one
+  if type(src) != types.DictionaryType:
+    tmp = {}
+    for key in src.keys():
+      tmp[key] = src[key]
+    src = tmp
+
+  # Create N new dictionaries
+  retval = []
+  for path in pathlist:
+    dst = src.copy()
+    dst[attrib] = path
+    retval.append(dst)
+
+  return retval
+#end def ExpandPathAttribute
+
+def OpenFileForRead(path, logtext):
+  """ Opens a text file, be it GZip or plain """
+
+  frame = None
+  file = None
+
+  if not path:
+    return (frame, file)
+
+  try:
+    if path.endswith('.gz'):
+      frame = open(path, 'rb')
+      file = gzip.GzipFile(fileobj=frame, mode='rt')
+    else:
+      file = open(path, 'rt')
+
+    if logtext:
+      output.Log('Opened %s file: %s' % (logtext, path), 1)
+    else:
+      output.Log('Opened file: %s' % path, 1)
+  except IOError:
+    output.Error('Can not open file: %s' % path)
+
+  return (frame, file)
+#end def OpenFileForRead
+
+def TimestampISO8601(t):
+  """Seconds since epoch (1970-01-01) --> ISO 8601 time string."""
+  return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(t))
+#end def TimestampISO8601
+
+def CreateSitemapFromFile(configpath, suppress_notify):
+  """ Sets up a new Sitemap object from the specified configuration file. """
+
+  # Remember error count on the way in
+  num_errors = output.num_errors
+
+  # Rev up SAX to parse the config
+  sitemap = Sitemap(suppress_notify)
+  try:
+    output.Log('Reading configuration file: %s' % configpath, 0)
+    xml.sax.parse(configpath, sitemap)
+  except IOError:
+    output.Error('Cannot read configuration file: %s' % configpath)
+  except xml.sax._exceptions.SAXParseException, e:
+    output.Error('XML error in the config file (line %d, column %d): %s' %
+                 (e._linenum, e._colnum, e.getMessage()))
+  except xml.sax._exceptions.SAXReaderNotAvailable:
+    output.Error('Some installs of Python 2.2 did not include complete support'
+                 ' for XML.\n  Please try upgrading your version of Python'
+                 ' and re-running the script.')
+
+  # If we added any errors, return no sitemap
+  if num_errors == output.num_errors:
+    return sitemap
+  return None
+#end def CreateSitemapFromFile
+
+def ProcessCommandFlags(args):
+  """
+  Parse command line flags per specified usage, picking off key, value pairs.
+  All flags of type "--key=value" will be processed as __flags[key] = value,
+  "--option" will be processed as __flags[option] = option
+  """
+
+  flags = {}
+  rkeyval = r'--(?P<key>\S*)[=](?P<value>\S*)'  # --key=val
+  roption = r'--(?P<option>\S*)'                # --key
+  r = '(' + rkeyval + ')|(' + roption + ')'
+  rc = re.compile(r)
+  for a in args:
+    try:
+      rcg = rc.search(a).groupdict()
+      if rcg.has_key('key'):
+        flags[rcg['key']] = rcg['value']
+      if rcg.has_key('option'):
+        flags[rcg['option']] = rcg['option']
+    except AttributeError:
+      return None
+  return flags
+#end def ProcessCommandFlags
+
+
+#
+# __main__
+#
+
+if __name__ == '__main__':
+  flags = ProcessCommandFlags(sys.argv[1:])
+  if not flags or not flags.has_key('config') or flags.has_key('help'):
+    output.Log(__usage__, 0)
+  else:
+    suppress_notify = flags.has_key('testing')
+    sitemap = CreateSitemapFromFile(flags['config'], suppress_notify)
+    if not sitemap:
+      output.Log('Configuration file errors -- exiting.', 0)
+    else:
+      sitemap.Generate()
+      output.Log('Number of errors: %d' % output.num_errors, 1)
+      output.Log('Number of warnings: %d' % output.num_warns, 1)
diff --git a/nikola/plugins/template_jinja.plugin b/nikola/plugins/template_jinja.plugin
new file mode 100644
index 0000000..01e6d8c
--- /dev/null
+++ b/nikola/plugins/template_jinja.plugin
@@ -0,0 +1,9 @@
+[Core]
+Name = jinja
+Module = template_jinja
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Support for Jinja2 templates.
diff --git a/nikola/plugins/template_jinja.py b/nikola/plugins/template_jinja.py
new file mode 100644
index 0000000..0893cf7
--- /dev/null
+++ b/nikola/plugins/template_jinja.py
@@ -0,0 +1,38 @@
+"""Jinja template handlers"""
+
+import os
+import jinja2
+
+from nikola.plugin_categories import TemplateSystem
+
+
+class JinjaTemplates(TemplateSystem):
+    """Wrapper for Jinja2 templates."""
+
+    name = "jinja"
+    lookup = None
+
+    def set_directories(self, directories):
+        """Create a template lookup."""
+        self.lookup = jinja2.Environment(loader=jinja2.FileSystemLoader(
+            directories,
+            encoding='utf-8',
+        ))
+
+    def render_template(self, template_name, output_name, context):
+        """Render the template into output_name using context."""
+
+        template = self.lookup.get_template(template_name)
+        data = template.render(**context)
+        if output_name is not None:
+            try:
+                os.makedirs(os.path.dirname(output_name))
+            except OSError:
+                pass  # The directory already exists
+            with open(output_name, 'w+') as output:
+                output.write(data.encode('utf8'))
+        return data
+
+    def template_deps(self, template_name):
+        # FIXME: unimplemented
+        return []
diff --git a/nikola/plugins/template_mako.plugin b/nikola/plugins/template_mako.plugin
new file mode 100644
index 0000000..3fdc354
--- /dev/null
+++ b/nikola/plugins/template_mako.plugin
@@ -0,0 +1,9 @@
+[Core]
+Name = mako
+Module = template_mako
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Support for Mako templates.
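
The jinja handler above shows the whole TemplateSystem surface these plugins implement: set_directories() builds a lookup, and render_template() renders a template into a file and returns the rendered text. A minimal sketch of how a caller might drive it; the 'templates' directory, template name, output path, and context values are illustrative assumptions, not part of this patch:

    # Sketch only: drives the JinjaTemplates plugin defined above.
    # Assumes a templates/post.tmpl file exists; these names are hypothetical.
    import os

    from nikola.plugins.template_jinja import JinjaTemplates

    templates = JinjaTemplates()
    templates.set_directories(['templates'])   # builds the jinja2 Environment
    html = templates.render_template(
        'post.tmpl',                           # resolved against 'templates/'
        os.path.join('output', 'post.html'),   # parent directory is created
        {'title': 'Hello world'})              # exposed as template variables
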
diff --git a/nikola/plugins/template_mako.py b/nikola/plugins/template_mako.py
new file mode 100644
index 0000000..7ab5c43
--- /dev/null
+++ b/nikola/plugins/template_mako.py
@@ -0,0 +1,68 @@
+"""Mako template handlers"""
+
+import os
+import shutil
+
+from mako import util, lexer
+from mako.lookup import TemplateLookup
+
+from nikola.plugin_categories import TemplateSystem
+
+
+class MakoTemplates(TemplateSystem):
+    """Wrapper for Mako templates."""
+
+    name = "mako"
+
+    lookup = None
+    cache = {}
+
+    def get_deps(self, filename):
+        text = util.read_file(filename)
+        lex = lexer.Lexer(text=text, filename=filename)
+        lex.parse()
+
+        deps = []
+        for n in lex.template.nodes:
+            if getattr(n, 'keyword', None) == "inherit":
+                deps.append(n.attributes['file'])
+        # TODO: include tags are not handled
+        return deps
+
+    def set_directories(self, directories):
+        """Create a template lookup."""
+        cache_dir = os.path.join('cache', '.mako.tmp')
+        if os.path.exists(cache_dir):
+            shutil.rmtree(cache_dir)
+        self.lookup = TemplateLookup(
+            directories=directories,
+            module_directory=cache_dir,
+            output_encoding='utf-8',
+        )
+
+    def render_template(self, template_name, output_name, context):
+        """Render the template into output_name using context."""
+
+        template = self.lookup.get_template(template_name)
+        data = template.render_unicode(**context)
+        if output_name is not None:
+            try:
+                os.makedirs(os.path.dirname(output_name))
+            except OSError:
+                pass  # The directory already exists
+            with open(output_name, 'w+') as output:
+                output.write(data.encode('utf8'))
+        return data
+
+    def template_deps(self, template_name):
+        """Returns filenames which are dependencies for a template."""
+        # We can cache here because dependencies should
+        # not change between runs
+        if self.cache.get(template_name, None) is None:
+            template = self.lookup.get_template(template_name)
+            dep_filenames = self.get_deps(template.filename)
+            deps = [template.filename]
+            for fname in dep_filenames:
+                deps += self.template_deps(fname)
+            self.cache[template_name] = tuple(deps)
+        return list(self.cache[template_name])
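
The recursive template_deps() at the end is what lets the doit build rebuild pages when a parent template changes: get_deps() walks <%inherit> declarations and the resulting tuple is cached for the run. A hypothetical illustration of the behavior, assuming a templates/post.tmpl that inherits from base.tmpl (both file names are made up for this example):

    # Sketch only: dependency tracking with the MakoTemplates plugin above.
    # Assumes templates/post.tmpl contains <%inherit file="base.tmpl"/>.
    from nikola.plugins.template_mako import MakoTemplates

    mako = MakoTemplates()
    mako.set_directories(['templates'])  # also resets cache/.mako.tmp
    deps = mako.template_deps('post.tmpl')
    # deps lists both post.tmpl and base.tmpl, so pages rendered from
    # post.tmpl are rebuilt whenever either file changes.
    print deps
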
