From 0f2c04e70a0ffdd0892d6970cafbcd952d221db5 Mon Sep 17 00:00:00 2001 From: Agustin Henze Date: Wed, 12 Dec 2012 20:15:48 -0300 Subject: Imported Upstream version 5 --- nikola/PyRSS2Gen.py | 10 +- nikola/__init__.py | 1 + nikola/data/samplesite/conf.py | 10 +- nikola/data/samplesite/dodo.py | 21 - nikola/data/samplesite/stories/manual.txt | 809 +------ nikola/data/samplesite/stories/theming.txt | 237 +-- nikola/data/themes/default/messages/de.py | 16 +- nikola/data/themes/default/messages/en.py | 14 +- nikola/data/themes/default/messages/es.py | 16 +- nikola/data/themes/default/messages/fr.py | 10 +- nikola/data/themes/default/messages/gr.py | 14 +- nikola/data/themes/default/messages/it.py | 16 +- nikola/data/themes/default/messages/ru.py | 14 +- nikola/data/themes/default/templates/base.tmpl | 2 +- nikola/data/themes/default/templates/gallery.tmpl | 10 + nikola/data/themes/default/templates/index.tmpl | 6 +- nikola/data/themes/default/templates/post.tmpl | 6 +- .../data/themes/jinja-default/templates/base.tmpl | 2 +- .../themes/jinja-default/templates/gallery.tmpl | 12 +- .../data/themes/jinja-default/templates/index.tmpl | 6 +- .../data/themes/jinja-default/templates/post.tmpl | 6 +- nikola/data/themes/site/templates/post.tmpl | 8 +- nikola/filters.py | 8 +- nikola/jinja_templates.py | 37 - nikola/mako_templates.py | 65 - nikola/md.py | 29 - nikola/nikola.py | 1479 ++----------- nikola/plugin_categories.py | 85 + nikola/plugins/command_bootswatch_theme.plugin | 10 + nikola/plugins/command_bootswatch_theme.py | 47 + nikola/plugins/command_build.plugin | 10 + nikola/plugins/command_build.py | 32 + nikola/plugins/command_check.plugin | 10 + nikola/plugins/command_check.py | 109 + nikola/plugins/command_deploy.plugin | 9 + nikola/plugins/command_deploy.py | 16 + nikola/plugins/command_import_wordpress.plugin | 10 + nikola/plugins/command_import_wordpress.py | 163 ++ nikola/plugins/command_init.plugin | 10 + nikola/plugins/command_init.py | 34 + 
nikola/plugins/command_install_theme.plugin | 10 + nikola/plugins/command_install_theme.py | 62 + nikola/plugins/command_new_post.plugin | 10 + nikola/plugins/command_new_post.py | 100 + nikola/plugins/command_serve.plugin | 10 + nikola/plugins/command_serve.py | 40 + nikola/plugins/compile_html.plugin | 10 + nikola/plugins/compile_html.py | 20 + nikola/plugins/compile_markdown.plugin | 10 + nikola/plugins/compile_markdown/__init__.py | 33 + nikola/plugins/compile_rest.plugin | 10 + nikola/plugins/compile_rest/__init__.py | 79 + .../compile_rest/pygments_code_block_directive.py | 401 ++++ nikola/plugins/compile_rest/youtube.py | 33 + nikola/plugins/task_archive.plugin | 10 + nikola/plugins/task_archive.py | 77 + nikola/plugins/task_copy_assets.plugin | 10 + nikola/plugins/task_copy_assets.py | 35 + nikola/plugins/task_copy_files.plugin | 10 + nikola/plugins/task_copy_files.py | 35 + nikola/plugins/task_create_bundles.plugin | 10 + nikola/plugins/task_create_bundles.py | 85 + nikola/plugins/task_indexes.plugin | 10 + nikola/plugins/task_indexes.py | 81 + nikola/plugins/task_redirect.plugin | 10 + nikola/plugins/task_redirect.py | 48 + nikola/plugins/task_render_galleries.plugin | 10 + nikola/plugins/task_render_galleries.py | 305 +++ nikola/plugins/task_render_listings.plugin | 10 + nikola/plugins/task_render_listings.py | 81 + nikola/plugins/task_render_pages.plugin | 10 + nikola/plugins/task_render_pages.py | 35 + nikola/plugins/task_render_posts.plugin | 10 + nikola/plugins/task_render_posts.py | 52 + nikola/plugins/task_render_rss.plugin | 10 + nikola/plugins/task_render_rss.py | 41 + nikola/plugins/task_render_sources.plugin | 10 + nikola/plugins/task_render_sources.py | 54 + nikola/plugins/task_render_tags.plugin | 10 + nikola/plugins/task_render_tags.py | 180 ++ nikola/plugins/task_sitemap.plugin | 10 + nikola/plugins/task_sitemap/__init__.py | 62 + nikola/plugins/task_sitemap/sitemap_gen.py | 2241 ++++++++++++++++++++ nikola/plugins/template_jinja.plugin | 9 
+ nikola/plugins/template_jinja.py | 38 + nikola/plugins/template_mako.plugin | 9 + nikola/plugins/template_mako.py | 68 + nikola/post.py | 4 +- nikola/pygments_code_block_directive.py | 401 ---- nikola/rest.py | 78 - nikola/sitemap_gen.py | 2240 ------------------- nikola/utils.py | 150 +- nikola/wordpress.py | 134 -- nikola/youtube.py | 33 - 94 files changed, 5331 insertions(+), 5622 deletions(-) delete mode 100755 nikola/data/samplesite/dodo.py mode change 100644 => 120000 nikola/data/samplesite/stories/manual.txt mode change 100644 => 120000 nikola/data/samplesite/stories/theming.txt delete mode 100644 nikola/jinja_templates.py delete mode 100644 nikola/mako_templates.py delete mode 100644 nikola/md.py create mode 100644 nikola/plugin_categories.py create mode 100644 nikola/plugins/command_bootswatch_theme.plugin create mode 100644 nikola/plugins/command_bootswatch_theme.py create mode 100644 nikola/plugins/command_build.plugin create mode 100644 nikola/plugins/command_build.py create mode 100644 nikola/plugins/command_check.plugin create mode 100644 nikola/plugins/command_check.py create mode 100644 nikola/plugins/command_deploy.plugin create mode 100644 nikola/plugins/command_deploy.py create mode 100644 nikola/plugins/command_import_wordpress.plugin create mode 100644 nikola/plugins/command_import_wordpress.py create mode 100644 nikola/plugins/command_init.plugin create mode 100644 nikola/plugins/command_init.py create mode 100644 nikola/plugins/command_install_theme.plugin create mode 100644 nikola/plugins/command_install_theme.py create mode 100644 nikola/plugins/command_new_post.plugin create mode 100644 nikola/plugins/command_new_post.py create mode 100644 nikola/plugins/command_serve.plugin create mode 100644 nikola/plugins/command_serve.py create mode 100644 nikola/plugins/compile_html.plugin create mode 100644 nikola/plugins/compile_html.py create mode 100644 nikola/plugins/compile_markdown.plugin create mode 100644 
nikola/plugins/compile_markdown/__init__.py create mode 100644 nikola/plugins/compile_rest.plugin create mode 100644 nikola/plugins/compile_rest/__init__.py create mode 100644 nikola/plugins/compile_rest/pygments_code_block_directive.py create mode 100644 nikola/plugins/compile_rest/youtube.py create mode 100644 nikola/plugins/task_archive.plugin create mode 100644 nikola/plugins/task_archive.py create mode 100644 nikola/plugins/task_copy_assets.plugin create mode 100644 nikola/plugins/task_copy_assets.py create mode 100644 nikola/plugins/task_copy_files.plugin create mode 100644 nikola/plugins/task_copy_files.py create mode 100644 nikola/plugins/task_create_bundles.plugin create mode 100644 nikola/plugins/task_create_bundles.py create mode 100644 nikola/plugins/task_indexes.plugin create mode 100644 nikola/plugins/task_indexes.py create mode 100644 nikola/plugins/task_redirect.plugin create mode 100644 nikola/plugins/task_redirect.py create mode 100644 nikola/plugins/task_render_galleries.plugin create mode 100644 nikola/plugins/task_render_galleries.py create mode 100644 nikola/plugins/task_render_listings.plugin create mode 100644 nikola/plugins/task_render_listings.py create mode 100644 nikola/plugins/task_render_pages.plugin create mode 100644 nikola/plugins/task_render_pages.py create mode 100644 nikola/plugins/task_render_posts.plugin create mode 100644 nikola/plugins/task_render_posts.py create mode 100644 nikola/plugins/task_render_rss.plugin create mode 100644 nikola/plugins/task_render_rss.py create mode 100644 nikola/plugins/task_render_sources.plugin create mode 100644 nikola/plugins/task_render_sources.py create mode 100644 nikola/plugins/task_render_tags.plugin create mode 100644 nikola/plugins/task_render_tags.py create mode 100644 nikola/plugins/task_sitemap.plugin create mode 100644 nikola/plugins/task_sitemap/__init__.py create mode 100755 nikola/plugins/task_sitemap/sitemap_gen.py create mode 100644 nikola/plugins/template_jinja.plugin create 
mode 100644 nikola/plugins/template_jinja.py create mode 100644 nikola/plugins/template_mako.plugin create mode 100644 nikola/plugins/template_mako.py delete mode 100644 nikola/pygments_code_block_directive.py delete mode 100644 nikola/rest.py delete mode 100755 nikola/sitemap_gen.py delete mode 100644 nikola/wordpress.py delete mode 100644 nikola/youtube.py (limited to 'nikola') diff --git a/nikola/PyRSS2Gen.py b/nikola/PyRSS2Gen.py index 6c4bda3..198ebb5 100644 --- a/nikola/PyRSS2Gen.py +++ b/nikola/PyRSS2Gen.py @@ -1,5 +1,7 @@ """PyRSS2Gen - A Python library for generating RSS 2.0 feeds.""" +# flake8: noqa + __name__ = "PyRSS2Gen" __version__ = (1, 0, 0) __author__ = "Andrew Dalke " @@ -7,11 +9,7 @@ __author__ = "Andrew Dalke " _generator_name = __name__ + "-" + ".".join(map(str, __version__)) import datetime -try: - import cStringIO - StringIO = cStringIO -except ImportError: - import StringIO +import io # Could make this the base class; will need to add 'publish' class WriteXmlMixin: @@ -23,7 +21,7 @@ class WriteXmlMixin: handler.endDocument() def to_xml(self, encoding = "iso-8859-1"): - f = StringIO.StringIO() + f = io.StringIO() self.write_xml(f, encoding) return f.getvalue() diff --git a/nikola/__init__.py b/nikola/__init__.py index e69de29..3b6ad2a 100644 --- a/nikola/__init__.py +++ b/nikola/__init__.py @@ -0,0 +1 @@ +from nikola import Nikola # NOQA diff --git a/nikola/data/samplesite/conf.py b/nikola/data/samplesite/conf.py index 4389f03..552eb68 100755 --- a/nikola/data/samplesite/conf.py +++ b/nikola/data/samplesite/conf.py @@ -52,11 +52,11 @@ post_pages = ( # 'rest' is reStructuredText # 'markdown' is MarkDown # 'html' assumes the file is html and just copies it -#post_compilers = { -# "rest": ('.txt', '.rst'), -# "markdown": ('.md', '.mdown', '.markdown') -# "html": ('.html', '.htm') -# } +post_compilers = { + "rest": ('.txt', '.rst'), + "markdown": ('.md', '.mdown', '.markdown'), + "html": ('.html', '.htm') + } # Nikola is multilingual! 
# diff --git a/nikola/data/samplesite/dodo.py b/nikola/data/samplesite/dodo.py deleted file mode 100755 index 1be7663..0000000 --- a/nikola/data/samplesite/dodo.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Please don't edit this file unless you really know what you are doing. -# The configuration is now in conf.py - -from doit.reporter import ExecutedOnlyReporter - -from nikola.nikola import Nikola - -import conf - -DOIT_CONFIG = { - 'reporter': ExecutedOnlyReporter, - 'default_tasks': ['render_site'], -} -SITE = Nikola(**conf.__dict__) - - -def task_render_site(): - return SITE.gen_tasks() diff --git a/nikola/data/samplesite/stories/manual.txt b/nikola/data/samplesite/stories/manual.txt deleted file mode 100644 index f8804e6..0000000 --- a/nikola/data/samplesite/stories/manual.txt +++ /dev/null @@ -1,808 +0,0 @@ -The Nikola Handbook -=================== - -:Version: 2.1+svn -:Author: Roberto Alsina - -.. class:: alert alert-info pull-right - -.. contents:: - - -All You Need to Know --------------------- - -After you have Nikola installed: - -Create a site: - ``nikola init mysite`` - -Create a post: - ``doit new_post`` - -Edit the post: - The filename should be in the output of the previous command. - -Build the site: - ``doit`` - -Start the test server: - ``doit serve`` - -See the site: - http://127.0.0.1:8000 - -That should get you going. If you want to know more, this manual will always be here -for you. - -DON'T READ THIS MANUAL. IF YOU NEED TO READ IT I FAILED, JUST USE THE THING. - -On the other hand, if anything about Nikola is not as obvious as it should be, by all -means tell me about it :-) - -What's Nikola and what can you do with it? ------------------------------------------- - -Nikola is a static website and blog generator. The very short explanation is -that it takes some texts you wrote, and uses them to create a folder full -of HTML files. 
If you upload that folder to a server, you will have a -rather full-featured website, done with little effort. - -Its original goal is to create blogs, but it supports most kinds of sites, and -can be used as a CMS, as long as what you present to the user is your own content -instead of something the user generates. - -Nikola can do: - -* A blog (`example `__) -* Your company's site -* Your personal site -* A software project's site (`example `__) -* A book's site - -Since Nikola-based sites don't run any code on the server, there is no way to process -user input in forms. - -Nikola can't do: - -* Twitter -* Facebook -* An Issue tracker -* Anything with forms, really (except for comments_!) - -Keep in mind that "static" doesn't mean **boring**. You can have animations, slides -or whatever fancy CSS/HTML5 thingie you like. It only means all that HTML is -generated already before being uploaded. On the other hand, Nikola sites will -tend to be content-heavy. What Nikola is good at is putting what you write -out there. - -Getting Help ------------- - -* Feel free to contact me at ralsina@netmanagers.com.ar for questions about Nikola. -* You can file bugs at `the issue tracker `__ -* You can discuss Nikola at the `nikola-discuss google group `_ -* You can subscribe to `the Nikola Blog `_ -* You can follow `Nikola on Twitter `_ - -Why Static? ------------ - -Most "modern" websites are *dynamic* in the sense that the contents of the site -live in a database, and are converted into presentation-ready HTML only when a -user wants to see the page. That's great. However, it presents some minor issues -that static site generators try to solve. - -In a static site, the whole site, every page, *everything*, is created before -the first user even sees it and uploaded to the server as a simple folder full -of HTML files (and images, CSS, etc). - -So, let's see some reasons for using static sites: - -Security - Dynamic sites are prone to experience security issues.
The solution for that - is constant vigilance, keeping the software behind the site updated, and - plain old good luck. The stack of software used to provide a static site, - like those Nikola generates, is much smaller (Just a webserver). - - A smaller software stack implies less security risk. - -Obsolescence - If you create a site using (for example) Wordpress, what happens when Wordpress - releases a new version? You have to update your Wordpress. That is not optional, - because of security and support issues. If I release a new version of Nikola, and - you don't update, *nothing* happens. You can continue to use the version you - have now forever, no problems. - - Also, in the longer term, the very foundations of dynamic sites shift. Can you - still deploy a blog software based on Django 0.96? What happens when your - host stops supporting the php version you rely on? And so on. - - You may say those are long term issues, or that they won't matter for years. Well, - I believe things should work forever, or as close to it as we can make them. - Nikola's static output and its input files will work as long as you can install - a Python > 2.5 (soon 3.x) in a Linux, Windows, or Mac and can find a server - that sends files over HTTP. That's probably 10 or 15 years at least. - - Also, static sites are easily handled by the Internet Archive. - -Cost and Performance - On dynamic sites, every time a reader wants a page, a whole lot of database - queries are made. Then a whole pile of code chews that data, and HTML is - produced, which is sent to the user. All that requires CPU and memory. - - On a static site, the highly optimized HTTP server reads the file from disk - (or, if it's a popular file, from disk cache), and sends it to the user. You could - probably serve a bazillion (technical term) pageviews from a phone using - static sites.
- -Lockin - On server-side blog platforms, sometimes you can't export your own data, or - it's in strange formats you can't use in other services. I have switched - blogging platforms from Advogato to PyCs to two homebrewed systems, to Nikola, - and have never lost a file, a URL, or a comment. That's because I have *always* - had my own data in a format of my choice. - - With Nikola, you own your files, and you can do anything with them. - -Features --------- - -Nikola has a very defined featureset: it has every feature I needed for my own sites. -Hopefully, it will be enough for others, and anyway, I am open to suggestions. - -If you want to create a blog or a site, Nikola provides: - -* Front page (and older posts pages) -* RSS Feeds -* Pages and feeds for each tag you used -* Custom search -* Full yearly archives -* Custom output paths for generated pages -* Easy page template customization -* Static pages (not part of the blog) -* Internationalization support (my own blog is English/Spanish) -* Google sitemap generation -* Custom deployment (if it's a command, you can use it) -* A (very) basic look and feel you can customize, and is even text-mode friendly -* The input format is light markup (`reStructuredText `_ or - `Markdown `_) -* Easy-to-create image galleries - -Also: - -* A preview webserver -* "Live" re-rendering while you edit -* "Smart" builds: only what changed gets rebuilt (usually in 1 or 2 seconds) -* Very easy to extend with minimal Python knowledge. - -Installing Nikola ------------------ - -This is currently lacking on detail. Considering the niche Nikola is aimed at, -I suspect that's not a problem yet. So, when I say "get", the specific details -of how to "get" something for your specific operating system are left to you. - -The short version is: ``pip install https://github.com/ralsina/nikola/zipball/master`` - -Longer version: - -#. Get python, if you don't have it. -#. Get `doit `_ -#. Get `docutils `_ -#. Get `Mako `_ -#. Get `PIL `_ -#. 
Get `Pygments `_ -#. Get `unidecode `_ -#. Get `lxml `_ - -Any non-prehistorical version of the above should work, and if you are in Linux -you can try to use your distribution's packages if they exist, but the newer the better. - -Then get Nikola itself (), unzip it, and -run ``python setup.py install``. - -After that, run ``nikola init sitename`` and that will create a folder called -``sitename`` containing a functional demo site. - -Getting Started ---------------- - -To create posts and pages in Nikola, you write them in restructured text or Markdown, light -markups that are later converted to HTML (I may add support for textile or other -markups later). There is a great `quick tutorial to learn restructured text. `_ - -First, let's see how you "build" your site. Nikola comes with a minimal site to get you started. - -The tool used to do builds is called `doit `_, and it rebuilds the -files that are not up to date, so your site always reflects your latest content. To do our -first build, just run "doit":: - - $ doit - Parsing metadata - . render_posts:stories/manual.html - . render_posts:posts/1.html - . render_posts:stories/1.html - . render_archive:output/2012/index.html - . render_archive:output/archive.html - . render_indexes:output/index.html - . render_pages:output/posts/welcome-to-nikola.html - . render_pages:output/stories/about-nikola.html - . render_pages:output/stories/handbook.html - . render_rss:output/rss.xml - . render_sources:output/stories/about-nikola.txt - : - : - : - -Nikola will print a line for every output file it generates. If we do it again, that -will be much much shorter:: - - $ doit - Parsing metadata - . sitemap - -That is because `doit `_ is smart enough not to generate -all the pages again, unless you changed something that the page requires. So, if you change -the text of a post, or its title, that post page, and all index pages where it is mentioned, -will be recreated. 
If you change the post page template, then all the post pages will be rebuilt. - -Nikola is a series of doit *tasks*, and you can see them by doing ``doit list``:: - - $ doit list - Scanning posts . . done! - copy_assets Create tasks to copy the assets of the whole theme chain. - copy_files Copy static files into the output folder. - deploy Deploy site. - new_page Create a new post (interactive). - new_post Create a new post (interactive). - redirect Generate redirections. - render_archive Render the post archives. - render_galleries Render image galleries. - render_indexes Render 10-post-per-page indexes. - render_pages Build final pages from metadata and HTML fragments. - render_posts Build HTML fragments from metadata and reSt. - render_rss Generate RSS feeds. - render_site Render the post archives. - render_sources Publish the rst sources because why not? - render_tags Render the tag pages. - serve Start test server. (Usage: doit serve [--address 127.0.0.1] [--port 8000]) - sitemap Generate Google sitemap. - -You can make Nikola redo everything by calling ``doit clean``, you can make it do just a specific -part of the site using task names, for example ``doit render_pages``, and even individual files like -``doit render_indexes:output/index.html`` - -The ``serve`` task is special, in that instead of generating a file it starts a web server so -you can see the site you are creating:: - - $ doit serve - Parsing metadata - . serve - Serving HTTP on 127.0.0.1 port 8000 ... - -After you do this, you can point your web browser to http://localhost:8000 and you should see -the sample site. This is useful as a "preview" of your work. You can also add ``auto`` and do -``doit auto serve`` which makes doit automatically regenerate your pages as needed, and -it's a live preview! - -By default, the ``serve`` task runs the web server on port 8000 on the IP address 127.0.0.1.
-You can pass in an IP address and port number explicitly using ``-a IP_ADDRESS`` -(short version of ``--address``) or ``-p PORT_NUMBER`` (short version of ``--port``). -Example usage:: - - $ doit serve --address 0.0.0.0 --port 8080 - Parsing metadata - . serve - Serving HTTP on 0.0.0.0 port 8080 ... - -The ``deploy`` task is discussed in the Deployment_ section. - -Creating a Blog Post --------------------- - -A post consists of two files, a metadata file (``post-title.meta``) and a -file containing the contents written in `restructured text `_ -(``post-title.txt``), markdown or HTML. Which input type is used is guessed using -the ``post_compilers`` option in ``conf.py`` but by default, the extensions -supported are: - -.txt .rst - Restructured Text - -.md .markdown .mdown - Markdown - -.htm .html - HTML - -The default configuration expects them to be placed in ``posts`` but that can be -changed (see below, the post_pages option) - -You can just create them in ``posts`` or use a little helper task provided by Nikola:: - - $ doit new_post - Parsing metadata - . new_post - Creating New Post - ----------------- - - Enter title: How to Make Money - Your post's metadata is at: posts/how-to-make-money.meta - Your post's text is at: posts/how-to-make-money.txt - -The format for the ``.meta`` file is as follows:: - - How to Make Money - how-to-make-money - 2012/04/09 13:59 - -The first line is the title. The second one is the pagename. Since often titles will have -characters that look bad on URLs, it's generated as a "clean" version of the title. -The third line is the post's date, and is set to "now". - -You can add three more optional lines. A fourth line that is a list of tags -separated with commas (spaces around the commas are ignored):: - - programming, python, fame, fortune - -And a fifth line that's a URL for an original source of the post. - -And a sixth line that's the page description.
- -If you are writing a multilingual site, you can also create a per-language -metadata file. This one can have two lines: - -1) The translated title for the post or page -2) A translated version of the pagename - -You can edit these files with your favourite text editor, and once you are happy -with the contents, generate the pages as explained in `Getting Started`_ - -Currently supported languages are - -* English -* Spanish -* French -* German -* Russian -* Greek. - -If you wish to add support for more languages, check out the instructions -at the `theming guide `. - -The post page is generated using the ``post.tmpl`` template, which you can use -to customize the output. - -The place where the post will be placed by ``new_post`` is based on the ``post_pages`` -configuration option:: - - # post_pages contains (wildcard, destination, template, use_in_feed) tuples. - # - # The wildcard is used to generate a list of reSt source files (whatever/thing.txt) - # That fragment must have an associated metadata file (whatever/thing.meta), - # and optionally translated files (example for Spanish, with code "es"): - # whatever/thing.txt.es and whatever/thing.meta.es - # - # From those files, a set of HTML fragment files will be generated: - # cache/whatever/thing.html (and maybe cache/whatever/thing.html.es) - # - # These files are combined with the template to produce rendered - # pages, which will be placed at - # output / TRANSLATIONS[lang] / destination / pagename.html - # - # where "pagename" is specified in the metadata file. - # - # if use_in_feed is True, then those posts will be added to the site's - # rss feeds. - # - post_pages = ( - ("posts/*.txt", "posts", "post.tmpl", True), - ("stories/*.txt", "stories", "story.tmpl", False), - ) - -It will use the first location that has the last parameter set to True, or the last -one in the list if all of them have it set to False. - -Alternatively, you can not have a meta file and embed the metadata in the post itself.
- -In restructured text:: - - .. tags: test,demo - .. slug: demo-test - .. date: 2012/04/09 13:59 - .. link: http://foo.bar/baz - -In Markdown: - - -Teasers -~~~~~~~ - -If for any reason you want to provide feeds that only display the beginning of -your post, you only need to add a "magical comment" in your post. - -In restructuredtext:: - - .. TEASER_END - -In Markdown:: - - - -Additionally, if you want also the "index" pages to show only the teasers, you can -set the variable ``INDEX_TEASERS`` to ``True`` in ``conf.py``. - -Drafts -~~~~~~ - -If you add a "draft" tag to a post, then it will not be shown in indexes and feeds. -It *will* be compiled, and if you deploy it it *will* be made available, so use -with care. - - -Creating a Page ---------------- - -Pages are the same as posts, except that: - -* They are not added to the front page -* They don't appear on the RSS feed -* They use the ``story.tmpl`` template instead of ``post.tmpl`` by default - -The default configuration expects the page's metadata and text files to be on the -``stories`` folder, but that can be changed (see post_pages option above). - -You can create the page's files manually or use the helper ``new_page`` that works exactly like -the ``new_post`` described above, except it will place the files in the folder that -has ``use_in_feed`` set to False. - -Redirections ------------- - -If you need a page to be available in more than one place, you can define redirections -in your ``conf.py``:: - - # A list of redirection tuples, [("foo/from.html", "/bar/to.html")]. - # - # A HTML file will be created in output/foo/from.html that redirects - # to the "/bar/to.html" URL. notice that the "from" side MUST be a - # relative URL. - # - # If you don't need any of these, just set to [] - - REDIRECTIONS = [("index.html", "/weblog/index.html")] - -It's better if you can do these using your web server's configuration, but if -you can't, this will work. 
- -Configuration -------------- - -The configuration file is called ``conf.py`` and can be used to customize a lot of -what Nikola does. Its syntax is python, but if you don't know the language, it -still should not be terribly hard to grasp. - -The default ``conf.py`` you get with Nikola should be fairly complete, and is quite -commented, but just in case, here is a full, -`customized example configuration `_ (the one I use for -`my site `_) - -Adding Files ------------- - -Any files you want to be in ``output/`` but are not generated by Nikola (for example, -``favicon.ico``), just put them in ``files/``. Everything there is copied into -``output`` by the ``copy_files`` task. Remember that you can't have files that collide -with files Nikola generates (it will give an error). - -.. admonition:: Important - - Don't put any files manually in ``output/``. Ever. Really. Maybe someday Nikola - will just wipe ``output/`` and then you will be sorry. So, please don't do that. - -If you want to copy more than one folder of static files into ``output`` you can -change the FILES_FOLDERS option:: - - # One or more folders containing files to be copied as-is into the output. - # The format is a dictionary of "source" "relative destination". - # Default is: - # FILES_FOLDERS = {'files': '' } - # Which means copy 'files' into 'output' - -Post Processing Filters ------------------------ - -You can apply post processing to the files in your site, in order to optimize them -or change them in arbitrary ways. For example, you may want to compress all CSS -and JS files using yui-compressor. - -To do that, you can use the provided helper adding this in your ``conf.py``:: - - from nikola import filters - - FILTERS = { - ".css": [filters.yui_compressor], - ".js": [filters.yui_compressor], - } - -Where ``filters.yui_compressor`` is a helper function provided by Nikola. You can -replace that with strings describing command lines, or arbitrary python functions.
- -If there's any specific thing you expect to be generally useful as a filter, contact -me and I will add it to the filters library so that more people use it. - -Customizing Your Site ---------------------- - -There are lots of things you can do to personalize your website, but let's see the easy ones! - -Basics - You can assume this needs to be changed:: - - # Data about this site - BLOG_TITLE = "Demo Site" - BLOG_URL = "http://nikola.ralsina.com.ar" - BLOG_EMAIL = "joe@demo.site" - BLOG_DESCRIPTION = "This is a demo site for Nikola." - -CSS tweaking - The default configuration includes a file, ``themes/default/assets/css/custom.css`` - which is empty. Put your CSS there, for minimal disruption of the provided CSS files. - - If you feel tempted to touch other files in assets, you probably will be better off - with a `custom theme `_. - -Template tweaking - If you really want to change the pages radically, you will want to do a - `custom theme `_. - - -Sidebar - ``LICENSE`` is a HTML snippet for things like a CC badge, or whatever you prefer. - - The 'sidebar_links' option lets you define what links go in the right-hand - sidebar, so you can link to important pages, or to other sites. - - The ``SEARCH_FORM`` option contains the HTML code for a search form based on - duckduckgo.com which should always work, but feel free to change it to - something else. - -Footer - ``CONTENT_FOOTER`` is displayed, small at the bottom of all pages, I use it for - the copyright notice. - -Analytics - This is probably a misleading name, but the ``ANALYTICS`` option lets you define - a HTML snippet that will be added at the bottom of body. The main usage is - a Google analytics snippet or something similar, but you can really put anything - there. - -Getting More Themes -------------------- - -There are not so many themes for Nikola. On occasion, I port something I like, and make -it available for download.
Nikola has a builtin theme download/install mechanism, its -``install_theme`` task:: - - $ doit install_theme -l - Scanning posts . . done! - . install_theme - Themes: - ------- - blogtxt - readable - - $ doit install_theme -n blogtxt - Scanning posts . . done! - . install_theme - Downloading: http://nikola.ralsina.com.ar/themes/blogtxt.zip - Extracting: blogtxt into themes - -And there you are, you now have themes/blogtxt installed. It's very rudimentary, but it -should work in most cases. - -If you create a nice theme, please share it! You can post about it on -`the nikola forum `_ and I will -make it available for download. - -One other option is to tweak an existing theme using a different color scheme, -typography and CSS in general. Nikola provides a ``bootswatch_theme`` option -to create a custom theme by downloading free CSS files from http://bootswatch.com:: - - $ doit bootswatch_theme -n custom_theme -s spruce -p site - Scanning posts . . done! - . bootswatch_theme - Creating custom_theme theme from spruce and site - Downloading: http://bootswatch.com/spruce/bootstrap.min.css - Downloading: http://bootswatch.com/spruce/bootstrap.css - Theme created. Change the THEME setting to "custom_theme" to use it. - -You can even try what different swatches do on an existing site using -their handy `bootswatchlet `_ - -Play with it, there's cool stuff there. This feature was suggested by -`clodo `_. - -Deployment ----------- - -Nikola doesn't really have a concept of deployment. However, if you can specify your -deployment procedure as a series of commands, you can put them in the ``DEPLOY_COMMANDS`` -option, and run them with ``doit deploy``. - -One caveat is that if any command has a % in it, you should double them. 
- -Here is an example, from my own site's deployment script:: - - DEPLOY_COMMANDS = [ - 'rsync -rav --delete output/* ralsina@lateral.netmanagers.com.ar:/srv/www/lateral', - 'rdiff-backup output ~/bartleblog-backup', - "links -dump 'http://www.twingly.com/ping2?url=lateral.netmanagers.com.ar'", - 'rsync -rav ~/bartleblog-backup/* ralsina@netmanagers.com.ar:bartleblog-backup', - ] - -Other interesting ideas are using -`git as a deployment mechanism `_ (or any other VCS -for that matter), using `lftp mirror `_ or unison, or dropbox, or -Ubuntu One. Any way you can think of to copy files from one place to another is good enough. - -Comments --------- - -While Nikola creates static sites, there is a minimum level of user interaction you -are probably expecting: comments. - -The default templates contain support for `Disqus `_. All you have -to do is register a forum, put its short name in the ``DISQUS_FORUM`` option. - -Disqus is a good option because: - -1) It doesn't require any server-side software on your site -2) They offer you a way to export your comments, so you can take - them with you if you need to. -3) It's free. -4) It's damn nice. - -.. admonition:: Important - - In some cases, when you run the test site, you won't see the comments. - That can be fixed by adding the disqus_developer flag to the templates - but it's probably more trouble than it's worth. - - -Image Galleries ---------------- - -To create an image gallery, all you have to do is add a folder inside ``galleries``, -and put images there. Nikola will take care of creating thumbnails, index page, etc. - -If you click on images on a gallery, you should see a bigger image, thanks to -the excellent `colorbox `_ - -The gallery pages are generated using the ``gallery.tmpl`` template, and you can -customize it there (you could switch to another lightbox instead of colorbox, change -its settings, change the layout, etc.). 
- -The ``conf.py`` options affecting gallery pages are these:: - - # Galleries are folders in galleries/ - # Final location of galleries will be output / GALLERY_PATH / gallery_name - GALLERY_PATH = "galleries" - THUMBNAIL_SIZE = 180 - MAX_IMAGE_SIZE = 1280 - USE_FILENAME_AS_TITLE = True - -If you add a file in ``galleries/gallery_name/index.txt`` its contents will be -converted to HTML and inserted above the images in the gallery page. - -If you add some image filenames in ``galleries/gallery_name/exclude.meta``, they -will be excluded in the gallery page. - -If ``USE_FILENAME_AS_TITLE`` is True the filename (parsed as a readable string) -is used as the photo caption. If the filename starts with a number, it will -be stripped. For example ``03_an_amazing_sunrise.jpg`` will be render as *An amazing sunrise*. - -Here is a `demo gallery `_ of historic, public domain Nikola -Tesla pictures taken from `this site `_. - -Optimizing Your Website ------------------------ - -One of the main goals of Nikola is to make your site fast and light. So here are a few -tips we have found when setting up Nikola with Apache. If you have more, or -different ones, or about other webservers, please share! - -#. Use a speed testing tool. I used Yahoo's YSlow but you can use any of them, and - it's probably a good idea to use more than one. - -#. Enable compression in Apache:: - - AddOutputFilterByType DEFLATE text/html text/plain text/xml text/css - -#. If even after you did the previous step the CSS files are not sent compressed:: - - AddType text/css .css - -In the future we will be adding HTML/CSS/JS minimization and image recompression but -that's not there yet, so you may want to use 3rd party tools to achieve that. - -Restructured Text Extensions ----------------------------- - -Nikola includes support for a few directives that are not part of docutils, but which -we think are handy for website development. 
- -Youtube -~~~~~~~ - -To link to a youtube video, you need the id of the video. For example, if the -URL of the video is http://www.youtube.com/watch?v=8N_tupPBtWQ what you need is -**8N_tupPBtWQ** - -Once you have that, all you need to do is:: - - .. youtube:: 8N_tupPBtWQ - -code-block -~~~~~~~~~~ - -This is a somewhat complicated directive to display code nicely. You can just -embed code like this:: - - .. code-block:: python - - print "Hello World!" - -Or you can include the code from a file: - - .. code-block:: python - :include: /foo/bar/baz.py - -listing -~~~~~~~ - -To use this, you have to put your source code files inside ``listings`` or whatever your -``LISTINGS_FOLDER`` variable is set to. Assuming you have a ``foo.py`` inside that folder:: - - .. listing:: foo.py - -Will include the source code from ``foo.py`` and also create a ``listings/foo.py.html`` page -and the listing will have a title linking to it. - -Advanced Code Options -~~~~~~~~~~~~~~~~~~~~~ - -Both code-block and listing support a number of options, including these: - -start-at - A string, the diplayed code will start when it finds this -end-at - A string, the diplayed code will end when it finds this -start-after - A string, the diplayed code will start in the line after this -end-before - A string, the diplayed code will end in the line before this -linenos - Display line numbers -linenos_offset - Use the original file's line numbers (warning: broken) -tab-width - Size of the tabs (default 4) - -License -------- - -Nikola is released under the `GPL version 3 `_ which -is a free software license. Some components shipped along with Nikola, or required by it are -released under other licenses. - -If you are not familiar with free software licensing: In general, you should be able to -do pretty much anything you want, unless you modify Nikola. If you modify it, and share -it with someone else, that someone else should get all your modifications under the same -license you got it. 
diff --git a/nikola/data/samplesite/stories/manual.txt b/nikola/data/samplesite/stories/manual.txt new file mode 120000 index 0000000..9992900 --- /dev/null +++ b/nikola/data/samplesite/stories/manual.txt @@ -0,0 +1 @@ +../../../../docs/manual.txt \ No newline at end of file diff --git a/nikola/data/samplesite/stories/theming.txt b/nikola/data/samplesite/stories/theming.txt deleted file mode 100644 index 339ecd4..0000000 --- a/nikola/data/samplesite/stories/theming.txt +++ /dev/null @@ -1,236 +0,0 @@ -Theming Nikola -============== - -:Version: 2.1+svn -:Author: Roberto Alsina - -.. class:: alert alert-info pull-right - -.. contents:: - -The Structure -------------- - -Themes are located in the ``themes`` folder where Nikola is installed, one folder per theme. -The folder name is the theme name. - -A Nikola theme consists of three folders: - -assets - This is where you would put your CSS, Javascript and image files. It will be copied - into ``output/assets`` when you build the site, and the templates will contain - references to them. - - The included themes use `Bootstrap `_ - and `Colorbox `_ so they are in assets, - along with CSS files for syntax highligting and reStructuredText, and a - minified copy of jQuery. - - If you want to base your theme on other frameworks (or on no framework at all) - just remember to put there everything you need for deployment. - -templates - This contains the templates used to generate the pages. While Nikola will use a - certain set of template names by default, you can add others for specific parts - of your site. - -messages - Nikola tries to be multilingual. This is where you put the strings for your theme - so that it can be translated into other languages. - -And these optional files: - -parent - A text file that, on its first line, contains the name of the **parent theme**. - Any resources missing on this theme, will be looked up in the parent theme - (and then in the grandparent, etc). 
- - The ``parent`` is so you don't have to create a full theme each time: just create an - empty theme, set the parent, and add the bits you want modified. - -engine - A text file which, on the first line, contains the name of the template engine - this theme needs. Currently supported values are "mako" and "jinja". - If this file is not given, "mako" is assumed. - -bundles - A text file containing a list of files to be turned into bundles using WebAssets. - For example:: - - assets/css/all.css=bootstrap.css,bootstrap-responsive.css,rst.css,code.css,colorbox.css,custom.css - - This creates a file called "assets/css/all.css" in your output that is the - combination of all the other file paths, relative to the output file. - This makes the page much more efficient because it avoids multiple connections to the server, - at the cost of some extra difficult debugging. - - WebAssets supports bundling CSS and JS files. - - Templates should use either the bundle or the individual files based on the ``use_bundles`` - variable, which in turn is set by the ``USE_BUNDLES`` option. - -Creating a New Theme --------------------- - -In your site's folder, create a ``themes`` folder. Choose a theme to start from, and -create ``themes/yourthemename/parent`` as a file containing the parent theme's name. -There, you just created a new theme. Of course it looks exactly like the other one, -so let's customize it. - -Templates ---------- - -In templates there is a number of files whose name ends in ``.tmpl``. Those are the -theme's page templates. They are done usig the `Mako `_ -template language. If you want to do a theme, you should learn the Mako syntax first. - -Mako has a nifty concept of template inheritance. That means that, a -template can inherit from another and only change small bits of the output. 
For example, -``base.tmpl`` defines the whole layout for a page but has only a placeholder for content -so ``post.tmpl`` only define the content, and the layout is inherited from ``base.tmpl``. - -These are the templates that come with the included themes: - -base.tmpl - This template defines the basic page layout for the site. It's mostly plain HTML - but defines a few blocks that can be re-defined by inheriting templates: - - * ``extra_head`` is a block that is added before ````, (ex: for adding extra CSS) - * ``belowtitle`` is used by default to display a list of translations but you can put - anything there. - * ``content`` is where the inheriting templates will place the main content of the page. - * ``permalink`` is an absolute path to the page (ex: "/archive/index.html") - - This template always receives the following variables you can use: - - * ``lang`` is the laguage for this page. - * ``title`` is the page's title. - * ``description`` is the page's description. - * ``blog_title`` is the blog's title. - * ``blog_author`` is the blog's author. - * ``messages`` contains the theme's strings and translations. - * ``_link`` is an utility function to create links to other pages in the site. - It takes three arguments, kind, name, lang: - - kind is one of: - - * tag_index (name is ignored) - * tag (and name is the tag name) - * tag_rss (name is the tag name) - * archive (and name is the year, or None for the main archive index) - * index (name is the number in index-number) - * rss (name is ignored) - * gallery (name is the gallery name) - - The returned value is always an absolute path, like "/archive/index.html". - - * ``rel_link`` converts absolute paths to relative ones. You can use it with - ``_link`` and ``permalink`` to create relative links, which makes the site - able to work when moved inside the server. Example: ``rel_link(permalink, url)`` - - * Anything you put in your ``GLOBAL_CONTEXT`` option in ``dodo.py``. 
This - usually includes ``sidebar_links``, ``search_form``, and others. - - The included themes use at least these: - - * ``rss_link`` a link to custom RSS feed, although it may be empty) - * ``blog_url`` the URL for your site - * ``blog_title`` the name of your site - * ``content_footer`` things like copyright notices, disclaimers, etc. - * ``license`` a larger license badge - * ``analytics`` google scripts, or any JS you want to tack at the end of the body - of the page. - * ``disqus_forum``: a `Disqus `_ ID you can use to enable comments. - - It's probably a bad idea to do a theme that *requires* more than this (please put - a ``README`` in it saying what the user should add in its ``dodo.py``), but there is no - problem in requiring less. - -post.tmpl - Template used for blog posts. Can use everything ``base.tmpl`` uses, plus: - - * ``post``: a Post object. This has a number of members: - - * ``post.title(language)``: returns a localized title - * ``post.date`` - * ``post.tags``: A list of tags - * ``post.text(language)``: the translated text of the post - * ``post.permalink(language, absolute)``: Link to the post in that language. - If ``absolute`` is ``True`` the link contains the full URL. This is useful - for things like Disqus comment forms. - * ``post.next_post`` is None or a Post object that is next newest in the timeline. - * ``post.prev_post`` is None or a Post object that is next oldest in the timeline. - -story.tmpl - Used for pages that are not part of a blog, usually a cleaner, less - intrusive layout than ``post.tmpl``, but same parameters. - -gallery.tmpl - Template used for image galleries. Can use everything ``base.tmpl`` uses, plus: - - * ``text``: A descriptive text for the gallery. - * ``images``: A list of (thumbnail, image) paths. - -index.tmpl - Template used to render the multipost indexes. Can use everything ``base.tmpl`` uses, plus: - - * ``posts``: a list of Post objects, as described above. 
- * ``prevlink``: a link to a previous page - * ``nextlink``: a link to the next page - -list.tmpl - Template used to display generic lists of links. Can use everything ``base.tmpl`` uses, plus: - - * ``items``: a list of (text, link) elements. - -You can add other templates for specific pages, which the user can the use in his ``post_pages`` -option in ``dodo.py``. Also, keep in mind that your theme is yours, there is no reason why -you would need to maintain the inheritance as it is, or not require whatever data you want. - -Messages and Translations -------------------------- - -When you modify templates, you may want to add text in them (for example: "About Me"). -Instead of adding the text directly, which makes it impossible to translate to other -languages, add it like this:: - - ${messages[lang]["About Me"]} - -Then, in ``messages/en.py`` add it along the other strings:: - - MESSAGES = [ - u"Posts for year %s", - u"Archive", - u"Posts about %s:", - u"Tags", - u"Also available in: ", - u"More posts about", - u"Posted:", - u"Original site", - u"Read in english", - u"About Me", - ] - -Then, when I want to use your theme in spanish, all I have to do is add a line in ``messages/es.py``:: - - MESSAGES = { - u"LANGUAGE": u"Español", - u"Posts for year %s": u"Posts del año %s", - u"Archive": u"Archivo", - u"Posts about %s:": u"Posts sobre %s", - u"Tags": u"Tags", - u"Also available in: ": u"También disponible en: ", - u"More posts about": u"Más posts sobre", - u"Posted:": u"Publicado:", - u"Original site": u"Sitio original", - u"Read in english": u"Leer en español", - u"About Me": u"Acerca del autor", - } - -And voilá, your theme works in spanish. Don't remove strings from these files even if it seems -your theme is not using them. Some are used internally in Nikola to generate titles and -similar things. 
- -To create a new translation, just copy one of the existing ones, translate the right side of -every string to your language, save it and send it to me, I will add it to Nikola! - diff --git a/nikola/data/samplesite/stories/theming.txt b/nikola/data/samplesite/stories/theming.txt new file mode 120000 index 0000000..d2dddb6 --- /dev/null +++ b/nikola/data/samplesite/stories/theming.txt @@ -0,0 +1 @@ +../../../../docs/theming.txt \ No newline at end of file diff --git a/nikola/data/themes/default/messages/de.py b/nikola/data/themes/default/messages/de.py index f58b0a1..6e16a21 100644 --- a/nikola/data/themes/default/messages/de.py +++ b/nikola/data/themes/default/messages/de.py @@ -4,16 +4,18 @@ MESSAGES = { u"LANGUAGE": u"Deutsch", u"Posts for year %s": u"Einträge aus dem Jahr %s", u"Archive": u"Archiv", - u"Posts about %s:": u"Einträge über %s", + u"Posts about %s": u"Einträge über %s", u"Tags": u"Tags", - u"Also available in: ": u"Auch verfügbar in: ", + u"Also available in": u"Auch verfügbar in", u"More posts about": u"Weitere Einträge über", - u"Posted:": u"Veröffentlicht:", + u"Posted": u"Veröffentlicht", u"Original site": u"Original-Seite", u"Read in English": u"Auf Deutsch lesen", - u"Older posts →": u"Ältere Einträge →", - u"← Newer posts": u"← Neuere Einträge", - u"← Previous post": u"← Vorheriger Eintrag", - u"Next post →": u"Nächster Eintrag →", + u"Older posts": u"Ältere Einträge", + u"Newer posts": u"Neuere Einträge", + u"Previous post": u"Vorheriger Eintrag", + u"Next post": u"Nächster Eintrag", u"Source": u"Source", + u"Read more": u"Weiterlesen", + u"old posts page %d": u'Vorherige Einträge %d' } diff --git a/nikola/data/themes/default/messages/en.py b/nikola/data/themes/default/messages/en.py index 5a4a9bd..95b1210 100644 --- a/nikola/data/themes/default/messages/en.py +++ b/nikola/data/themes/default/messages/en.py @@ -1,17 +1,17 @@ MESSAGES = [ u"Posts for year %s", u"Archive", - u"Posts about %s:", + u"Posts about %s", u"Tags", - u"Also 
available in: ", + u"Also available in", u"More posts about", - u"Posted:", + u"Posted", u"Original site", u"Read in English", - u"← Newer posts", - u"Older posts →", - u"← Previous post", - u"Next post →", + u"Newer posts", + u"Older posts", + u"Previous post", + u"Next post", u"old posts page %d", u"Read more", u"Source", diff --git a/nikola/data/themes/default/messages/es.py b/nikola/data/themes/default/messages/es.py index 82d2300..78de676 100644 --- a/nikola/data/themes/default/messages/es.py +++ b/nikola/data/themes/default/messages/es.py @@ -4,18 +4,18 @@ MESSAGES = { u"LANGUAGE": u"Español", u"Posts for year %s": u"Posts del año %s", u"Archive": u"Archivo", - u"Posts about %s:": u"Posts sobre %s", + u"Posts about %s": u"Posts sobre %s", u"Tags": u"Tags", - u"Also available in: ": u"También disponible en: ", + u"Also available in": u"También disponible en", u"More posts about": u"Más posts sobre", - u"Posted:": u"Publicado:", + u"Posted": u"Publicado", u"Original site": u"Sitio original", u"Read in English": u"Leer en español", - u"Older posts →": u"Posts anteriores →", - u"← Newer posts": u"← Posts posteriores", - u"← Previous post": u"← Post anterior", - u"Next post →": u"Siguiente post →", + u"Older posts": u"Posts anteriores", + u"Newer posts": u"Posts posteriores", + u"Previous post": u"Post anterior", + u"Next post": u"Siguiente post", u"old posts page %d": u"posts antiguos página %d", - u"Read more": u"Leer mas", + u"Read more": u"Leer más", u"Source": u"Código", } diff --git a/nikola/data/themes/default/messages/fr.py b/nikola/data/themes/default/messages/fr.py index d4bf0a6..5db1a1f 100644 --- a/nikola/data/themes/default/messages/fr.py +++ b/nikola/data/themes/default/messages/fr.py @@ -4,14 +4,14 @@ MESSAGES = { u"LANGUAGE": u"Français", u"Posts for year %s": u"Billets de l'année %s", u"Archive": u"Archives", - u"Posts about %s:": u"Billets sur %s", + u"Posts about %s": u"Billets sur %s", u"Tags": u"Étiquettes", - u"Also available in: ": 
u"Disponible aussi en : ", + u"Also available in": u"Disponible aussi en", u"More posts about": u"Plus de billets sur", - u"Posted:": u"Publié :", + u"Posted": u"Publié", u"Original site": u"Site d'origine", u"Read in English": u"Lire en français", - u"← Newer posts": u"← Billets récents", - u"Older posts →": u"Anciens billets →", + u"Newer posts": u"Billets récents", + u"Older posts": u"Anciens billets", u"Source": u"Source", } diff --git a/nikola/data/themes/default/messages/gr.py b/nikola/data/themes/default/messages/gr.py index 62139c9..fa6bb32 100644 --- a/nikola/data/themes/default/messages/gr.py +++ b/nikola/data/themes/default/messages/gr.py @@ -4,17 +4,17 @@ MESSAGES = { u"LANGUAGE": u"Ελληνικά", u"Posts for year %s": u"Αναρτήσεις για τη χρονιά %s", u"Archive": u"Αρχείο", - u"Posts about %s:": u"Αναρτήσεις για %s", + u"Posts about %s": u"Αναρτήσεις για %s", u"Tags": u"Ετικέτες", - u"Also available in: ": u"Διαθέσιμο και στο: ", + u"Also available in": u"Διαθέσιμο και στο", u"More posts about": u"Περισσότερες αναρτήσεις για", - u"Posted:": u"Αναρτήθηκε :", + u"Posted": u"Αναρτήθηκε", u"Original site": u"Ιστοσελίδα αρχικής ανάρτησης", u"Read in English": u"Διαβάστε στα Ελληνικά", - u"← Newer posts": u"← Νεότερες αναρτήσεις", - u"Older posts →": u"Παλαιότερες αναρτήσεις →", - u"← Previous post": u"← Προηγούμενη ανάρτηση", - u"Next post →": u"Επόμενη ανάρτηση →", + u"Newer posts": u"Νεότερες αναρτήσεις", + u"Older posts": u"Παλαιότερες αναρτήσεις", + u"Previous post": u"Προηγούμενη ανάρτηση", + u"Next post": u"Επόμενη ανάρτηση", u"old posts page %d": u"σελίδα παλαιότερων αναρτήσεων %d", u"Source": u"Source", } diff --git a/nikola/data/themes/default/messages/it.py b/nikola/data/themes/default/messages/it.py index a4f37f0..01a97d5 100644 --- a/nikola/data/themes/default/messages/it.py +++ b/nikola/data/themes/default/messages/it.py @@ -2,18 +2,18 @@ MESSAGES = { u"LANGUAGE": u"Italiano", u"Posts for year %s": u"Articoli per l'anno %s", u"Archive": u"Archivio", 
- u"Posts about %s:": u"Articoli su %s", + u"Posts about %s": u"Articoli su %s", u"Tags": u"Tags", - u"Also available in: ": u"Anche disponibile in: ", + u"Also available in": u"Anche disponibile in", u"More posts about": u"Altri articoli su", - u"Posted:": u"Pubblicato:", + u"Posted": u"Pubblicato", u"Original site": u"Sito originale", u"Read in English": u"Leggi in italiano", - u"← Newer posts": u"← Articoli recenti", - u"Older posts →": u"Articoli più vecchi", - u"Older posts →": u"Articoli vecchi", - u"← Previous post": u"← Articolo precedente", - u"Next post →": u"← Articolo successivo", + u"Newer posts": u"Articoli recenti", + u"Older posts": u"Articoli più vecchi", + u"Older posts": u"Articoli vecchi", + u"Previous post": u"Articolo precedente", + u"Next post": u"Articolo successivo", u"old posts page %d": u"pagina dei vecchi articoli %d", u"Read more": u"Espandi", u"Source": u"Source", diff --git a/nikola/data/themes/default/messages/ru.py b/nikola/data/themes/default/messages/ru.py index 2bd652b..5d5cb01 100644 --- a/nikola/data/themes/default/messages/ru.py +++ b/nikola/data/themes/default/messages/ru.py @@ -4,14 +4,18 @@ MESSAGES = { u"LANGUAGE": u"Русский", u"Posts for year %s": u"Записи за %s год", u"Archive": u"Архив", - u"Posts about %s:": u"Записи с тэгом %s:", + u"Posts about %s": u"Записи с тэгом %s:", u"Tags": u"Тэги", - u"Also available in: ": u"Также доступно в: ", + u"Also available in": u"Также доступно в", u"More posts about": u"Больше записей о", - u"Posted:": u"Опубликовано", + u"Posted": u"Опубликовано", u"Original site": u"Оригинальный сайт", u"Read in English": u"Прочесть по-русски", - u"Older posts →": u"Старые записи →", - u"← Newer posts": u"← Новые записи", + u"Older posts": u"Старые записи", + u"Newer posts": u"Новые записи", + u"Previous post": u"Предыдущая запись", + u"Next post": u"Следующая запись", + u"old posts page %d": u"страница со старыми записями %d", + u"Read more": u"Продолжить чтение", u"Source": u"Source", } diff 
--git a/nikola/data/themes/default/templates/base.tmpl b/nikola/data/themes/default/templates/base.tmpl index b031423..cb5e0dd 100644 --- a/nikola/data/themes/default/templates/base.tmpl +++ b/nikola/data/themes/default/templates/base.tmpl @@ -53,7 +53,7 @@ <%block name="belowtitle"> %if len(translations) > 1: - ${(messages[lang][u"Also available in: "])} + ${(messages[lang][u"Also available in"])}:  %for langname in translations.keys(): %if langname != lang: ${messages[langname]["LANGUAGE"]} diff --git a/nikola/data/themes/default/templates/gallery.tmpl b/nikola/data/themes/default/templates/gallery.tmpl index 3c48413..37d749f 100644 --- a/nikola/data/themes/default/templates/gallery.tmpl +++ b/nikola/data/themes/default/templates/gallery.tmpl @@ -3,11 +3,21 @@ <%block name="sourcelink"> <%block name="content"> + %if text:

${text}

%endif +
    + % for folder in folders: +
  •  ${folder}
  • + % endfor +
    %for image in images:
  • diff --git a/nikola/data/themes/default/templates/index.tmpl b/nikola/data/themes/default/templates/index.tmpl index 45e2172..2c7b4be 100644 --- a/nikola/data/themes/default/templates/index.tmpl +++ b/nikola/data/themes/default/templates/index.tmpl @@ -5,7 +5,7 @@

    ${post.title(lang)}    - ${messages[lang]["Posted:"]} ${post.date} + ${messages[lang]["Posted"]}: ${post.date}


    ${post.text(lang, index_teasers)} @@ -19,12 +19,12 @@ diff --git a/nikola/data/themes/default/templates/post.tmpl b/nikola/data/themes/default/templates/post.tmpl index b40ff89..6bbb460 100644 --- a/nikola/data/themes/default/templates/post.tmpl +++ b/nikola/data/themes/default/templates/post.tmpl @@ -8,7 +8,7 @@ % endif
    - ${messages[lang]["Posted:"]} ${post.date}  |   + ${messages[lang]["Posted"]}: ${post.date}  |   %if len(translations) > 1: %for langname in translations.keys(): @@ -32,12 +32,12 @@ diff --git a/nikola/data/themes/jinja-default/templates/base.tmpl b/nikola/data/themes/jinja-default/templates/base.tmpl index cdd911c..546e1a7 100644 --- a/nikola/data/themes/jinja-default/templates/base.tmpl +++ b/nikola/data/themes/jinja-default/templates/base.tmpl @@ -52,7 +52,7 @@ {% block belowtitle%} {% if translations|length > 1 %} - {{ messages[lang]["Also available in: "] }} + {{ messages[lang]["Also available in"] }}:  {% for langname in translations.keys() %} {% if langname != lang %} {{messages[langname]["LANGUAGE"]}} diff --git a/nikola/data/themes/jinja-default/templates/gallery.tmpl b/nikola/data/themes/jinja-default/templates/gallery.tmpl index a08b148..dcd8a43 100644 --- a/nikola/data/themes/jinja-default/templates/gallery.tmpl +++ b/nikola/data/themes/jinja-default/templates/gallery.tmpl @@ -2,15 +2,25 @@ {% block sourcelink %}{% endblock %} {% block content %} + {% if text %}

    {{ text }}

    {% endif %} +
      + {% for folder in folders %} +
    •  {{folder}}
    • + {% endfor %} +
      {% for image in images %}
    • - + {% endfor %}
    {% endblock %} diff --git a/nikola/data/themes/jinja-default/templates/index.tmpl b/nikola/data/themes/jinja-default/templates/index.tmpl index c1fbb94..6244e10 100644 --- a/nikola/data/themes/jinja-default/templates/index.tmpl +++ b/nikola/data/themes/jinja-default/templates/index.tmpl @@ -4,7 +4,7 @@

    {{post.title(lang)}}    - {{messages[lang]["Posted:"]}} {{post.date}} + {{messages[lang]["Posted"]}}: {{post.date}}


    {{post.text(lang, index_teasers)}} @@ -18,12 +18,12 @@ diff --git a/nikola/data/themes/jinja-default/templates/post.tmpl b/nikola/data/themes/jinja-default/templates/post.tmpl index 876c1a7..4748959 100644 --- a/nikola/data/themes/jinja-default/templates/post.tmpl +++ b/nikola/data/themes/jinja-default/templates/post.tmpl @@ -7,7 +7,7 @@ {% endif %}
    - {{messages[lang]["Posted:"]}} {{post.date}}  |   + {{messages[lang]["Posted"]}}: {{post.date}}  |   {% if translations|length > 1 %} {% for langname in translations.keys() %} @@ -31,12 +31,12 @@ diff --git a/nikola/data/themes/site/templates/post.tmpl b/nikola/data/themes/site/templates/post.tmpl index 99c0f1f..f777366 100644 --- a/nikola/data/themes/site/templates/post.tmpl +++ b/nikola/data/themes/site/templates/post.tmpl @@ -8,7 +8,7 @@ % endif
    - ${messages[lang]["Posted:"]} ${post.date} + ${messages[lang]["Posted"]}: ${post.date} %if len(translations) > 1: %for langname in translations.keys(): @@ -30,12 +30,12 @@ @@ -45,11 +45,11 @@ comments powered by Disqus %endif +
    <%block name="sourcelink">
  • ${messages[lang]["Source"]}
  • - diff --git a/nikola/filters.py b/nikola/filters.py index caea95e..f450d10 100644 --- a/nikola/filters.py +++ b/nikola/filters.py @@ -1,9 +1,11 @@ """Utility functions to help you run filters on files.""" import os +import shutil import subprocess import tempfile + def runinplace(command, infile): """Runs a command in-place on a file. @@ -22,8 +24,10 @@ def runinplace(command, infile): tmpdir = tempfile.mkdtemp() tmpfname = os.path.join(tmpdir, os.path.basename(infile)) command = command.replace('%1', infile) - command = command.replace('%2', infile) + command = command.replace('%2', tmpfname) subprocess.check_call(command, shell=True) + shutil.move(tmpfname, infile) + def yui_compressor(infile): - return runinplace('yui-compressor %1 -o %2', infile) \ No newline at end of file + return runinplace(r'yui-compressor %1 -o %2', infile) diff --git a/nikola/jinja_templates.py b/nikola/jinja_templates.py deleted file mode 100644 index f55465f..0000000 --- a/nikola/jinja_templates.py +++ /dev/null @@ -1,37 +0,0 @@ -######################################## -# Jinja template handlers -######################################## - -import os - -import jinja2 - -lookup = None -cache = {} - - -def get_template_lookup(directories): - return jinja2.Environment(loader=jinja2.FileSystemLoader( - directories, - encoding='utf-8', - )) - - -def render_template(template_name, output_name, context, global_context): - template = lookup.get_template(template_name) - local_context = {} - local_context.update(global_context) - local_context.update(context) - output = template.render(**local_context) - if output_name is not None: - try: - os.makedirs(os.path.dirname(output_name)) - except: - pass - with open(output_name, 'w+') as output: - output.write(output.encode('utf8')) - return output - - -def template_deps(template_name): - return [] diff --git a/nikola/mako_templates.py b/nikola/mako_templates.py deleted file mode 100644 index e4a79d9..0000000 --- a/nikola/mako_templates.py +++ 
/dev/null @@ -1,65 +0,0 @@ -######################################## -# Mako template handlers -######################################## - -import os -import shutil - -from mako import util, lexer -from mako.lookup import TemplateLookup - -lookup = None -cache = {} - - -def get_deps(filename): - text = util.read_file(filename) - lex = lexer.Lexer(text=text, filename=filename) - lex.parse() - - deps = [] - for n in lex.template.nodes: - if getattr(n, 'keyword', None) == "inherit": - deps.append(n.attributes['file']) - # TODO: include tags are not handled - return deps - - -def get_template_lookup(directories): - cache_dir = os.path.join('cache', '.mako.tmp') - if os.path.exists(cache_dir): - shutil.rmtree(cache_dir) - return TemplateLookup( - directories=directories, - module_directory=cache_dir, - output_encoding='utf-8', - ) - - -def render_template(template_name, output_name, context, global_context): - template = lookup.get_template(template_name) - local_context = {} - local_context.update(global_context) - local_context.update(context) - data = template.render_unicode(**local_context) - if output_name is not None: - try: - os.makedirs(os.path.dirname(output_name)) - except: - pass - with open(output_name, 'w+') as output: - output.write(data) - return data - - -def template_deps(template_name): - # We can cache here because depedencies should - # not change between runs - if cache.get(template_name, None) is None: - template = lookup.get_template(template_name) - dep_filenames = get_deps(template.filename) - deps = [template.filename] - for fname in dep_filenames: - deps += template_deps(fname) - cache[template_name] = tuple(deps) - return list(cache[template_name]) diff --git a/nikola/md.py b/nikola/md.py deleted file mode 100644 index 16bcec8..0000000 --- a/nikola/md.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Implementation of compile_html based on markdown.""" - -__all__ = ['compile_html'] - -import codecs -import os -import re - -from markdown import markdown - 
- -def compile_html(source, dest): - try: - os.makedirs(os.path.dirname(dest)) - except: - pass - with codecs.open(dest, "w+", "utf8") as out_file: - with codecs.open(source, "r", "utf8") as in_file: - data = in_file.read() - - output = markdown(data, ['fenced_code', 'codehilite']) - # remove the H1 because there is "title" h1. - output = re.sub(r'

    .*

    ', '', output) - # python-markdown's highlighter uses the class 'codehilite' to wrap - # code, # instead of the standard 'code'. None of the standard pygments - # stylesheets use this class, so swap it to be 'code' - output = re.sub(r'(]+class="[^"]*)codehilite([^>]+)', - r'\1code\2', output) - out_file.write(output) diff --git a/nikola/nikola.py b/nikola/nikola.py index aa43398..8b69d02 100644 --- a/nikola/nikola.py +++ b/nikola/nikola.py @@ -1,34 +1,35 @@ # -*- coding: utf-8 -*- -import codecs from collections import defaultdict from copy import copy -import datetime import glob -import json import os -from StringIO import StringIO import sys -import tempfile -import urllib2 import urlparse -from doit.tools import PythonInteractiveAction import lxml.html -from pygments import highlight -from pygments.lexers import get_lexer_for_filename, TextLexer -from pygments.formatters import HtmlFormatter -try: - import webassets -except ImportError: - webassets = None +from yapsy.PluginManager import PluginManager + +if os.getenv('DEBUG'): + import logging + logging.basicConfig(level=logging.DEBUG) +else: + import logging + logging.basicConfig(level=logging.ERROR) from post import Post import utils +from plugin_categories import ( + Command, + LateTask, + PageCompiler, + Task, + TemplateSystem, +) config_changed = utils.config_changed -__all__ = ['Nikola', 'nikola_main'] +__all__ = ['Nikola'] class Nikola(object): @@ -68,6 +69,7 @@ class Nikola(object): 'FILTERS': {}, 'USE_BUNDLES': True, 'TAG_PAGES_ARE_INDEXES': False, + 'THEME': 'default', 'post_compilers': { "rest": ['.txt', '.rst'], "markdown": ['.md', '.mdown', '.markdown'], @@ -75,29 +77,48 @@ class Nikola(object): }, } self.config.update(config) - if not self.config['TRANSLATIONS']: - self.config['TRANSLATIONS']={ - self.config['DEFAULT_LANG']: ''} - - if self.config['USE_BUNDLES'] and not webassets: - self.config['USE_BUNDLES'] = False + self.config['TRANSLATIONS'] = self.config.get('TRANSLATIONS', + 
{self.config['DEFAULT_LANG']: ''}) - self.get_compile_html = utils.CompileHtmlGetter( - self.config.pop('post_compilers')) - - self.GLOBAL_CONTEXT = self.config['GLOBAL_CONTEXT'] self.THEMES = utils.get_theme_chain(self.config['THEME']) - self.templates_module = utils.get_template_module( - utils.get_template_engine(self.THEMES), self.THEMES) - self.template_deps = self.templates_module.template_deps - - self.theme_bundles = utils.get_theme_bundles(self.THEMES) - self.MESSAGES = utils.load_messages(self.THEMES, self.config['TRANSLATIONS']) - self.GLOBAL_CONTEXT['messages'] = self.MESSAGES + self.plugin_manager = PluginManager(categories_filter={ + "Command": Command, + "Task": Task, + "LateTask": LateTask, + "TemplateSystem": TemplateSystem, + "PageCompiler": PageCompiler, + }) + self.plugin_manager.setPluginInfoExtension('plugin') + self.plugin_manager.setPluginPlaces([ + os.path.join(os.path.dirname(__file__), 'plugins'), + os.path.join(os.getcwd(), 'plugins'), + ]) + self.plugin_manager.collectPlugins() + + self.commands = {} + # Activate all command plugins + for pluginInfo in self.plugin_manager.getPluginsOfCategory("Command"): + self.plugin_manager.activatePluginByName(pluginInfo.name) + pluginInfo.plugin_object.set_site(self) + pluginInfo.plugin_object.short_help = pluginInfo.description + self.commands[pluginInfo.name] = pluginInfo.plugin_object + + # Activate all task plugins + for pluginInfo in self.plugin_manager.getPluginsOfCategory("Task"): + self.plugin_manager.activatePluginByName(pluginInfo.name) + pluginInfo.plugin_object.set_site(self) + + for pluginInfo in self.plugin_manager.getPluginsOfCategory("LateTask"): + self.plugin_manager.activatePluginByName(pluginInfo.name) + pluginInfo.plugin_object.set_site(self) + + # set global_context for template rendering + self.GLOBAL_CONTEXT = self.config.get('GLOBAL_CONTEXT', {}) + self.GLOBAL_CONTEXT['messages'] = self.MESSAGES self.GLOBAL_CONTEXT['_link'] = self.link self.GLOBAL_CONTEXT['rel_link'] = 
self.rel_link self.GLOBAL_CONTEXT['abs_link'] = self.abs_link @@ -108,19 +129,74 @@ class Nikola(object): 'INDEX_DISPLAY_POST_COUNT'] self.GLOBAL_CONTEXT['use_bundles'] = self.config['USE_BUNDLES'] - self.DEPS_CONTEXT = {} - for k, v in self.GLOBAL_CONTEXT.items(): - if isinstance(v, (str, unicode, int, float, dict)): - self.DEPS_CONTEXT[k] = v + # Load template plugin + template_sys_name = utils.get_template_engine(self.THEMES) + pi = self.plugin_manager.getPluginByName( + template_sys_name, "TemplateSystem") + if pi is None: + sys.stderr.write("Error loading %s template system plugin\n" + % template_sys_name) + sys.exit(1) + self.template_system = pi.plugin_object + self.template_system.set_directories( + [os.path.join(utils.get_theme_path(name), "templates") + for name in self.THEMES]) + + # Load compiler plugins + self.compilers = {} + self.inverse_compilers = {} + + for pluginInfo in self.plugin_manager.getPluginsOfCategory( + "PageCompiler"): + self.compilers[pluginInfo.name] = \ + pluginInfo.plugin_object.compile_html + + def get_compiler(self, source_name): + """Get the correct compiler for a post from `conf.post_compilers` + + To make things easier for users, the mapping in conf.py is + compiler->[extensions], although this is less convenient for us. The + majority of this function is reversing that dictionary and error + checking. 
+ """ + ext = os.path.splitext(source_name)[1] + try: + compile_html = self.inverse_compilers[ext] + except KeyError: + # Find the correct compiler for this files extension + langs = [lang for lang, exts in + self.config['post_compilers'].items() + if ext in exts] + if len(langs) != 1: + if len(set(langs)) > 1: + exit("Your file extension->compiler definition is" + "ambiguous.\nPlease remove one of the file extensions" + "from 'post_compilers' in conf.py\n(The error is in" + "one of %s)" % ', '.join(langs)) + elif len(langs) > 1: + langs = langs[:1] + else: + exit("post_compilers in conf.py does not tell me how to " + "handle '%s' extensions." % ext) + + lang = langs[0] + compile_html = self.compilers[lang] + self.inverse_compilers[ext] = compile_html + + return compile_html def render_template(self, template_name, output_name, context): - data = self.templates_module.render_template( - template_name, None, context, self.GLOBAL_CONTEXT) + local_context = {} + local_context["template_name"] = template_name + local_context.update(self.config['GLOBAL_CONTEXT']) + local_context.update(context) + data = self.template_system.render_template( + template_name, None, local_context) assert output_name.startswith(self.config["OUTPUT_FOLDER"]) url_part = output_name[len(self.config["OUTPUT_FOLDER"]) + 1:] - #this to support windows paths + # This is to support windows paths url_part = "/".join(url_part.split(os.sep)) src = urlparse.urljoin(self.config["BLOG_URL"], url_part) @@ -289,130 +365,39 @@ class Nikola(object): return exists def gen_tasks(self): + task_dep = [] + for pluginInfo in self.plugin_manager.getPluginsOfCategory("Task"): + for task in pluginInfo.plugin_object.gen_tasks(): + yield task + if pluginInfo.plugin_object.is_default: + task_dep.append(pluginInfo.plugin_object.name) - yield self.task_serve(output_folder=self.config['OUTPUT_FOLDER']) - yield self.task_install_theme() - yield self.task_bootswatch_theme() - yield 
self.gen_task_new_post(self.config['post_pages']) - yield self.gen_task_new_page(self.config['post_pages']) - yield self.gen_task_copy_assets(themes=self.THEMES, - output_folder=self.config['OUTPUT_FOLDER'], - filters=self.config['FILTERS'] - ) - if webassets: - yield self.gen_task_build_bundles(theme_bundles=self.theme_bundles, - output_folder=self.config['OUTPUT_FOLDER'], - filters=self.config['FILTERS'] - ) - yield self.gen_task_deploy(commands=self.config['DEPLOY_COMMANDS']) - yield self.gen_task_sitemap(blog_url=self.config['BLOG_URL'], - output_folder=self.config['OUTPUT_FOLDER'] - ) - yield self.gen_task_render_pages( - translations=self.config['TRANSLATIONS'], - post_pages=self.config['post_pages'], - filters=self.config['FILTERS']) - yield self.gen_task_render_sources( - translations=self.config['TRANSLATIONS'], - default_lang=self.config['DEFAULT_LANG'], - output_folder=self.config['OUTPUT_FOLDER'], - post_pages=self.config['post_pages']) - yield self.gen_task_render_posts( - translations=self.config['TRANSLATIONS'], - default_lang=self.config['DEFAULT_LANG'], - timeline=self.timeline - ) - yield self.gen_task_render_indexes( - translations=self.config['TRANSLATIONS'], - messages=self.MESSAGES, - output_folder=self.config['OUTPUT_FOLDER'], - index_display_post_count=self.config['INDEX_DISPLAY_POST_COUNT'], - index_teasers=self.config['INDEX_TEASERS'], - filters=self.config['FILTERS'], - ) - yield self.gen_task_render_archive( - translations=self.config['TRANSLATIONS'], - messages=self.MESSAGES, - output_folder=self.config['OUTPUT_FOLDER'], - filters=self.config['FILTERS'], - ) - yield self.gen_task_render_tags( - translations=self.config['TRANSLATIONS'], - messages=self.MESSAGES, - blog_title=self.config['BLOG_TITLE'], - blog_url=self.config['BLOG_URL'], - blog_description=self.config['BLOG_DESCRIPTION'], - output_folder=self.config['OUTPUT_FOLDER'], - filters=self.config['FILTERS'], - tag_pages_are_indexes=self.config['TAG_PAGES_ARE_INDEXES'], - 
index_display_post_count=self.config['INDEX_DISPLAY_POST_COUNT'], - index_teasers=self.config['INDEX_TEASERS'], - ) - yield self.gen_task_render_rss( - translations=self.config['TRANSLATIONS'], - blog_title=self.config['BLOG_TITLE'], - blog_url=self.config['BLOG_URL'], - blog_description=self.config['BLOG_DESCRIPTION'], - output_folder=self.config['OUTPUT_FOLDER']) - yield self.gen_task_render_galleries( - max_image_size=self.config['MAX_IMAGE_SIZE'], - thumbnail_size=self.config['THUMBNAIL_SIZE'], - default_lang=self.config['DEFAULT_LANG'], - output_folder=self.config['OUTPUT_FOLDER'], - use_filename_as_title=self.config['USE_FILENAME_AS_TITLE'], - blog_description=self.config['BLOG_DESCRIPTION'] - ) - yield self.gen_task_render_listings( - listings_folder=self.config['LISTINGS_FOLDER'], - default_lang=self.config['DEFAULT_LANG'], - output_folder=self.config['OUTPUT_FOLDER']) - yield self.gen_task_redirect( - redirections=self.config['REDIRECTIONS'], - output_folder=self.config['OUTPUT_FOLDER']) - yield self.gen_task_copy_files( - output_folder=self.config['OUTPUT_FOLDER'], - files_folders=self.config['FILES_FOLDERS'], - filters=self.config['FILTERS']) - - task_dep = [ - 'render_listings', - 'render_archive', - 'render_galleries', - 'render_indexes', - 'render_pages', - 'render_posts', - 'render_rss', - 'render_sources', - 'render_tags', - 'copy_assets', - 'copy_files', - 'sitemap', - 'redirect' - ] - - if webassets: - task_dep.append( 'build_bundles' ) + for pluginInfo in self.plugin_manager.getPluginsOfCategory("LateTask"): + for task in pluginInfo.plugin_object.gen_tasks(): + yield task + if pluginInfo.plugin_object.is_default: + task_dep.append(pluginInfo.plugin_object.name) yield { 'name': 'all', 'actions': None, 'clean': True, 'task_dep': task_dep - } + } def scan_posts(self): """Scan all the posts.""" if not self._scanned: print "Scanning posts ", targets = set([]) - for wildcard, destination, _, use_in_feeds in self.config['post_pages']: + for wildcard, 
destination, _, use_in_feeds in \ + self.config['post_pages']: print ".", for base_path in glob.glob(wildcard): post = Post(base_path, destination, use_in_feeds, self.config['TRANSLATIONS'], self.config['DEFAULT_LANG'], self.config['BLOG_URL'], - self.get_compile_html(base_path), self.MESSAGES) for lang, langpath in self.config['TRANSLATIONS'].items(): dest = (destination, langpath, post.pagenames[lang]) @@ -448,7 +433,8 @@ class Nikola(object): post_name = os.path.splitext(post)[0] context = {} post = self.global_data[post_name] - deps = post.deps(lang) + self.template_deps(template_name) + deps = post.deps(lang) + \ + self.template_system.template_deps(template_name) context['post'] = post context['lang'] = lang context['title'] = post.title(lang) @@ -468,6 +454,7 @@ class Nikola(object): deps_dict['NEXT_LINK'] = [post.next_post.permalink(lang)] deps_dict['OUTPUT_FOLDER'] = self.config['OUTPUT_FOLDER'] deps_dict['TRANSLATIONS'] = self.config['TRANSLATIONS'] + deps_dict['global'] = self.config['GLOBAL_CONTEXT'] task = { 'name': output_name.encode('utf-8'), @@ -481,187 +468,11 @@ class Nikola(object): yield utils.apply_filters(task, filters) - def gen_task_render_pages(self, **kw): - """Build final pages from metadata and HTML fragments. - - Required keyword arguments: - - translations - post_pages - """ - self.scan_posts() - flag = False - for lang in kw["translations"]: - for wildcard, destination, template_name, _ in kw["post_pages"]: - for task in self.generic_page_renderer(lang, - wildcard, template_name, destination, kw["filters"]): - # TODO: enable or remove - #task['uptodate'] = task.get('uptodate', []) +\ - #[config_changed(kw)] - task['basename'] = 'render_pages' - flag = True - yield task - if flag == False: # No page rendered, yield a dummy task - yield { - 'basename': 'render_pages', - 'name': 'None', - 'uptodate': [True], - 'actions': [], - } - - def gen_task_render_sources(self, **kw): - """Publish the rst sources because why not? 
- - Required keyword arguments: - - translations - default_lang - post_pages - output_folder - """ - self.scan_posts() - flag = False - for lang in kw["translations"]: - # TODO: timeline is global - for post in self.timeline: - output_name = os.path.join(kw['output_folder'], - post.destination_path(lang, post.source_ext())) - source = post.source_path - if lang != kw["default_lang"]: - source_lang = source + '.' + lang - if os.path.exists(source_lang): - source = source_lang - yield { - 'basename': 'render_sources', - 'name': output_name.encode('utf8'), - 'file_dep': [source], - 'targets': [output_name], - 'actions': [(utils.copy_file, (source, output_name))], - 'clean': True, - 'uptodate': [config_changed(kw)], - } - if flag == False: # No page rendered, yield a dummy task - yield { - 'basename': 'render_sources', - 'name': 'None', - 'uptodate': [True], - 'actions': [], - } - - def gen_task_render_posts(self, **kw): - """Build HTML fragments from metadata and reSt. - - Required keyword arguments: - - translations - default_lang - timeline - """ - self.scan_posts() - flag = False - for lang in kw["translations"]: - # TODO: timeline is global, get rid of it - deps_dict = copy(kw) - deps_dict.pop('timeline') - for post in kw['timeline']: - source = post.source_path - dest = post.base_path - if lang != kw["default_lang"]: - dest += '.' + lang - source_lang = source + '.' + lang - if os.path.exists(source_lang): - source = source_lang - flag = True - yield { - 'basename': 'render_posts', - 'name': dest.encode('utf-8'), - 'file_dep': post.fragment_deps(lang), - 'targets': [dest], - 'actions': [(post.compile_html, [source, dest])], - 'clean': True, - 'uptodate': [config_changed(deps_dict)], - } - if flag == False: # Return a dummy task - yield { - 'basename': 'render_posts', - 'name': 'None', - 'uptodate': [True], - 'actions': [], - } - - def gen_task_render_indexes(self, **kw): - """Render post-per-page indexes. - The default is 10. 
- - Required keyword arguments: - - translations - output_folder - index_display_post_count - index_teasers - """ - self.scan_posts() - template_name = "index.tmpl" - # TODO: timeline is global, get rid of it - posts = [x for x in self.timeline if x.use_in_feeds] - # Split in smaller lists - lists = [] - while posts: - lists.append(posts[:kw["index_display_post_count"]]) - posts = posts[kw["index_display_post_count"]:] - num_pages = len(lists) - if not lists: - yield { - 'basename': 'render_indexes', - 'actions': [], - } - for lang in kw["translations"]: - for i, post_list in enumerate(lists): - context = {} - if self.config.get("INDEXES_TITLE", ""): - indexes_title = self.config['INDEXES_TITLE'] - else: - indexes_title = self.config["BLOG_TITLE"] - if not i: - output_name = "index.html" - context["title"] = indexes_title - else: - output_name = "index-%s.html" % i - if self.config.get("INDEXES_PAGES", ""): - indexes_pages = self.config["INDEXES_PAGES"] % i - else: - indexes_pages = " (" + kw["messages"][lang]["old posts page %d"] % i + ")" - context["title"] = indexes_title + indexes_pages - context["prevlink"] = None - context["nextlink"] = None - context['index_teasers'] = kw['index_teasers'] - if i > 1: - context["prevlink"] = "index-%s.html" % (i - 1) - if i == 1: - context["prevlink"] = "index.html" - if i < num_pages - 1: - context["nextlink"] = "index-%s.html" % (i + 1) - context["permalink"] = self.link("index", i, lang) - output_name = os.path.join( - kw['output_folder'], self.path("index", i, lang)) - for task in self.generic_post_list_renderer( - lang, - post_list, - output_name, - template_name, - kw['filters'], - context, - ): - task['uptodate'] = task.get('updtodate', []) +\ - [config_changed(kw)] - task['basename'] = 'render_indexes' - yield task - def generic_post_list_renderer(self, lang, posts, output_name, template_name, filters, extra_context): """Renders pages with lists of posts.""" - deps = self.template_deps(template_name) + deps = 
self.template_system.template_deps(template_name) for post in posts: deps += post.deps(lang) context = {} @@ -675,6 +486,7 @@ class Nikola(object): deps_context = copy(context) deps_context["posts"] = [(p.titles[lang], p.permalink(lang)) for p in posts] + deps_context["global"] = self.config['GLOBAL_CONTEXT'] task = { 'name': output_name.encode('utf8'), 'targets': [output_name], @@ -686,1026 +498,3 @@ class Nikola(object): } yield utils.apply_filters(task, filters) - - def gen_task_render_archive(self, **kw): - """Render the post archives. - - Required keyword arguments: - - translations - messages - output_folder - """ - # TODO add next/prev links for years - template_name = "list.tmpl" - # TODO: posts_per_year is global, kill it - for year, posts in self.posts_per_year.items(): - for lang in kw["translations"]: - output_name = os.path.join( - kw['output_folder'], self.path("archive", year, lang)) - post_list = [self.global_data[post] for post in posts] - post_list.sort(cmp=lambda a, b: cmp(a.date, b.date)) - post_list.reverse() - context = {} - context["lang"] = lang - context["items"] = [("[%s] %s" % - (post.date, post.title(lang)), post.permalink(lang)) - for post in post_list] - context["permalink"] = self.link("archive", year, lang) - context["title"] = kw["messages"][lang]["Posts for year %s"]\ - % year - for task in self.generic_post_list_renderer( - lang, - post_list, - output_name, - template_name, - kw['filters'], - context, - ): - task['uptodate'] = task.get('updtodate', []) +\ - [config_changed(kw)] - yield task - - # And global "all your years" page - years = self.posts_per_year.keys() - years.sort(reverse=True) - template_name = "list.tmpl" - kw['years'] = years - for lang in kw["translations"]: - context = {} - output_name = os.path.join( - kw['output_folder'], self.path("archive", None, lang)) - context["title"] = kw["messages"][lang]["Archive"] - context["items"] = [(year, self.link("archive", year, lang)) - for year in years] - 
context["permalink"] = self.link("archive", None, lang) - for task in self.generic_post_list_renderer( - lang, - [], - output_name, - template_name, - kw['filters'], - context, - ): - task['uptodate'] = task.get('updtodate', []) +\ - [config_changed(kw)] - task['basename'] = 'render_archive' - yield task - - def gen_task_render_tags(self, **kw): - """Render the tag pages. - - Required keyword arguments: - - translations - messages - blog_title - blog_url - blog_description - output_folder - tag_pages_are_indexes - index_display_post_count - index_teasers - """ - if not self.posts_per_tag: - yield { - 'basename': 'render_tags', - 'actions': [], - } - return - def page_name(tagname, i, lang): - """Given tag, n, returns a page name.""" - name = self.path("tag", tag, lang) - if i: - name = name.replace('.html', '-%s.html' % i) - return name - - for tag, posts in self.posts_per_tag.items(): - post_list = [self.global_data[post] for post in posts] - post_list.sort(cmp=lambda a, b: cmp(a.date, b.date)) - post_list.reverse() - for lang in kw["translations"]: - #Render RSS - output_name = os.path.join(kw['output_folder'], - self.path("tag_rss", tag, lang)) - deps = [] - post_list = [self.global_data[post] for post in posts - if self.global_data[post].use_in_feeds] - post_list.sort(cmp=lambda a, b: cmp(a.date, b.date)) - post_list.reverse() - for post in post_list: - deps += post.deps(lang) - yield { - 'name': output_name.encode('utf8'), - 'file_dep': deps, - 'targets': [output_name], - 'actions': [(utils.generic_rss_renderer, - (lang, "%s (%s)" % (kw["blog_title"], tag), - kw["blog_url"], kw["blog_description"], - post_list, output_name))], - 'clean': True, - 'uptodate': [config_changed(kw)], - 'basename': 'render_tags' - } - - # Render HTML - if kw['tag_pages_are_indexes']: - # We render a sort of index page collection using only - # this tag's posts. 
- - # FIXME: deduplicate this with render_indexes - template_name = "index.tmpl" - # Split in smaller lists - lists = [] - while post_list: - lists.append(post_list[:kw["index_display_post_count"]]) - post_list = post_list[kw["index_display_post_count"]:] - num_pages = len(lists) - for i, post_list in enumerate(lists): - context = {} - # On a tag page, the feeds are the tag's feeds, plus the site's - rss_link = \ - """""" % \ - (tag, lang, self.link("tag_rss", tag, lang)) - context ['rss_link'] = rss_link - output_name = os.path.join(kw['output_folder'], - page_name(tag, i, lang)) - context["title"] = kw["messages"][lang][u"Posts about %s:"]\ - % tag - context["prevlink"] = None - context["nextlink"] = None - context['index_teasers'] = kw['index_teasers'] - if i > 1: - context["prevlink"] = os.path.basename(page_name(tag, i - 1, lang)) - if i == 1: - context["prevlink"] = os.path.basename(page_name(tag, 0, lang)) - if i < num_pages - 1: - context["nextlink"] = os.path.basename(page_name(tag, i + 1, lang)) - context["permalink"] = self.link("tag", tag, lang) - context["tag"] = tag - for task in self.generic_post_list_renderer( - lang, - post_list, - output_name, - template_name, - kw['filters'], - context, - ): - task['uptodate'] = task.get('updtodate', []) +\ - [config_changed(kw)] - task['basename'] = 'render_tags' - yield task - else: - # We render a single flat link list with this tag's posts - template_name = "tag.tmpl" - output_name = os.path.join(kw['output_folder'], - self.path("tag", tag, lang)) - context = {} - context["lang"] = lang - context["title"] = kw["messages"][lang][u"Posts about %s:"]\ - % tag - context["items"] = [("[%s] %s" % (post.date, post.title(lang)), - post.permalink(lang)) for post in post_list] - context["permalink"] = self.link("tag", tag, lang) - context["tag"] = tag - for task in self.generic_post_list_renderer( - lang, - post_list, - output_name, - template_name, - kw['filters'], - context, - ): - task['uptodate'] = 
task.get('updtodate', []) +\ - [config_changed(kw)] - task['basename'] = 'render_tags' - yield task - - # And global "all your tags" page - tags = self.posts_per_tag.keys() - tags.sort() - template_name = "tags.tmpl" - kw['tags'] = tags - for lang in kw["translations"]: - output_name = os.path.join( - kw['output_folder'], self.path('tag_index', None, lang)) - context = {} - context["title"] = kw["messages"][lang][u"Tags"] - context["items"] = [(tag, self.link("tag", tag, lang)) - for tag in tags] - context["permalink"] = self.link("tag_index", None, lang) - for task in self.generic_post_list_renderer( - lang, - [], - output_name, - template_name, - kw['filters'], - context, - ): - task['uptodate'] = task.get('updtodate', []) +\ - [config_changed(kw)] - yield task - - def gen_task_render_rss(self, **kw): - """Generate RSS feeds. - - Required keyword arguments: - - translations - blog_title - blog_url - blog_description - output_folder - """ - - self.scan_posts() - # TODO: timeline is global, kill it - for lang in kw["translations"]: - output_name = os.path.join(kw['output_folder'], - self.path("rss", None, lang)) - deps = [] - posts = [x for x in self.timeline if x.use_in_feeds][:10] - for post in posts: - deps += post.deps(lang) - yield { - 'basename': 'render_rss', - 'name': output_name, - 'file_dep': deps, - 'targets': [output_name], - 'actions': [(utils.generic_rss_renderer, - (lang, kw["blog_title"], kw["blog_url"], - kw["blog_description"], posts, output_name))], - 'clean': True, - 'uptodate': [config_changed(kw)], - } - - def gen_task_render_listings(self, **kw): - """ - Required keyword arguments: - - listings_folder - output_folder - default_lang - """ - - # Things to ignore in listings - ignored_extensions = (".pyc",) - - def render_listing(in_name, out_name): - with open(in_name, 'r') as fd: - try: - lexer = get_lexer_for_filename(in_name) - except: - lexer = TextLexer() - code = highlight(fd.read(), lexer, - HtmlFormatter(cssclass='code', - 
linenos="table", - nowrap=False, - lineanchors=utils.slugify(f), - anchorlinenos=True)) - title = os.path.basename(in_name) - crumbs = out_name.split(os.sep)[1:-1] + [title] - # TODO: write this in human - paths = ['/'.join(['..'] * (len(crumbs) - 2 - i)) for i in range(len(crumbs[:-2]))] + ['.', '#'] - context = { - 'code': code, - 'title': title, - 'crumbs': zip(paths, crumbs), - 'lang': kw['default_lang'], - 'description': title, - } - self.render_template('listing.tmpl', out_name, context) - flag = True - template_deps = self.template_deps('listing.tmpl') - for root, dirs, files in os.walk(kw['listings_folder']): - # Render all files - for f in files: - ext = os.path.splitext(f)[-1] - if ext in ignored_extensions: - continue - flag = False - in_name = os.path.join(root, f) - out_name = os.path.join( - kw['output_folder'], - root, - f) + '.html' - yield { - 'basename': 'render_listings', - 'name': out_name.encode('utf8'), - 'file_dep': template_deps + [in_name], - 'targets': [out_name], - 'actions': [(render_listing, [in_name, out_name])], - } - if flag: - yield { - 'basename': 'render_listings', - 'actions': [], - } - - def gen_task_render_galleries(self, **kw): - """Render image galleries. - - Required keyword arguments: - - image_size - thumbnail_size, - default_lang, - output_folder, - use_filename_as_title - """ - - # FIXME: lots of work is done even when images don't change, - # which should be moved into the task. - # Also, this is getting complex enough to be refactored into a file. 
- - template_name = "gallery.tmpl" - - gallery_list = glob.glob("galleries/*") - # Fail quick if we don't have galleries, so we don't - # require PIL - Image = None - if not gallery_list: - yield { - 'basename': 'render_galleries', - 'actions': [], - } - return - try: - import Image as _Image - import ExifTags - Image = _Image - except ImportError: - try: - from PIL import Image as _Image, ExifTags - Image = _Image - except ImportError: - pass - if Image: - def _resize_image(src, dst, max_size): - im = Image.open(src) - w, h = im.size - if w > max_size or h > max_size: - size = max_size, max_size - try: - exif = im._getexif() - except Exception: - exif = None - if exif is not None: - for tag, value in exif.items(): - decoded = ExifTags.TAGS.get(tag, tag) - - if decoded == 'Orientation': - if value == 3: - im = im.rotate(180) - elif value == 6: - im = im.rotate(270) - elif value == 8: - im = im.rotate(90) - - break - - im.thumbnail(size, Image.ANTIALIAS) - im.save(dst) - - else: - utils.copy_file(src, dst) - - def create_thumb(src, dst): - return _resize_image(src, dst, kw['thumbnail_size']) - - def create_resized_image(src, dst): - return _resize_image(src, dst, kw['max_image_size']) - - dates = {} - def image_date(src): - if src not in dates: - im = Image.open(src) - try: - exif = im._getexif() - except Exception: - exif = None - if exif is not None: - for tag, value in exif.items(): - decoded = ExifTags.TAGS.get(tag, tag) - if decoded == 'DateTimeOriginal': - try: - dates[src] = datetime.datetime.strptime(value, r'%Y:%m:%d %H:%M:%S') - break - except ValueError: #invalid EXIF date - pass - if src not in dates: - dates[src] = datetime.datetime.fromtimestamp(os.stat(src).st_mtime) - return dates[src] - - else: - create_thumb = utils.copy_file - create_resized_image = utils.copy_file - - # gallery_path is "gallery/name" - for gallery_path in gallery_list: - # gallery_name is "name" - gallery_name = os.path.basename(gallery_path) - # output_gallery is 
"output/GALLERY_PATH/name" - output_gallery = os.path.dirname(os.path.join(kw["output_folder"], - self.path("gallery", gallery_name, None))) - if not os.path.isdir(output_gallery): - yield { - 'basename': 'render_galleries', - 'name': output_gallery, - 'actions': [(os.makedirs, (output_gallery,))], - 'targets': [output_gallery], - 'clean': True, - 'uptodate': [config_changed(kw)], - } - # image_list contains "gallery/name/image_name.jpg" - image_list = glob.glob(gallery_path + "/*jpg") +\ - glob.glob(gallery_path + "/*JPG") +\ - glob.glob(gallery_path + "/*PNG") +\ - glob.glob(gallery_path + "/*png") - - # Filter ignore images - try: - def add_gallery_path(index): - return "{0}/{1}".format(gallery_path, index) - - exclude_path = os.path.join(gallery_path, "exclude.meta") - try: - f = open(exclude_path, 'r') - excluded_image_name_list = f.read().split() - except IOError: - excluded_image_name_list = [] - - excluded_image_list = map(add_gallery_path, - excluded_image_name_list) - image_set = set(image_list) - set(excluded_image_list) - image_list = list(image_set) - except IOError: - pass - - image_list = [x for x in image_list if "thumbnail" not in x] - # Sort by date - image_list.sort(cmp=lambda a,b: cmp(image_date(a), image_date(b))) - image_name_list = [os.path.basename(x) for x in image_list] - - thumbs = [] - # Do thumbnails and copy originals - for img, img_name in zip(image_list, image_name_list): - # img is "galleries/name/image_name.jpg" - # img_name is "image_name.jpg" - # fname, ext are "image_name", ".jpg" - fname, ext = os.path.splitext(img_name) - # thumb_path is - # "output/GALLERY_PATH/name/image_name.thumbnail.jpg" - thumb_path = os.path.join(output_gallery, - fname + ".thumbnail" + ext) - # thumb_path is "output/GALLERY_PATH/name/image_name.jpg" - orig_dest_path = os.path.join(output_gallery, img_name) - thumbs.append(os.path.basename(thumb_path)) - yield { - 'basename': 'render_galleries', - 'name': thumb_path, - 'file_dep': [img], - 'targets': 
[thumb_path], - 'actions': [ - (create_thumb, (img, thumb_path)) - ], - 'clean': True, - 'uptodate': [config_changed(kw)], - } - yield { - 'basename': 'render_galleries', - 'name': orig_dest_path, - 'file_dep': [img], - 'targets': [orig_dest_path], - 'actions': [ - (create_resized_image, (img, orig_dest_path)) - ], - 'clean': True, - 'uptodate': [config_changed(kw)], - } - - # Remove excluded images - if excluded_image_name_list: - for img, img_name in zip(excluded_image_list, - excluded_image_name_list): - # img_name is "image_name.jpg" - # fname, ext are "image_name", ".jpg" - fname, ext = os.path.splitext(img_name) - excluded_thumb_dest_path = os.path.join(output_gallery, - fname + ".thumbnail" + ext) - excluded_dest_path = os.path.join(output_gallery, img_name) - yield { - 'basename': 'render_galleries', - 'name': excluded_thumb_dest_path, - 'file_dep': [exclude_path], - #'targets': [excluded_thumb_dest_path], - 'actions': [ - (utils.remove_file, (excluded_thumb_dest_path,)) - ], - 'clean': True, - 'uptodate': [config_changed(kw)], - } - yield { - 'basename': 'render_galleries', - 'name': excluded_dest_path, - 'file_dep': [exclude_path], - #'targets': [excluded_dest_path], - 'actions': [ - (utils.remove_file, (excluded_dest_path,)) - ], - 'clean': True, - 'uptodate': [config_changed(kw)], - } - - output_name = os.path.join(output_gallery, "index.html") - context = {} - context["lang"] = kw["default_lang"] - context["title"] = os.path.basename(gallery_path) - context["description"] = kw["blog_description"] - if kw['use_filename_as_title']: - img_titles = ['title="%s"' % utils.unslugify(fn[:-4]) - for fn in image_name_list] - else: - img_titles = [''] * len(image_name_list) - context["images"] = zip(image_name_list, thumbs, img_titles) - context["permalink"] = self.link("gallery", gallery_name, None) - - # Use galleries/name/index.txt to generate a blurb for - # the gallery, if it exists - index_path = os.path.join(gallery_path, "index.txt") - index_dst_path = 
os.path.join(gallery_path, "index.html") - if os.path.exists(index_path): - compile_html = self.get_compile_html(index_path) - yield { - 'basename': 'render_galleries', - 'name': index_dst_path.encode('utf-8'), - 'file_dep': [index_path], - 'targets': [index_dst_path], - 'actions': [(compile_html, - [index_path, index_dst_path])], - 'clean': True, - 'uptodate': [config_changed(kw)], - } - - file_dep = self.template_deps(template_name) + image_list - - def render_gallery(output_name, context, index_dst_path): - if os.path.exists(index_dst_path): - with codecs.open(index_dst_path, "rb", "utf8") as fd: - context['text'] = fd.read() - file_dep.append(index_dst_path) - else: - context['text'] = '' - self.render_template(template_name, output_name, context) - - yield { - 'basename': 'render_galleries', - 'name': gallery_path, - 'file_dep': file_dep, - 'targets': [output_name], - 'actions': [(render_gallery, - (output_name, context, index_dst_path))], - 'clean': True, - 'uptodate': [config_changed(kw)], - } - - @staticmethod - def gen_task_redirect(**kw): - """Generate redirections. - - Required keyword arguments: - - redirections - output_folder - """ - - def create_redirect(src, dst): - with codecs.open(src, "wb+", "utf8") as fd: - fd.write(('' + - '' + - '') % dst) - - if not kw['redirections']: - # If there are no redirections, still needs to create a - # dummy action so dependencies don't fail - yield { - 'basename': 'redirect', - 'name': 'None', - 'uptodate': [True], - 'actions': [], - } - else: - for src, dst in kw["redirections"]: - src_path = os.path.join(kw["output_folder"], src) - yield { - 'basename': 'redirect', - 'name': src_path, - 'targets': [src_path], - 'actions': [(create_redirect, (src_path, dst))], - 'clean': True, - 'uptodate': [config_changed(kw)], - } - - @staticmethod - def gen_task_copy_files(**kw): - """Copy static files into the output folder. 
- - required keyword arguments: - - output_folder - files_folders - """ - - flag = False - for src in kw['files_folders']: - dst = kw['output_folder'] - filters = kw['filters'] - real_dst = os.path.join(dst, kw['files_folders'][src]) - for task in utils.copy_tree(src, real_dst, link_cutoff=dst): - flag = True - task['basename'] = 'copy_files' - task['uptodate'] = task.get('uptodate', []) +\ - [config_changed(kw)] - yield utils.apply_filters(task, filters) - if not flag: - yield { - 'basename': 'copy_files', - 'actions': (), - } - - @staticmethod - def gen_task_copy_assets(**kw): - """Create tasks to copy the assets of the whole theme chain. - - If a file is present on two themes, use the version - from the "youngest" theme. - - Required keyword arguments: - - themes - output_folder - - """ - tasks = {} - for theme_name in kw['themes']: - src = os.path.join(utils.get_theme_path(theme_name), 'assets') - dst = os.path.join(kw['output_folder'], 'assets') - for task in utils.copy_tree(src, dst): - if task['name'] in tasks: - continue - tasks[task['name']] = task - task['uptodate'] = task.get('uptodate', []) + \ - [config_changed(kw)] - task['basename'] = 'copy_assets' - yield utils.apply_filters(task, kw['filters']) - - @staticmethod - def gen_task_build_bundles(**kw): - """Create tasks to build bundles from theme assets. 
- - theme_bundles - output_folder - filters - """ - - def build_bundle(output, inputs): - env = webassets.Environment( - os.path.join(kw['output_folder'], os.path.dirname(output)), - os.path.dirname(output)) - bundle = webassets.Bundle(*inputs, - output=os.path.basename(output)) - env.register(output, bundle) - # This generates the file - env[output].urls() - - flag = False - for name, files in kw['theme_bundles'].items(): - output_path = os.path.join(kw['output_folder'], name) - dname = os.path.dirname(name) - file_dep = [os.path.join('output', dname, fname) - for fname in files] - task = { - 'task_dep': ['copy_assets', 'copy_files'], - 'file_dep': file_dep, - 'name': name, - 'actions': [(build_bundle, (name, files))], - 'targets': [os.path.join(kw['output_folder'], name)], - 'basename': 'build_bundles', - 'uptodate': [config_changed(kw)] - } - flag = True - yield utils.apply_filters(task, kw['filters']) - if flag == False: # No page rendered, yield a dummy task - yield { - 'basename': 'build_bundles', - 'name': 'None', - 'uptodate': [True], - 'actions': [], - } - - - @staticmethod - def new_post(post_pages, is_post=True): - # Guess where we should put this - for path, _, _, use_in_rss in post_pages: - if use_in_rss == is_post: - break - else: - path = post_pages[0][0] - - print "Creating New Post" - print "-----------------\n" - title = raw_input("Enter title: ").decode(sys.stdin.encoding) - slug = utils.slugify(title) - data = u'\n'.join([ - title, - slug, - datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S') - ]) - output_path = os.path.dirname(path) - meta_path = os.path.join(output_path, slug + ".meta") - pattern = os.path.basename(path) - if pattern.startswith("*."): - suffix = pattern[1:] - else: - suffix = ".txt" - txt_path = os.path.join(output_path, slug + suffix) - - if os.path.isfile(meta_path) or os.path.isfile(txt_path): - print "The title already exists!" 
- exit() - - with codecs.open(meta_path, "wb+", "utf8") as fd: - fd.write(data) - with codecs.open(txt_path, "wb+", "utf8") as fd: - fd.write(u"Write your post here.") - print "Your post's metadata is at: ", meta_path - print "Your post's text is at: ", txt_path - - @classmethod - def new_page(cls): - cls.new_post(False) - - @classmethod - def gen_task_new_post(cls, post_pages): - """Create a new post (interactive).""" - yield { - "basename": "new_post", - "actions": [PythonInteractiveAction(cls.new_post, (post_pages,))], - } - - @classmethod - def gen_task_new_page(cls, post_pages): - """Create a new post (interactive).""" - yield { - "basename": "new_page", - "actions": [PythonInteractiveAction(cls.new_post, - (post_pages, False,))], - } - - @staticmethod - def gen_task_deploy(**kw): - """Deploy site. - - Required keyword arguments: - - commands - - """ - yield { - "basename": "deploy", - "actions": kw['commands'], - "verbosity": 2, - } - - @staticmethod - def gen_task_sitemap(**kw): - """Generate Google sitemap. 
- - Required keyword arguments: - - blog_url - output_folder - """ - - output_path = os.path.abspath(kw['output_folder']) - sitemap_path = os.path.join(output_path, "sitemap.xml.gz") - - def sitemap(): - # Generate config - config_data = """ - - - - - """ % ( - kw["blog_url"], - sitemap_path, - output_path, - kw["blog_url"], - ) - config_file = tempfile.NamedTemporaryFile(delete=False) - config_file.write(config_data) - config_file.close() - - # Generate sitemap - import sitemap_gen as smap - sitemap = smap.CreateSitemapFromFile(config_file.name, True) - if not sitemap: - smap.output.Log('Configuration file errors -- exiting.', 0) - else: - sitemap.Generate() - smap.output.Log('Number of errors: %d' % - smap.output.num_errors, 1) - smap.output.Log('Number of warnings: %d' % - smap.output.num_warns, 1) - os.unlink(config_file.name) - - yield { - "basename": "sitemap", - "task_dep": [ - "render_archive", - "render_indexes", - "render_pages", - "render_posts", - "render_rss", - "render_sources", - "render_tags"], - "targets": [sitemap_path], - "actions": [(sitemap,)], - "uptodate": [config_changed(kw)], - "clean": True, - } - - @staticmethod - def task_serve(**kw): - """ - Start test server. (doit serve [--address 127.0.0.1] [--port 8000]) - By default, the server runs on port 8000 on the IP address 127.0.0.1. - - required keyword arguments: - - output_folder - """ - - def serve(address, port): - from BaseHTTPServer import HTTPServer - from SimpleHTTPServer import SimpleHTTPRequestHandler - - class OurHTTPRequestHandler(SimpleHTTPRequestHandler): - extensions_map = dict(SimpleHTTPRequestHandler.extensions_map) - extensions_map[""] = "text/plain" - - os.chdir(kw['output_folder']) - - httpd = HTTPServer((address, port), OurHTTPRequestHandler) - sa = httpd.socket.getsockname() - print "Serving HTTP on", sa[0], "port", sa[1], "..." 
- httpd.serve_forever() - - yield { - "basename": 'serve', - "actions": [(serve,)], - "verbosity": 2, - "params": [{'short': 'a', - 'name': 'address', - 'long': 'address', - 'type': str, - 'default': '127.0.0.1', - 'help': 'Bind address (default: 127.0.0.1)'}, - {'short': 'p', - 'name': 'port', - 'long': 'port', - 'type': int, - 'default': 8000, - 'help': 'Port number (default: 8000)'}], - } - - @staticmethod - def task_install_theme(): - """Install theme. (doit install_theme -n themename [-u URL]|[-l]).""" - - def install_theme(name, url, listing): - if name is None and not listing: - print "This command needs either the -n or the -l option." - return False - data = urllib2.urlopen(url).read() - data = json.loads(data) - if listing: - print "Themes:" - print "-------" - for theme in sorted(data.keys()): - print theme - return True - else: - if name in data: - if os.path.isfile("themes"): - raise IOError("'themes' isn't a directory!") - elif not os.path.isdir("themes"): - try: - os.makedirs("themes") - except: - raise OSError("mkdir 'theme' error!") - print 'Downloading: %s' % data[name] - zip_file = StringIO() - zip_file.write(urllib2.urlopen(data[name]).read()) - print 'Extracting: %s into themes' % name - utils.extract_all(zip_file) - else: - print "Can't find theme %s" % name - return False - - yield { - "basename": 'install_theme', - "actions": [(install_theme,)], - "verbosity": 2, - "params": [ - { - 'short': 'u', - 'name': 'url', - 'long': 'url', - 'type': str, - 'default': 'http://nikola.ralsina.com.ar/themes/index.json', - 'help': 'URL for theme collection.' - }, - { - 'short': 'l', - 'name': 'listing', - 'long': 'list', - 'type': bool, - 'default': False, - 'help': 'List available themes.' - }, - { - 'short': 'n', - 'name': 'name', - 'long': 'name', - 'type': str, - 'default': None, - 'help': 'Name of theme to install.' 
- }], - } - - @staticmethod - def task_bootswatch_theme(): - """Given a swatch name and a parent theme, creates a custom theme.""" - def bootswatch_theme(name, parent, swatch): - print "Creating %s theme from %s and %s" % (name, swatch, parent) - try: - os.makedirs(os.path.join('themes', name, 'assets', 'css')) - except: - pass - for fname in ('bootstrap.min.css', 'bootstrap.css'): - url = 'http://bootswatch.com/%s/%s' % (swatch, fname) - print "Downloading: ", url - data = urllib2.urlopen(url).read() - with open(os.path.join( - 'themes', name, 'assets', 'css', fname), 'wb+') as output: - output.write(data) - - with open(os.path.join('themes', name, 'parent'), 'wb+') as output: - output.write(parent) - print 'Theme created. Change the THEME setting to "%s" to use it.'\ - % name - - yield { - "basename": 'bootswatch_theme', - "actions": [(bootswatch_theme,)], - "verbosity": 2, - "params": [ - { - 'short': 'p', - 'name': 'parent', - 'long': 'parent', - 'type': str, - 'default': 'site', - 'help': 'Name of parent theme.' - }, - { - 'short': 's', - 'name': 'swatch', - 'long': 'swatch', - 'type': str, - 'default': 'slate', - 'help': 'Name of the swatch from bootswatch.com' - }, - { - 'short': 'n', - 'name': 'name', - 'long': 'name', - 'type': str, - 'default': 'custom', - 'help': 'Name of the new theme' - } - ], - } - - -def nikola_main(): - print "Starting doit..." 
- os.system("doit -f %s" % __file__) diff --git a/nikola/plugin_categories.py b/nikola/plugin_categories.py new file mode 100644 index 0000000..cc59b24 --- /dev/null +++ b/nikola/plugin_categories.py @@ -0,0 +1,85 @@ +__all__ = [ + 'Command', + 'LateTask', + 'PageCompiler', + 'Task', + 'TemplateSystem' +] + +from yapsy.IPlugin import IPlugin + + +class BasePlugin(IPlugin): + """Base plugin class.""" + + def set_site(self, site): + """Sets site, which is a Nikola instance.""" + self.site = site + + +class Command(BasePlugin): + """These plugins are exposed via the command line.""" + + name = "dummy_command" + + short_help = "A short explanation." + + def run(self): + """Do whatever this command does.""" + raise Exception("Implement Me First") + + +class BaseTask(BasePlugin): + """PLugins of this type are task generators.""" + + name = "dummy_task" + + # default tasks are executed by default. + # the others have to be specifie in the command line. + is_default = True + + def gen_tasks(self): + """Task generator.""" + raise Exception("Implement Me First") + + +class Task(BaseTask): + """PLugins of this type are task generators.""" + + +class LateTask(BaseTask): + """Plugins of this type are executed after all plugins of type Task.""" + + name = "dummy_latetask" + + +class TemplateSystem(object): + """Plugins of this type wrap templating systems.""" + + name = "dummy templates" + + def set_directories(self, directories): + """Sets the list of folders where templates are located.""" + raise Exception("Implement Me First") + + def template_deps(self, template_name): + """Returns filenames which are dependencies for a template.""" + raise Exception("Implement Me First") + + def render_template(name, output_name, context): + """Renders template to a file using context. + + This must save the data to output_name *and* return it + so that the caller may do additional processing. 
+ """ + raise Exception("Implement Me First") + + +class PageCompiler(object): + """Plugins that compile text files into HTML.""" + + name = "dummy compiler" + + def compile_html(self, source, dest): + """Compile the source, save it on dest.""" + raise Exception("Implement Me First") diff --git a/nikola/plugins/command_bootswatch_theme.plugin b/nikola/plugins/command_bootswatch_theme.plugin new file mode 100644 index 0000000..f75f734 --- /dev/null +++ b/nikola/plugins/command_bootswatch_theme.plugin @@ -0,0 +1,10 @@ +[Core] +Name = bootswatch_theme +Module = command_bootswatch_theme + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Given a swatch name and a parent theme, creates a custom theme. + diff --git a/nikola/plugins/command_bootswatch_theme.py b/nikola/plugins/command_bootswatch_theme.py new file mode 100644 index 0000000..f077eb1 --- /dev/null +++ b/nikola/plugins/command_bootswatch_theme.py @@ -0,0 +1,47 @@ +from optparse import OptionParser +import os +import urllib2 + +from nikola.plugin_categories import Command + + +class CommandBootswatchTheme(Command): + """Given a swatch name and a parent theme, creates a custom theme.""" + + name = "bootswatch_theme" + + def run(self, *args): + """Given a swatch name and a parent theme, creates a custom theme.""" + + parser = OptionParser(usage="nikola %s [options]" % self.name) + parser.add_option("-n", "--name", dest="name", + help="New theme name (default: custom)", default='custom') + parser.add_option("-s", "--swatch", dest="swatch", + help="Name of the swatch from bootswatch.com (default: slate)", + default='slate') + parser.add_option("-p", "--parent", dest="parent", + help="Parent theme name (default: site)", default='site') + (options, args) = parser.parse_args(list(args)) + + name = options.name + swatch = options.swatch + parent = options.parent + + print "Creating '%s' theme from '%s' and '%s'" % ( + name, swatch, parent) + try: + 
os.makedirs(os.path.join('themes', name, 'assets', 'css')) + except: + pass + for fname in ('bootstrap.min.css', 'bootstrap.css'): + url = 'http://bootswatch.com/%s/%s' % (swatch, fname) + print "Downloading: ", url + data = urllib2.urlopen(url).read() + with open(os.path.join( + 'themes', name, 'assets', 'css', fname), 'wb+') as output: + output.write(data) + + with open(os.path.join('themes', name, 'parent'), 'wb+') as output: + output.write(parent) + print 'Theme created. Change the THEME setting to "%s" to use it.'\ + % name diff --git a/nikola/plugins/command_build.plugin b/nikola/plugins/command_build.plugin new file mode 100644 index 0000000..7d029a7 --- /dev/null +++ b/nikola/plugins/command_build.plugin @@ -0,0 +1,10 @@ +[Core] +Name = build +Module = command_build + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Build the site. + diff --git a/nikola/plugins/command_build.py b/nikola/plugins/command_build.py new file mode 100644 index 0000000..cface15 --- /dev/null +++ b/nikola/plugins/command_build.py @@ -0,0 +1,32 @@ +import os +import tempfile + +from nikola.plugin_categories import Command + + +class CommandBuild(Command): + """Build the site.""" + + name = "build" + + def run(self, *args): + """Build the site using doit.""" + + # FIXME: this is crap, do it right + with tempfile.NamedTemporaryFile(suffix='.py', delete=False) as dodo: + dodo.write(''' +from doit.reporter import ExecutedOnlyReporter +DOIT_CONFIG = { + 'reporter': ExecutedOnlyReporter, + 'default_tasks': ['render_site'], +} +from nikola import Nikola +import conf +SITE = Nikola(**conf.__dict__) + + +def task_render_site(): + return SITE.gen_tasks() + ''') + dodo.flush() + os.system('doit -f %s -d . 
%s' % (dodo.name, ' '.join(args))) diff --git a/nikola/plugins/command_check.plugin b/nikola/plugins/command_check.plugin new file mode 100644 index 0000000..d4dcd1c --- /dev/null +++ b/nikola/plugins/command_check.plugin @@ -0,0 +1,10 @@ +[Core] +Name = check +Module = command_check + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Check the generated site + diff --git a/nikola/plugins/command_check.py b/nikola/plugins/command_check.py new file mode 100644 index 0000000..ce1e2e3 --- /dev/null +++ b/nikola/plugins/command_check.py @@ -0,0 +1,109 @@ +from optparse import OptionParser +import os +import sys +import urllib +from urlparse import urlparse + +import lxml.html + +from nikola.plugin_categories import Command + + +class CommandCheck(Command): + """Check the generated site.""" + + name = "check" + + def run(self, *args): + """Check the generated site.""" + parser = OptionParser(usage="nikola %s [options]" % self.name) + parser.add_option('-l', '--check-links', dest='links', + action='store_true', + help='Check for dangling links.') + parser.add_option('-f', '--check-files', dest='files', + action='store_true', + help='Check for unknown files.') + + (options, args) = parser.parse_args(list(args)) + if options.links: + scan_links() + if options.files: + scan_files() + +existing_targets = set([]) + + +def analize(task): + try: + filename = task.split(":")[-1] + d = lxml.html.fromstring(open(filename).read()) + for l in d.iterlinks(): + target = l[0].attrib[l[1]] + if target == "#": + continue + parsed = urlparse(target) + if parsed.scheme: + continue + if parsed.fragment: + target = target.split('#')[0] + target_filename = os.path.abspath( + os.path.join(os.path.dirname(filename), + urllib.unquote(target))) + if target_filename not in existing_targets: + if os.path.exists(target_filename): + existing_targets.add(target_filename) + else: + print "In %s broken link: " % filename, target + if 
'--find-sources' in sys.argv: + print "Possible sources:" + print os.popen( + 'nikola build list --deps %s' % task, 'r').read() + print "===============================\n" + + except Exception as exc: + print "Error with:", filename, exc + + +def scan_links(): + print "Checking Links:\n===============\n" + for task in os.popen('nikola build list --all', 'r').readlines(): + task = task.strip() + if task.split(':')[0] in ( + 'render_tags', + 'render_archive', + 'render_galleries', + 'render_indexes', + 'render_pages', + 'render_site') and '.html' in task: + analize(task) + + +def scan_files(): + print "Checking Files:\n===============\n" + task_fnames = set([]) + real_fnames = set([]) + # First check that all targets are generated in the right places + for task in os.popen('nikola build list --all', 'r').readlines(): + task = task.strip() + if 'output' in task and ':' in task: + fname = task.split(':')[-1] + task_fnames.add(fname) + # And now check that there are no non-target files + for root, dirs, files in os.walk('output'): + for src_name in files: + fname = os.path.join(root, src_name) + real_fnames.add(fname) + + only_on_output = list(real_fnames - task_fnames) + if only_on_output: + only_on_output.sort() + print "\nFiles from unknown origins:\n" + for f in only_on_output: + print f + + only_on_input = list(task_fnames - real_fnames) + if only_on_input: + only_on_input.sort() + print "\nFiles not generated:\n" + for f in only_on_input: + print f diff --git a/nikola/plugins/command_deploy.plugin b/nikola/plugins/command_deploy.plugin new file mode 100644 index 0000000..c8776b5 --- /dev/null +++ b/nikola/plugins/command_deploy.plugin @@ -0,0 +1,9 @@ +[Core] +Name = deploy +Module = command_deploy + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Deploy the site diff --git a/nikola/plugins/command_deploy.py b/nikola/plugins/command_deploy.py new file mode 100644 index 0000000..cb2eb41 --- /dev/null 
+++ b/nikola/plugins/command_deploy.py @@ -0,0 +1,16 @@ +from optparse import OptionParser +import os + +from nikola.plugin_categories import Command + + +class Deploy(Command): + """Deploy site. """ + name = "deploy" + + def run(self, *args): + parser = OptionParser(usage="nikola %s [options]" % self.name) + (options, args) = parser.parse_args(list(args)) + for command in self.site.config['DEPLOY_COMMANDS']: + print "==>", command + os.system(command) diff --git a/nikola/plugins/command_import_wordpress.plugin b/nikola/plugins/command_import_wordpress.plugin new file mode 100644 index 0000000..a2477b9 --- /dev/null +++ b/nikola/plugins/command_import_wordpress.plugin @@ -0,0 +1,10 @@ +[Core] +Name = import_wordpress +Module = command_import_wordpress + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Import a wordpress site from a XML dump (requires markdown). + diff --git a/nikola/plugins/command_import_wordpress.py b/nikola/plugins/command_import_wordpress.py new file mode 100644 index 0000000..e75d022 --- /dev/null +++ b/nikola/plugins/command_import_wordpress.py @@ -0,0 +1,163 @@ +import codecs +import os +from urlparse import urlparse +from urllib import urlopen + +from lxml import etree, html +from mako.template import Template + +from nikola.plugin_categories import Command +from nikola import utils + +links = {} + + +class CommandImportWordpress(Command): + """Import a wordpress dump.""" + + name = "import_wordpress" + + def run(self, fname=None): + # Parse the data + if fname is None: + print "Usage: nikola import_wordpress wordpress_dump.xml" + return + context = {} + with open(fname) as fd: + xml = [] + for line in fd: + # These explode etree and are useless + if ' %s" % (url, dst_path) + with open(dst_path, 'wb+') as fd: + fd.write(urlopen(url).read()) + dst_url = '/'.join(dst_path.split(os.sep)[2:]) + links[link] = '/' + dst_url + links[url] = '/' + dst_url + return + + +def 
import_item(item): + """Takes an item from the feed and creates a post file.""" + title = get_text_tag(item, 'title', 'NO TITLE') + # link is something like http://foo.com/2012/09/01/hello-world/ + # So, take the path, utils.slugify it, and that's our slug + slug = utils.slugify(urlparse(get_text_tag(item, 'link', None)).path) + description = get_text_tag(item, 'description', '') + post_date = get_text_tag(item, + '{http://wordpress.org/export/1.2/}post_date', None) + post_type = get_text_tag(item, + '{http://wordpress.org/export/1.2/}post_type', 'post') + status = get_text_tag(item, + '{http://wordpress.org/export/1.2/}status', 'publish') + content = get_text_tag(item, + '{http://purl.org/rss/1.0/modules/content/}encoded', '') + + tags = [] + if status != 'publish': + tags.append('draft') + for tag in item.findall('category'): + text = tag.text + if text == 'Uncategorized': + continue + tags.append(text) + + if post_type == 'attachment': + return + elif post_type == 'post': + out_folder = 'posts' + else: + out_folder = 'stories' + # Write metadata + with codecs.open(os.path.join('new_site', out_folder, slug + '.meta'), + "w+", "utf8") as fd: + fd.write(u'%s\n' % title) + fd.write(u'%s\n' % slug) + fd.write(u'%s\n' % post_date) + fd.write(u'%s\n' % ','.join(tags)) + fd.write(u'\n') + fd.write(u'%s\n' % description) + with open(os.path.join( + 'new_site', out_folder, slug + '.wp'), "wb+") as fd: + if content.strip(): + try: + doc = html.document_fromstring(content) + doc.rewrite_links(replacer) + fd.write(html.tostring(doc, encoding='utf8')) + except: + import pdb + pdb.set_trace() diff --git a/nikola/plugins/command_init.plugin b/nikola/plugins/command_init.plugin new file mode 100644 index 0000000..3c6bd21 --- /dev/null +++ b/nikola/plugins/command_init.plugin @@ -0,0 +1,10 @@ +[Core] +Name = init +Module = command_init + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Create a new site. 
+ diff --git a/nikola/plugins/command_init.py b/nikola/plugins/command_init.py new file mode 100644 index 0000000..a032370 --- /dev/null +++ b/nikola/plugins/command_init.py @@ -0,0 +1,34 @@ +from optparse import OptionParser +import os +import shutil + +import nikola +from nikola.plugin_categories import Command + + +class CommandInit(Command): + """Create a new site.""" + + name = "init" + + usage = """Usage: nikola init folder [options]. + +That will create a sample site in the specified folder. +The destination folder must not exist. +""" + + def run(self, *args): + """Create a new site.""" + parser = OptionParser(usage=self.usage) + (options, args) = parser.parse_args(list(args)) + + target = args[0] + if target is None: + print self.usage + else: + src = os.path.join(os.path.dirname(nikola.__file__), + 'data', 'samplesite') + shutil.copytree(src, target) + print "A new site with some sample data has been created at %s."\ + % target + print "See README.txt in that folder for more information." diff --git a/nikola/plugins/command_install_theme.plugin b/nikola/plugins/command_install_theme.plugin new file mode 100644 index 0000000..f010074 --- /dev/null +++ b/nikola/plugins/command_install_theme.plugin @@ -0,0 +1,10 @@ +[Core] +Name = install_theme +Module = command_install_theme + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Install a theme into the current site. 
+ diff --git a/nikola/plugins/command_install_theme.py b/nikola/plugins/command_install_theme.py new file mode 100644 index 0000000..293ce97 --- /dev/null +++ b/nikola/plugins/command_install_theme.py @@ -0,0 +1,62 @@ +from optparse import OptionParser +import os +import urllib2 +import json +from io import StringIO + +from nikola.plugin_categories import Command +from nikola import utils + + +class CommandInstallTheme(Command): + """Start test server.""" + + name = "install_theme" + + def run(self, *args): + """Install theme into current site.""" + + parser = OptionParser(usage="nikola %s [options]" % self.name) + parser.add_option("-l", "--list", dest="list", + action="store_true", + help="Show list of available themes.") + parser.add_option("-n", "--name", dest="name", + help="Theme name", default=None) + parser.add_option("-u", "--url", dest="url", + help="URL for the theme repository" + "(default: http://nikola.ralsina.com.ar/themes/index.json)", + default='http://nikola.ralsina.com.ar/themes/index.json') + (options, args) = parser.parse_args(list(args)) + + listing = options.list + name = options.name + url = options.url + + if name is None and not listing: + print "This command needs either the -n or the -l option." 
+ return False + data = urllib2.urlopen(url).read() + data = json.loads(data) + if listing: + print "Themes:" + print "-------" + for theme in sorted(data.keys()): + print theme + return True + else: + if name in data: + if os.path.isfile("themes"): + raise IOError("'themes' isn't a directory!") + elif not os.path.isdir("themes"): + try: + os.makedirs("themes") + except: + raise OSError("mkdir 'theme' error!") + print 'Downloading: %s' % data[name] + zip_file = StringIO() + zip_file.write(urllib2.urlopen(data[name]).read()) + print 'Extracting: %s into themes' % name + utils.extract_all(zip_file) + else: + print "Can't find theme %s" % name + return False diff --git a/nikola/plugins/command_new_post.plugin b/nikola/plugins/command_new_post.plugin new file mode 100644 index 0000000..6d70aff --- /dev/null +++ b/nikola/plugins/command_new_post.plugin @@ -0,0 +1,10 @@ +[Core] +Name = new_post +Module = command_new_post + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Create a new post. 
+ diff --git a/nikola/plugins/command_new_post.py b/nikola/plugins/command_new_post.py new file mode 100644 index 0000000..574df5f --- /dev/null +++ b/nikola/plugins/command_new_post.py @@ -0,0 +1,100 @@ +import codecs +import datetime +from optparse import OptionParser +import os +import sys + +from nikola.plugin_categories import Command +from nikola import utils + + +class CommandNewPost(Command): + """Create a new post.""" + + name = "new_post" + + def run(self, *args): + """Create a new post.""" + parser = OptionParser(usage="nikola %s [options]" % self.name) + parser.add_option('-p', '--page', dest='is_post', + action='store_false', + help='Create a page instead of a blog post.') + parser.add_option('-t', '--title', dest='title', + help='Title for the page/post.', default=None) + parser.add_option('--tags', dest='tags', + help='Comma-separated tags for the page/post.', + default='') + parser.add_option('-1', dest='onefile', + action='store_true', + help='Create post with embedded metadata (single file format).', + default=self.site.config.get('ONE_FILE_POSTS', True)) + parser.add_option('-f', '--format', + dest='post_format', + default='rest', + help='Format for post (rest or markdown)') + (options, args) = parser.parse_args(list(args)) + + is_post = options.is_post + title = options.title + tags = options.tags + onefile = options.onefile + post_format = options.post_format + + # Guess where we should put this + for path, _, _, use_in_rss in self.site.config['post_pages']: + if use_in_rss == is_post: + break + else: + path = self.site.config['post_pages'][0][0] + + print "Creating New Post" + print "-----------------\n" + if title is None: + title = raw_input("Enter title: ").decode(sys.stdin.encoding) + else: + print "Title: ", title + slug = utils.slugify(title) + date = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S') + data = [ + title, + slug, + date, + tags + ] + output_path = os.path.dirname(path) + meta_path = os.path.join(output_path, slug + 
".meta") + pattern = os.path.basename(path) + if pattern.startswith("*."): + suffix = pattern[1:] + else: + suffix = ".txt" + txt_path = os.path.join(output_path, slug + suffix) + + if (not onefile and os.path.isfile(meta_path)) or \ + os.path.isfile(txt_path): + print "The title already exists!" + exit() + + if onefile: + if post_format not in ('rest', 'markdown'): + print "ERROR: Unknown post format %s" % post_format + return + with codecs.open(txt_path, "wb+", "utf8") as fd: + if post_format == 'markdown': + fd.write('\n') + fd.write(u"Write your post here.") + else: + with codecs.open(meta_path, "wb+", "utf8") as fd: + fd.write(data) + with codecs.open(txt_path, "wb+", "utf8") as fd: + fd.write(u"Write your post here.") + print "Your post's metadata is at: ", meta_path + print "Your post's text is at: ", txt_path diff --git a/nikola/plugins/command_serve.plugin b/nikola/plugins/command_serve.plugin new file mode 100644 index 0000000..684935d --- /dev/null +++ b/nikola/plugins/command_serve.plugin @@ -0,0 +1,10 @@ +[Core] +Name = serve +Module = command_serve + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Start test server. 
+ diff --git a/nikola/plugins/command_serve.py b/nikola/plugins/command_serve.py new file mode 100644 index 0000000..626b117 --- /dev/null +++ b/nikola/plugins/command_serve.py @@ -0,0 +1,40 @@ +from optparse import OptionParser +import os +from BaseHTTPServer import HTTPServer +from SimpleHTTPServer import SimpleHTTPRequestHandler + +from nikola.plugin_categories import Command + + +class CommandBuild(Command): + """Start test server.""" + + name = "serve" + + def run(self, *args): + """Start test server.""" + + parser = OptionParser(usage="nikola %s [options]" % self.name) + parser.add_option("-p", "--port", dest="port", + help="Port numer (default: 8000)", default=8000, + type="int") + parser.add_option("-a", "--address", dest="address", + help="Address to bind (default: 127.0.0.1)", + default='127.0.0.1') + (options, args) = parser.parse_args(list(args)) + + out_dir = self.site.config['OUTPUT_FOLDER'] + if not os.path.isdir(out_dir): + print "Error: Missing '%s' folder?" % out_dir + else: + os.chdir(out_dir) + httpd = HTTPServer((options.address, options.port), + OurHTTPRequestHandler) + sa = httpd.socket.getsockname() + print "Serving HTTP on", sa[0], "port", sa[1], "..." 
+ httpd.serve_forever() + + +class OurHTTPRequestHandler(SimpleHTTPRequestHandler): + extensions_map = dict(SimpleHTTPRequestHandler.extensions_map) + extensions_map[""] = "text/plain" diff --git a/nikola/plugins/compile_html.plugin b/nikola/plugins/compile_html.plugin new file mode 100644 index 0000000..f6cdfbc --- /dev/null +++ b/nikola/plugins/compile_html.plugin @@ -0,0 +1,10 @@ +[Core] +Name = html +Module = compile_html + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Compile HTML into HTML (just copy) + diff --git a/nikola/plugins/compile_html.py b/nikola/plugins/compile_html.py new file mode 100644 index 0000000..8241030 --- /dev/null +++ b/nikola/plugins/compile_html.py @@ -0,0 +1,20 @@ +"""Implementation of compile_html based on markdown.""" + +import os +import shutil + + +from nikola.plugin_categories import PageCompiler + + +class CompileHtml(PageCompiler): + """Compile HTML into HTML.""" + + name = "html" + + def compile_html(self, source, dest): + try: + os.makedirs(os.path.dirname(dest)) + except: + pass + shutil.copyfile(source, dest) diff --git a/nikola/plugins/compile_markdown.plugin b/nikola/plugins/compile_markdown.plugin new file mode 100644 index 0000000..f3e119b --- /dev/null +++ b/nikola/plugins/compile_markdown.plugin @@ -0,0 +1,10 @@ +[Core] +Name = markdown +Module = compile_markdown + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Compile Markdown into HTML + diff --git a/nikola/plugins/compile_markdown/__init__.py b/nikola/plugins/compile_markdown/__init__.py new file mode 100644 index 0000000..958cfa3 --- /dev/null +++ b/nikola/plugins/compile_markdown/__init__.py @@ -0,0 +1,33 @@ +"""Implementation of compile_html based on markdown.""" + +import codecs +import os +import re + +from markdown import markdown + +from nikola.plugin_categories import PageCompiler + + +class CompileMarkdown(PageCompiler): + 
"""Compile reSt into HTML.""" + + name = "markdown" + + def compile_html(self, source, dest): + try: + os.makedirs(os.path.dirname(dest)) + except: + pass + with codecs.open(dest, "w+", "utf8") as out_file: + with codecs.open(source, "r", "utf8") as in_file: + data = in_file.read() + output = markdown(data, ['fenced_code', 'codehilite']) + # remove the H1 because there is "title" h1. + output = re.sub(r'

    .*

    ', '', output) + # python-markdown's highlighter uses the class 'codehilite' to wrap + # code, # instead of the standard 'code'. None of the standard + # pygments stylesheets use this class, so swap it to be 'code' + output = re.sub(r'(]+class="[^"]*)codehilite([^>]+)', + r'\1code\2', output) + out_file.write(output) diff --git a/nikola/plugins/compile_rest.plugin b/nikola/plugins/compile_rest.plugin new file mode 100644 index 0000000..67eb562 --- /dev/null +++ b/nikola/plugins/compile_rest.plugin @@ -0,0 +1,10 @@ +[Core] +Name = rest +Module = compile_rest + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Compile reSt into HTML + diff --git a/nikola/plugins/compile_rest/__init__.py b/nikola/plugins/compile_rest/__init__.py new file mode 100644 index 0000000..0a25a06 --- /dev/null +++ b/nikola/plugins/compile_rest/__init__.py @@ -0,0 +1,79 @@ +import codecs +import os + +import docutils.core +import docutils.io +from docutils.parsers.rst import directives + +from pygments_code_block_directive import ( + code_block_directive, + listings_directive) +directives.register_directive('code-block', code_block_directive) +directives.register_directive('listing', listings_directive) + +import pygments_code_block_directive +# Below is to make pyflakes happy (sigh) +pygments_code_block_directive +from youtube import youtube +directives.register_directive('youtube', youtube) + +from nikola.plugin_categories import PageCompiler + + +class CompileRest(PageCompiler): + """Compile reSt into HTML.""" + + name = "rest" + + def compile_html(self, source, dest): + """Compile reSt into HTML.""" + try: + os.makedirs(os.path.dirname(dest)) + except: + pass + error_level = 100 + with codecs.open(dest, "w+", "utf8") as out_file: + with codecs.open(source, "r", "utf8") as in_file: + data = in_file.read() + output, error_level = rst2html(data, + settings_overrides={'initial_header_level': 2}) + out_file.write(output) + if 
error_level < 3: + return True + else: + return False + + +def rst2html(source, source_path=None, source_class=docutils.io.StringInput, + destination_path=None, + reader=None, reader_name='standalone', + parser=None, parser_name='restructuredtext', + writer=None, writer_name='html', + settings=None, settings_spec=None, + settings_overrides=None, config_section=None, + enable_exit_status=None): + """ + Set up & run a `Publisher`, and return a dictionary of document parts. + Dictionary keys are the names of parts, and values are Unicode strings; + encoding is up to the client. For programmatic use with string I/O. + + For encoded string input, be sure to set the 'input_encoding' setting to + the desired encoding. Set it to 'unicode' for unencoded Unicode string + input. Here's how:: + + publish_parts(..., settings_overrides={'input_encoding': 'unicode'}) + + Parameters: see `publish_programmatically`. + """ + output, pub = docutils.core.publish_programmatically( + source=source, source_path=source_path, source_class=source_class, + destination_class=docutils.io.StringOutput, + destination=None, destination_path=destination_path, + reader=reader, reader_name=reader_name, + parser=parser, parser_name=parser_name, + writer=writer, writer_name=writer_name, + settings=settings, settings_spec=settings_spec, + settings_overrides=settings_overrides, + config_section=config_section, + enable_exit_status=enable_exit_status) + return pub.writer.parts['fragment'], pub.document.reporter.max_level diff --git a/nikola/plugins/compile_rest/pygments_code_block_directive.py b/nikola/plugins/compile_rest/pygments_code_block_directive.py new file mode 100644 index 0000000..ac91f3c --- /dev/null +++ b/nikola/plugins/compile_rest/pygments_code_block_directive.py @@ -0,0 +1,401 @@ +# -*- coding: utf-8 -*- +#$Date: 2012-02-28 21:07:21 -0300 (Tue, 28 Feb 2012) $ +#$Revision: 2443 $ + +# :Author: a Pygments author|contributor; Felix Wiemann; Guenter Milde +# :Date: $Date: 2012-02-28 21:07:21 
-0300 (Tue, 28 Feb 2012) $ +# :Copyright: This module has been placed in the public domain. +# +# This is a merge of `Using Pygments in ReST documents`_ from the pygments_ +# documentation, and a `proof of concept`_ by Felix Wiemann. +# +# ========== =========================================================== +# 2007-06-01 Removed redundancy from class values. +# 2007-06-04 Merge of successive tokens of same type +# (code taken from pygments.formatters.others). +# 2007-06-05 Separate docutils formatter script +# Use pygments' CSS class names (like the html formatter) +# allowing the use of pygments-produced style sheets. +# 2007-06-07 Merge in the formatting of the parsed tokens +# (misnamed as docutils_formatter) as class DocutilsInterface +# 2007-06-08 Failsave implementation (fallback to a standard literal block +# if pygments not found) +# ========== =========================================================== +# +# :: + +"""Define and register a code-block directive using pygments""" + + +# Requirements +# ------------ +# :: + +import codecs +from copy import copy +import os +import urlparse + +from docutils import nodes, core +from docutils.parsers.rst import directives + +pygments = None +try: + import pygments + from pygments.lexers import get_lexer_by_name + from pygments.formatters.html import _get_ttype_class +except ImportError: + pass + + +# Customisation +# ------------- +# +# Do not insert inline nodes for the following tokens. +# (You could add e.g. Token.Punctuation like ``['', 'p']``.) :: + +unstyled_tokens = [''] + + +# DocutilsInterface +# ----------------- +# +# This interface class combines code from +# pygments.formatters.html and pygments.formatters.others. +# +# It does not require anything of docutils and could also become a part of +# pygments:: + +class DocutilsInterface(object): + """Parse `code` string and yield "classified" tokens. 
+ + Arguments + + code -- string of source code to parse + language -- formal language the code is written in. + + Merge subsequent tokens of the same token-type. + + Yields the tokens as ``(ttype_class, value)`` tuples, + where ttype_class is taken from pygments.token.STANDARD_TYPES and + corresponds to the class argument used in pygments html output. + + """ + + def __init__(self, code, language, custom_args={}): + self.code = code + self.language = language + self.custom_args = custom_args + + def lex(self): + """Get lexer for language (use text as fallback)""" + try: + if self.language and unicode(self.language).lower() != 'none': + lexer = get_lexer_by_name(self.language.lower(), + **self.custom_args + ) + else: + lexer = get_lexer_by_name('text', **self.custom_args) + except ValueError: + # what happens if pygment isn't present ? + lexer = get_lexer_by_name('text') + return pygments.lex(self.code, lexer) + + def join(self, tokens): + """join subsequent tokens of same token-type + """ + tokens = iter(tokens) + (lasttype, lastval) = tokens.next() + for ttype, value in tokens: + if ttype is lasttype: + lastval += value + else: + yield(lasttype, lastval) + (lasttype, lastval) = (ttype, value) + yield(lasttype, lastval) + + def __iter__(self): + """parse code string and yield "clasified" tokens + """ + try: + tokens = self.lex() + except IOError: + yield ('', self.code) + return + + for ttype, value in self.join(tokens): + yield (_get_ttype_class(ttype), value) + + +# code_block_directive +# -------------------- +# :: + +def code_block_directive(name, arguments, options, content, lineno, + content_offset, block_text, state, state_machine): + """Parse and classify content of a code_block.""" + if 'include' in options: + try: + if 'encoding' in options: + encoding = options['encoding'] + else: + encoding = 'utf-8' + content = codecs.open( + options['include'], 'r', encoding).read().rstrip() + except (IOError, UnicodeError): # no file or problem reading it + content 
= u'' + line_offset = 0 + if content: + # here we define the start-at and end-at options + # so that limit is included in extraction + # this is different than the start-after directive of docutils + # (docutils/parsers/rst/directives/misc.py L73+) + # which excludes the beginning + # the reason is we want to be able to define a start-at like + # def mymethod(self) + # and have such a definition included + + after_text = options.get('start-at', None) + if after_text: + # skip content in include_text before + # *and NOT incl.* a matching text + after_index = content.find(after_text) + if after_index < 0: + raise state_machine.reporter.severe( + 'Problem with "start-at" option of "%s" ' + 'code-block directive:\nText not found.' % + options['start-at']) + content = content[after_index:] + line_offset = len(content[:after_index].splitlines()) + + after_text = options.get('start-after', None) + if after_text: + # skip content in include_text before + # *and incl.* a matching text + after_index = content.find(after_text) + if after_index < 0: + raise state_machine.reporter.severe( + 'Problem with "start-after" option of "%s" ' + 'code-block directive:\nText not found.' % + options['start-after']) + line_offset = len(content[:after_index + + len(after_text)].splitlines()) + content = content[after_index + len(after_text):] + + # same changes here for the same reason + before_text = options.get('end-at', None) + if before_text: + # skip content in include_text after + # *and incl.* a matching text + before_index = content.find(before_text) + if before_index < 0: + raise state_machine.reporter.severe( + 'Problem with "end-at" option of "%s" ' + 'code-block directive:\nText not found.' 
% + options['end-at']) + content = content[:before_index + len(before_text)] + + before_text = options.get('end-before', None) + if before_text: + # skip content in include_text after + # *and NOT incl.* a matching text + before_index = content.find(before_text) + if before_index < 0: + raise state_machine.reporter.severe( + 'Problem with "end-before" option of "%s" ' + 'code-block directive:\nText not found.' % + options['end-before']) + content = content[:before_index] + + else: + content = u'\n'.join(content) + + if 'tabsize' in options: + tabw = options['tabsize'] + else: + tabw = int(options.get('tab-width', 8)) + + content = content.replace('\t', ' ' * tabw) + + withln = "linenos" in options + if not "linenos_offset" in options: + line_offset = 0 + + language = arguments[0] + # create a literal block element and set class argument + code_block = nodes.literal_block(classes=["code", language]) + + if withln: + lineno = 1 + line_offset + total_lines = content.count('\n') + 1 + line_offset + lnwidth = len(str(total_lines)) + fstr = "\n%%%dd " % lnwidth + code_block += nodes.inline(fstr[1:] % lineno, fstr[1:] % lineno, + classes=['linenumber']) + + # parse content with pygments and add to code_block element + content = content.rstrip() + if pygments is None: + code_block += nodes.Text(content, content) + else: + # The [:-1] is because pygments adds a trailing \n which looks bad + l = list(DocutilsInterface(content, language, options)) + if l[-1] == ('', u'\n'): + l = l[:-1] + for cls, value in l: + if withln and "\n" in value: + # Split on the "\n"s + values = value.split("\n") + # The first piece, pass as-is + code_block += nodes.Text(values[0], values[0]) + # On the second and later pieces, insert \n and linenos + linenos = range(lineno, lineno + len(values)) + for chunk, ln in zip(values, linenos)[1:]: + if ln <= total_lines: + code_block += nodes.inline(fstr % ln, fstr % ln, + classes=['linenumber']) + code_block += nodes.Text(chunk, chunk) + lineno += 
len(values) - 1 + + elif cls in unstyled_tokens: + # insert as Text to decrease the verbosity of the output. + code_block += nodes.Text(value, value) + else: + code_block += nodes.inline(value, value, classes=[cls]) + + return [code_block] + +# Custom argument validators +# -------------------------- +# :: +# +# Move to separated module?? + + +def string_list(argument): + """ + Converts a space- or comma-separated list of values into a python list + of strings. + (Directive option conversion function) + Based in positive_int_list of docutils.parsers.rst.directives + """ + if ',' in argument: + entries = argument.split(',') + else: + entries = argument.split() + return entries + + +def string_bool(argument): + """ + Converts True, true, False, False in python boolean values + """ + if argument is None: + msg = 'argument required but none supplied; choose "True" or "False"' + raise ValueError(msg) + + elif argument.lower() == 'true': + return True + elif argument.lower() == 'false': + return False + else: + raise ValueError('"%s" unknown; choose from "True" or "False"' + % argument) + + +def csharp_unicodelevel(argument): + return directives.choice(argument, ('none', 'basic', 'full')) + + +def lhs_litstyle(argument): + return directives.choice(argument, ('bird', 'latex')) + + +def raw_compress(argument): + return directives.choice(argument, ('gz', 'bz2')) + + +def listings_directive(name, arguments, options, content, lineno, + content_offset, block_text, state, state_machine): + fname = arguments[0] + options['include'] = os.path.join('listings', fname) + target = urlparse.urlunsplit(("link", 'listing', fname, '', '')) + generated_nodes = [core.publish_doctree('`%s <%s>`_' % (fname, target))[0]] + generated_nodes += code_block_directive(name, [arguments[1]], + options, content, lineno, content_offset, block_text, + state, state_machine) + return generated_nodes + +code_block_directive.arguments = (1, 0, 1) +listings_directive.arguments = (2, 0, 1) 
+code_block_directive.content = 1 +listings_directive.content = 1 +code_block_directive.options = {'include': directives.unchanged_required, + 'start-at': directives.unchanged_required, + 'end-at': directives.unchanged_required, + 'start-after': directives.unchanged_required, + 'end-before': directives.unchanged_required, + 'linenos': directives.unchanged, + 'linenos_offset': directives.unchanged, + 'tab-width': directives.unchanged, + # generic + 'stripnl': string_bool, + 'stripall': string_bool, + 'ensurenl': string_bool, + 'tabsize': directives.positive_int, + 'encoding': directives.encoding, + # Lua + 'func_name_hightlighting': string_bool, + 'disabled_modules': string_list, + # Python Console + 'python3': string_bool, + # Delphi + 'turbopascal': string_bool, + 'delphi': string_bool, + 'freepascal': string_bool, + 'units': string_list, + # Modula2 + 'pim': string_bool, + 'iso': string_bool, + 'objm2': string_bool, + 'gm2ext': string_bool, + # CSharp + 'unicodelevel': csharp_unicodelevel, + # Literate haskell + 'litstyle': lhs_litstyle, + # Raw + 'compress': raw_compress, + # Rst + 'handlecodeblocks': string_bool, + # Php + 'startinline': string_bool, + 'funcnamehighlighting': string_bool, + 'disabledmodules': string_list, + } + +listings_directive.options = copy(code_block_directive.options) +listings_directive.options.pop('include') + +# .. _doctutils: http://docutils.sf.net/ +# .. _pygments: http://pygments.org/ +# .. _Using Pygments in ReST documents: http://pygments.org/docs/rstdirective/ +# .. 
_proof of concept: +# http://article.gmane.org/gmane.text.docutils.user/3689 +# +# Test output +# ----------- +# +# If called from the command line, call the docutils publisher to render the +# input:: + +if __name__ == '__main__': + from docutils.core import publish_cmdline, default_description + from docutils.parsers.rst import directives + directives.register_directive('code-block', code_block_directive) + description = "code-block directive test output" + default_description + try: + import locale + locale.setlocale(locale.LC_ALL, '') + except Exception: + pass + publish_cmdline(writer_name='html', description=description) diff --git a/nikola/plugins/compile_rest/youtube.py b/nikola/plugins/compile_rest/youtube.py new file mode 100644 index 0000000..584160b --- /dev/null +++ b/nikola/plugins/compile_rest/youtube.py @@ -0,0 +1,33 @@ +from docutils import nodes +from docutils.parsers.rst import directives + +CODE = """\ + +""" + + +def youtube(name, args, options, content, lineno, + contentOffset, blockText, state, stateMachine): + """ Restructured text extension for inserting youtube embedded videos """ + if len(content) == 0: + return + string_vars = { + 'yid': content[0], + 'width': 425, + 'height': 344, + 'extra': '' + } + extra_args = content[1:] # Because content[0] is ID + extra_args = [ea.strip().split("=") for ea in extra_args] # key=value + extra_args = [ea for ea in extra_args if len(ea) == 2] # drop bad lines + extra_args = dict(extra_args) + if 'width' in extra_args: + string_vars['width'] = extra_args.pop('width') + if 'height' in extra_args: + string_vars['height'] = extra_args.pop('height') + return [nodes.raw('', CODE % (string_vars), format='html')] +youtube.content = True +directives.register_directive('youtube', youtube) diff --git a/nikola/plugins/task_archive.plugin b/nikola/plugins/task_archive.plugin new file mode 100644 index 0000000..23f93ed --- /dev/null +++ b/nikola/plugins/task_archive.plugin @@ -0,0 +1,10 @@ +[Core] +Name = 
render_archive +Module = task_archive + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Generates the blog's archive pages. + diff --git a/nikola/plugins/task_archive.py b/nikola/plugins/task_archive.py new file mode 100644 index 0000000..4c97101 --- /dev/null +++ b/nikola/plugins/task_archive.py @@ -0,0 +1,77 @@ +import os + +from nikola.plugin_categories import Task +from nikola.utils import config_changed + + +class Archive(Task): + """Render the post archives.""" + + name = "render_archive" + + def gen_tasks(self): + kw = { + "messages": self.site.MESSAGES, + "translations": self.site.config['TRANSLATIONS'], + "output_folder": self.site.config['OUTPUT_FOLDER'], + "filters": self.site.config['FILTERS'], + } + self.site.scan_posts() + # TODO add next/prev links for years + template_name = "list.tmpl" + # TODO: posts_per_year is global, kill it + for year, posts in self.site.posts_per_year.items(): + for lang in kw["translations"]: + output_name = os.path.join( + kw['output_folder'], self.site.path("archive", year, lang)) + post_list = [self.site.global_data[post] for post in posts] + post_list.sort(cmp=lambda a, b: cmp(a.date, b.date)) + post_list.reverse() + context = {} + context["lang"] = lang + context["items"] = [("[%s] %s" % + (post.date, post.title(lang)), post.permalink(lang)) + for post in post_list] + context["permalink"] = self.site.link("archive", year, lang) + context["title"] = kw["messages"][lang]["Posts for year %s"]\ + % year + for task in self.site.generic_post_list_renderer( + lang, + post_list, + output_name, + template_name, + kw['filters'], + context, + ): + task['uptodate'] = [config_changed({ + 1: task['uptodate'][0].config, + 2: kw})] + task['basename'] = self.name + yield task + + # And global "all your years" page + years = self.site.posts_per_year.keys() + years.sort(reverse=True) + template_name = "list.tmpl" + kw['years'] = years + for lang in kw["translations"]: + 
context = {} + output_name = os.path.join( + kw['output_folder'], self.site.path("archive", None, lang)) + context["title"] = kw["messages"][lang]["Archive"] + context["items"] = [(year, self.site.link("archive", year, lang)) + for year in years] + context["permalink"] = self.site.link("archive", None, lang) + for task in self.site.generic_post_list_renderer( + lang, + [], + output_name, + template_name, + kw['filters'], + context, + ): + task['uptodate'] = [config_changed({ + 1: task['uptodate'][0].config, + 2: kw})] + task['basename'] = self.name + yield task diff --git a/nikola/plugins/task_copy_assets.plugin b/nikola/plugins/task_copy_assets.plugin new file mode 100644 index 0000000..b11133f --- /dev/null +++ b/nikola/plugins/task_copy_assets.plugin @@ -0,0 +1,10 @@ +[Core] +Name = copy_assets +Module = task_copy_assets + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Copy theme assets into output. + diff --git a/nikola/plugins/task_copy_assets.py b/nikola/plugins/task_copy_assets.py new file mode 100644 index 0000000..ac31fd7 --- /dev/null +++ b/nikola/plugins/task_copy_assets.py @@ -0,0 +1,35 @@ +import os + +from nikola.plugin_categories import Task +from nikola import utils + + +class CopyAssets(Task): + """Copy theme assets into output.""" + + name = "copy_assets" + + def gen_tasks(self): + """Create tasks to copy the assets of the whole theme chain. + + If a file is present on two themes, use the version + from the "youngest" theme. 
+ """ + + kw = { + "themes": self.site.THEMES, + "output_folder": self.site.config['OUTPUT_FOLDER'], + "filters": self.site.config['FILTERS'], + } + + tasks = {} + for theme_name in kw['themes']: + src = os.path.join(utils.get_theme_path(theme_name), 'assets') + dst = os.path.join(kw['output_folder'], 'assets') + for task in utils.copy_tree(src, dst): + if task['name'] in tasks: + continue + tasks[task['name']] = task + task['uptodate'] = [utils.config_changed(kw)] + task['basename'] = self.name + yield utils.apply_filters(task, kw['filters']) diff --git a/nikola/plugins/task_copy_files.plugin b/nikola/plugins/task_copy_files.plugin new file mode 100644 index 0000000..0bfc5be --- /dev/null +++ b/nikola/plugins/task_copy_files.plugin @@ -0,0 +1,10 @@ +[Core] +Name = copy_files +Module = task_copy_files + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Copy static files into the output. + diff --git a/nikola/plugins/task_copy_files.py b/nikola/plugins/task_copy_files.py new file mode 100644 index 0000000..a053905 --- /dev/null +++ b/nikola/plugins/task_copy_files.py @@ -0,0 +1,35 @@ +import os + +from nikola.plugin_categories import Task +from nikola import utils + + +class CopyFiles(Task): + """Copy static files into the output folder.""" + + name = "copy_files" + + def gen_tasks(self): + """Copy static files into the output folder.""" + + kw = { + 'files_folders': self.site.config['FILES_FOLDERS'], + 'output_folder': self.site.config['OUTPUT_FOLDER'], + 'filters': self.site.config['FILTERS'], + } + + flag = False + for src in kw['files_folders']: + dst = kw['output_folder'] + filters = kw['filters'] + real_dst = os.path.join(dst, kw['files_folders'][src]) + for task in utils.copy_tree(src, real_dst, link_cutoff=dst): + flag = True + task['basename'] = self.name + task['uptodate'] = [utils.config_changed(kw)] + yield utils.apply_filters(task, filters) + if not flag: + yield { + 'basename': self.name, + 
'actions': (), + } diff --git a/nikola/plugins/task_create_bundles.plugin b/nikola/plugins/task_create_bundles.plugin new file mode 100644 index 0000000..5d4f6d3 --- /dev/null +++ b/nikola/plugins/task_create_bundles.plugin @@ -0,0 +1,10 @@ +[Core] +Name = create_bundles +Module = task_create_bundles + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Theme bundles using WebAssets + diff --git a/nikola/plugins/task_create_bundles.py b/nikola/plugins/task_create_bundles.py new file mode 100644 index 0000000..ebca0b7 --- /dev/null +++ b/nikola/plugins/task_create_bundles.py @@ -0,0 +1,85 @@ +import os + +try: + import webassets +except ImportError: + webassets = None # NOQA + +from nikola.plugin_categories import LateTask +from nikola import utils + + +class BuildBundles(LateTask): + """Bundle assets using WebAssets.""" + + name = "build_bundles" + + def set_site(self, site): + super(BuildBundles, self).set_site(site) + if webassets is None: + self.site.config['USE_BUNDLES'] = False + + def gen_tasks(self): + """Bundle assets using WebAssets.""" + + kw = { + 'filters': self.site.config['FILTERS'], + 'output_folder': self.site.config['OUTPUT_FOLDER'], + 'theme_bundles': get_theme_bundles(self.site.THEMES), + } + + def build_bundle(output, inputs): + out_dir = os.path.join(kw['output_folder'], os.path.dirname(output)) + inputs = [i for i in inputs if os.path.isfile( + os.path.join(out_dir, i))] + cache_dir = os.path.join('cache', 'webassets') + if not os.path.isdir(cache_dir): + os.makedirs(cache_dir) + env = webassets.Environment(out_dir, os.path.dirname(output), + cache=cache_dir) + bundle = webassets.Bundle(*inputs, + output=os.path.basename(output)) + env.register(output, bundle) + # This generates the file + env[output].urls() + + flag = False + if webassets is not None and self.site.config['USE_BUNDLES'] is not False: + for name, files in kw['theme_bundles'].items(): + output_path = 
os.path.join(kw['output_folder'], name) + dname = os.path.dirname(name) + file_dep = [os.path.join('output', dname, fname) + for fname in files] + task = { + 'file_dep': file_dep, + 'basename': self.name, + 'name': output_path, + 'actions': [(build_bundle, (name, files))], + 'targets': [output_path], + 'uptodate': [utils.config_changed(kw)] + } + flag = True + yield utils.apply_filters(task, kw['filters']) + if flag is False: # No page rendered, yield a dummy task + yield { + 'basename': self.name, + 'uptodate': [True], + 'name': 'None', + 'actions': [], + } + + +def get_theme_bundles(themes): + """Given a theme chain, return the bundle definitions.""" + bundles = {} + for theme_name in themes: + bundles_path = os.path.join( + utils.get_theme_path(theme_name), 'bundles') + if os.path.isfile(bundles_path): + with open(bundles_path) as fd: + for line in fd: + name, files = line.split('=') + files = [f.strip() for f in files.split(',')] + bundles[name.strip()] = files + break + return bundles diff --git a/nikola/plugins/task_indexes.plugin b/nikola/plugins/task_indexes.plugin new file mode 100644 index 0000000..1536006 --- /dev/null +++ b/nikola/plugins/task_indexes.plugin @@ -0,0 +1,10 @@ +[Core] +Name = render_index +Module = task_indexes + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Generates the blog's index pages. 
+ diff --git a/nikola/plugins/task_indexes.py b/nikola/plugins/task_indexes.py new file mode 100644 index 0000000..2311ef3 --- /dev/null +++ b/nikola/plugins/task_indexes.py @@ -0,0 +1,81 @@ +import os + +from nikola.plugin_categories import Task +from nikola.utils import config_changed + + +class Indexes(Task): + """Render the blog indexes.""" + + name = "render_indexes" + + def gen_tasks(self): + self.site.scan_posts() + + kw = { + "translations": self.site.config['TRANSLATIONS'], + "index_display_post_count": + self.site.config['INDEX_DISPLAY_POST_COUNT'], + "messages": self.site.MESSAGES, + "index_teasers": self.site.config['INDEX_TEASERS'], + "output_folder": self.site.config['OUTPUT_FOLDER'], + "filters": self.site.config['FILTERS'], + } + + template_name = "index.tmpl" + # TODO: timeline is global, get rid of it + posts = [x for x in self.site.timeline if x.use_in_feeds] + # Split in smaller lists + lists = [] + while posts: + lists.append(posts[:kw["index_display_post_count"]]) + posts = posts[kw["index_display_post_count"]:] + num_pages = len(lists) + if not lists: + yield { + 'basename': 'render_indexes', + 'actions': [], + } + for lang in kw["translations"]: + for i, post_list in enumerate(lists): + context = {} + if self.site.config.get("INDEXES_TITLE", ""): + indexes_title = self.site.config['INDEXES_TITLE'] + else: + indexes_title = self.site.config["BLOG_TITLE"] + if not i: + output_name = "index.html" + context["title"] = indexes_title + else: + output_name = "index-%s.html" % i + if self.site.config.get("INDEXES_PAGES", ""): + indexes_pages = self.site.config["INDEXES_PAGES"] % i + else: + indexes_pages = " (" + \ + kw["messages"][lang]["old posts page %d"] % i + ")" + context["title"] = indexes_title + indexes_pages + context["prevlink"] = None + context["nextlink"] = None + context['index_teasers'] = kw['index_teasers'] + if i > 1: + context["prevlink"] = "index-%s.html" % (i - 1) + if i == 1: + context["prevlink"] = "index.html" + if i < 
num_pages - 1: + context["nextlink"] = "index-%s.html" % (i + 1) + context["permalink"] = self.site.link("index", i, lang) + output_name = os.path.join( + kw['output_folder'], self.site.path("index", i, lang)) + for task in self.site.generic_post_list_renderer( + lang, + post_list, + output_name, + template_name, + kw['filters'], + context, + ): + task['uptodate'] = [config_changed({ + 1: task['uptodate'][0].config, + 2: kw})] + task['basename'] = 'render_indexes' + yield task diff --git a/nikola/plugins/task_redirect.plugin b/nikola/plugins/task_redirect.plugin new file mode 100644 index 0000000..285720b --- /dev/null +++ b/nikola/plugins/task_redirect.plugin @@ -0,0 +1,10 @@ +[Core] +Name = redirect +Module = task_redirect + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Create redirect pages. + diff --git a/nikola/plugins/task_redirect.py b/nikola/plugins/task_redirect.py new file mode 100644 index 0000000..7c2ccb1 --- /dev/null +++ b/nikola/plugins/task_redirect.py @@ -0,0 +1,48 @@ +import codecs +import os + +from nikola.plugin_categories import Task +from nikola import utils + + +class Redirect(Task): + """Copy theme assets into output.""" + + name = "redirect" + + def gen_tasks(self): + """Generate redirections tasks.""" + + kw = { + 'redirections': self.site.config['REDIRECTIONS'], + 'output_folder': self.site.config['OUTPUT_FOLDER'], + } + + if not kw['redirections']: + # If there are no redirections, still needs to create a + # dummy action so dependencies don't fail + yield { + 'basename': self.name, + 'name': 'None', + 'uptodate': [True], + 'actions': [], + } + + else: + for src, dst in kw["redirections"]: + src_path = os.path.join(kw["output_folder"], src) + yield { + 'basename': self.name, + 'name': src_path, + 'targets': [src_path], + 'actions': [(create_redirect, (src_path, dst))], + 'clean': True, + 'uptodate': [utils.config_changed(kw)], + } + + +def create_redirect(src, dst): + with 
codecs.open(src, "wb+", "utf8") as fd: + fd.write(('' + + '' + + '') % dst) diff --git a/nikola/plugins/task_render_galleries.plugin b/nikola/plugins/task_render_galleries.plugin new file mode 100644 index 0000000..e0a86c0 --- /dev/null +++ b/nikola/plugins/task_render_galleries.plugin @@ -0,0 +1,10 @@ +[Core] +Name = render_galleries +Module = task_render_galleries + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Create image galleries automatically. + diff --git a/nikola/plugins/task_render_galleries.py b/nikola/plugins/task_render_galleries.py new file mode 100644 index 0000000..27e13ea --- /dev/null +++ b/nikola/plugins/task_render_galleries.py @@ -0,0 +1,305 @@ +import codecs +import datetime +import glob +import os +import uuid + +Image = None +try: + import Image as _Image + import ExifTags + Image = _Image +except ImportError: + try: + from PIL import Image, ExifTags # NOQA + except ImportError: + pass + + +from nikola.plugin_categories import Task +from nikola import utils + + +class Galleries(Task): + """Copy theme assets into output.""" + + name = "render_galleries" + dates = {} + + def gen_tasks(self): + """Render image galleries.""" + + kw = { + 'thumbnail_size': self.site.config['THUMBNAIL_SIZE'], + 'max_image_size': self.site.config['MAX_IMAGE_SIZE'], + 'output_folder': self.site.config['OUTPUT_FOLDER'], + 'default_lang': self.site.config['DEFAULT_LANG'], + 'blog_description': self.site.config['BLOG_DESCRIPTION'], + 'use_filename_as_title': self.site.config['USE_FILENAME_AS_TITLE'], + } + + # FIXME: lots of work is done even when images don't change, + # which should be moved into the task. 
+ + template_name = "gallery.tmpl" + + gallery_list = [] + for root, dirs, files in os.walk('galleries'): + gallery_list.append(root) + if not gallery_list: + yield { + 'basename': 'render_galleries', + 'actions': [], + } + return + + # gallery_path is "gallery/name" + for gallery_path in gallery_list: + # gallery_name is "name" + splitted = gallery_path.split(os.sep)[1:] + if not splitted: + gallery_name = '' + else: + gallery_name = os.path.join(*splitted) + # output_gallery is "output/GALLERY_PATH/name" + output_gallery = os.path.dirname(os.path.join(kw["output_folder"], + self.site.path("gallery", gallery_name, None))) + if not os.path.isdir(output_gallery): + yield { + 'basename': 'render_galleries', + 'name': output_gallery, + 'actions': [(os.makedirs, (output_gallery,))], + 'targets': [output_gallery], + 'clean': True, + 'uptodate': [utils.config_changed(kw)], + } + # image_list contains "gallery/name/image_name.jpg" + image_list = glob.glob(gallery_path + "/*jpg") +\ + glob.glob(gallery_path + "/*JPG") +\ + glob.glob(gallery_path + "/*PNG") +\ + glob.glob(gallery_path + "/*png") + + # Filter ignore images + try: + def add_gallery_path(index): + return "{0}/{1}".format(gallery_path, index) + + exclude_path = os.path.join(gallery_path, "exclude.meta") + try: + f = open(exclude_path, 'r') + excluded_image_name_list = f.read().split() + except IOError: + excluded_image_name_list = [] + + excluded_image_list = map(add_gallery_path, + excluded_image_name_list) + image_set = set(image_list) - set(excluded_image_list) + image_list = list(image_set) + except IOError: + pass + + # List of sub-galleries + folder_list = [x.split(os.sep)[-2] for x in + glob.glob(os.path.join(gallery_path, '*') + os.sep)] + + crumbs = gallery_path.split(os.sep)[:-1] + crumbs.append(os.path.basename(gallery_name)) + # TODO: write this in human + paths = ['/'.join(['..'] * (len(crumbs) - 1 - i)) for i in + range(len(crumbs[:-1]))] + ['#'] + crumbs = zip(paths, crumbs) + + image_list = [x 
for x in image_list if "thumbnail" not in x] + # Sort by date + image_list.sort(cmp=lambda a, b: cmp( + self.image_date(a), self.image_date(b))) + image_name_list = [os.path.basename(x) for x in image_list] + + thumbs = [] + # Do thumbnails and copy originals + for img, img_name in zip(image_list, image_name_list): + # img is "galleries/name/image_name.jpg" + # img_name is "image_name.jpg" + # fname, ext are "image_name", ".jpg" + fname, ext = os.path.splitext(img_name) + # thumb_path is + # "output/GALLERY_PATH/name/image_name.thumbnail.jpg" + thumb_path = os.path.join(output_gallery, + fname + ".thumbnail" + ext) + # thumb_path is "output/GALLERY_PATH/name/image_name.jpg" + orig_dest_path = os.path.join(output_gallery, img_name) + thumbs.append(os.path.basename(thumb_path)) + yield { + 'basename': 'render_galleries', + 'name': thumb_path, + 'file_dep': [img], + 'targets': [thumb_path], + 'actions': [ + (self.resize_image, + (img, thumb_path, kw['thumbnail_size'])) + ], + 'clean': True, + 'uptodate': [utils.config_changed(kw)], + } + yield { + 'basename': 'render_galleries', + 'name': orig_dest_path, + 'file_dep': [img], + 'targets': [orig_dest_path], + 'actions': [ + (self.resize_image, + (img, orig_dest_path, kw['max_image_size'])) + ], + 'clean': True, + 'uptodate': [utils.config_changed(kw)], + } + + # Remove excluded images + if excluded_image_name_list: + for img, img_name in zip(excluded_image_list, + excluded_image_name_list): + # img_name is "image_name.jpg" + # fname, ext are "image_name", ".jpg" + fname, ext = os.path.splitext(img_name) + excluded_thumb_dest_path = os.path.join(output_gallery, + fname + ".thumbnail" + ext) + excluded_dest_path = os.path.join(output_gallery, img_name) + yield { + 'basename': 'render_galleries', + 'name': excluded_thumb_dest_path, + 'file_dep': [exclude_path], + #'targets': [excluded_thumb_dest_path], + 'actions': [ + (utils.remove_file, (excluded_thumb_dest_path,)) + ], + 'clean': True, + 'uptodate': 
[utils.config_changed(kw)], + } + yield { + 'basename': 'render_galleries', + 'name': excluded_dest_path, + 'file_dep': [exclude_path], + #'targets': [excluded_dest_path], + 'actions': [ + (utils.remove_file, (excluded_dest_path,)) + ], + 'clean': True, + 'uptodate': [utils.config_changed(kw)], + } + + output_name = os.path.join(output_gallery, "index.html") + context = {} + context["lang"] = kw["default_lang"] + context["title"] = os.path.basename(gallery_path) + context["description"] = kw["blog_description"] + if kw['use_filename_as_title']: + img_titles = ['title="%s"' % utils.unslugify(fn[:-4]) + for fn in image_name_list] + else: + img_titles = [''] * len(image_name_list) + context["images"] = zip(image_name_list, thumbs, img_titles) + context["folders"] = folder_list + context["crumbs"] = crumbs + context["permalink"] = self.site.link( + "gallery", gallery_name, None) + + # Use galleries/name/index.txt to generate a blurb for + # the gallery, if it exists + index_path = os.path.join(gallery_path, "index.txt") + cache_dir = os.path.join('cache', 'galleries') + if not os.path.isdir(cache_dir): + os.makedirs(cache_dir) + index_dst_path = os.path.join(cache_dir, unicode(uuid.uuid1())+'.html') + if os.path.exists(index_path): + compile_html = self.site.get_compiler(index_path) + yield { + 'basename': 'render_galleries', + 'name': index_dst_path.encode('utf-8'), + 'file_dep': [index_path], + 'targets': [index_dst_path], + 'actions': [(compile_html, + [index_path, index_dst_path])], + 'clean': True, + 'uptodate': [utils.config_changed(kw)], + } + + file_dep = self.site.template_system.template_deps( + template_name) + image_list + + def render_gallery(output_name, context, index_dst_path): + if os.path.exists(index_dst_path): + with codecs.open(index_dst_path, "rb", "utf8") as fd: + context['text'] = fd.read() + file_dep.append(index_dst_path) + else: + context['text'] = '' + self.site.render_template(template_name, output_name, context) + + yield { + 'basename': 
'render_galleries', + 'name': output_name, + 'file_dep': file_dep, + 'targets': [output_name], + 'actions': [(render_gallery, + (output_name, context, index_dst_path))], + 'clean': True, + 'uptodate': [utils.config_changed({ + 1: kw, + 2: self.site.config['GLOBAL_CONTEXT']})], + } + + def resize_image(self, src, dst, max_size): + """Make a copy of the image in the requested size.""" + if not Image: + utils.copy_file(src, dst) + return + im = Image.open(src) + w, h = im.size + if w > max_size or h > max_size: + size = max_size, max_size + try: + exif = im._getexif() + except Exception: + exif = None + if exif is not None: + for tag, value in exif.items(): + decoded = ExifTags.TAGS.get(tag, tag) + + if decoded == 'Orientation': + if value == 3: + im = im.rotate(180) + elif value == 6: + im = im.rotate(270) + elif value == 8: + im = im.rotate(90) + + break + + im.thumbnail(size, Image.ANTIALIAS) + im.save(dst) + + else: + utils.copy_file(src, dst) + + def image_date(self, src): + """Try to figure out the date of the image.""" + if src not in self.dates: + im = Image.open(src) + try: + exif = im._getexif() + except Exception: + exif = None + if exif is not None: + for tag, value in exif.items(): + decoded = ExifTags.TAGS.get(tag, tag) + if decoded == 'DateTimeOriginal': + try: + self.dates[src] = datetime.datetime.strptime( + value, r'%Y:%m:%d %H:%M:%S') + break + except ValueError: # Invalid EXIF date. 
+ pass + if src not in self.dates: + self.dates[src] = datetime.datetime.fromtimestamp( + os.stat(src).st_mtime) + return self.dates[src] diff --git a/nikola/plugins/task_render_listings.plugin b/nikola/plugins/task_render_listings.plugin new file mode 100644 index 0000000..1f897b9 --- /dev/null +++ b/nikola/plugins/task_render_listings.plugin @@ -0,0 +1,10 @@ +[Core] +Name = render_listings +Module = task_render_listings + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Render code listings into output + diff --git a/nikola/plugins/task_render_listings.py b/nikola/plugins/task_render_listings.py new file mode 100644 index 0000000..7ec6e42 --- /dev/null +++ b/nikola/plugins/task_render_listings.py @@ -0,0 +1,81 @@ +import os + +from pygments import highlight +from pygments.lexers import get_lexer_for_filename, TextLexer +from pygments.formatters import HtmlFormatter + +from nikola.plugin_categories import Task +from nikola import utils + + +class Listings(Task): + """Render pretty listings.""" + + name = "render_listings" + + def gen_tasks(self): + """Render pretty code listings.""" + kw = { + "default_lang": self.site.config["DEFAULT_LANG"], + "listings_folder": self.site.config["LISTINGS_FOLDER"], + "output_folder": self.site.config["OUTPUT_FOLDER"], + } + + # Things to ignore in listings + ignored_extensions = (".pyc",) + + def render_listing(in_name, out_name): + with open(in_name, 'r') as fd: + try: + lexer = get_lexer_for_filename(in_name) + except: + lexer = TextLexer() + code = highlight(fd.read(), lexer, + HtmlFormatter(cssclass='code', + linenos="table", + nowrap=False, + lineanchors=utils.slugify(f), + anchorlinenos=True)) + title = os.path.basename(in_name) + crumbs = out_name.split(os.sep)[1:-1] + [title] + # TODO: write this in human + paths = ['/'.join(['..'] * (len(crumbs) - 2 - i)) for i in + range(len(crumbs[:-2]))] + ['.', '#'] + context = { + 'code': code, + 'title': title, + 
'crumbs': zip(paths, crumbs), + 'lang': kw['default_lang'], + 'description': title, + } + self.site.render_template('listing.tmpl', out_name, context) + flag = True + template_deps = self.site.template_system.template_deps('listing.tmpl') + for root, dirs, files in os.walk(kw['listings_folder']): + # Render all files + for f in files: + ext = os.path.splitext(f)[-1] + if ext in ignored_extensions: + continue + flag = False + in_name = os.path.join(root, f) + out_name = os.path.join( + kw['output_folder'], + root, + f) + '.html' + yield { + 'basename': self.name, + 'name': out_name.encode('utf8'), + 'file_dep': template_deps + [in_name], + 'targets': [out_name], + 'actions': [(render_listing, [in_name, out_name])], + # This is necessary to reflect changes in blog title, + # sidebar links, etc. + 'uptodate': [utils.config_changed( + self.site.config['GLOBAL_CONTEXT'])] + } + if flag: + yield { + 'basename': self.name, + 'actions': [], + } diff --git a/nikola/plugins/task_render_pages.plugin b/nikola/plugins/task_render_pages.plugin new file mode 100644 index 0000000..e2a358c --- /dev/null +++ b/nikola/plugins/task_render_pages.plugin @@ -0,0 +1,10 @@ +[Core] +Name = render_pages +Module = task_render_pages + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Create pages in the output. 
+
diff --git a/nikola/plugins/task_render_pages.py b/nikola/plugins/task_render_pages.py
new file mode 100644
index 0000000..954dc47
--- /dev/null
+++ b/nikola/plugins/task_render_pages.py
@@ -0,0 +1,45 @@
+from nikola.plugin_categories import Task
+from nikola.utils import config_changed
+
+
+class RenderPages(Task):
+    """Render every configured page into the output."""
+
+    name = "render_pages"
+
+    def gen_tasks(self):
+        """Yield the tasks that build the final pages.
+
+        Each task produced by ``self.site.generic_page_renderer`` is
+        relabelled with this plugin's basename, and its ``uptodate`` check
+        is widened to also cover the settings gathered here.  If no task
+        was produced at all, a single action-less placeholder is yielded.
+        """
+        settings = {
+            "post_pages": self.site.config["post_pages"],
+            "translations": self.site.config["TRANSLATIONS"],
+            "filters": self.site.config["FILTERS"],
+        }
+        self.site.scan_posts()
+        rendered_any = False
+        for language in settings["translations"]:
+            for pattern, out_dir, template, _ in settings["post_pages"]:
+                page_tasks = self.site.generic_page_renderer(
+                    language, pattern, template, out_dir,
+                    settings["filters"])
+                for page_task in page_tasks:
+                    inherited = page_task['uptodate'][0].config
+                    page_task['uptodate'] = [config_changed({
+                        1: inherited,
+                        2: settings})]
+                    page_task['basename'] = self.name
+                    rendered_any = True
+                    yield page_task
+        if not rendered_any:
+            # No page rendered: yield a dummy task instead.
+            yield {
+                'basename': self.name,
+                'name': 'None',
+                'uptodate': [True],
+                'actions': [],
+            }
diff --git a/nikola/plugins/task_render_posts.plugin b/nikola/plugins/task_render_posts.plugin
new file mode 100644
index 0000000..0d19ea9
--- /dev/null
+++ b/nikola/plugins/task_render_posts.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = render_posts
+Module = task_render_posts
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Create HTML fragments out of posts.
+
diff --git a/nikola/plugins/task_render_posts.py b/nikola/plugins/task_render_posts.py
new file mode 100644
index 0000000..44888f2
--- /dev/null
+++ b/nikola/plugins/task_render_posts.py
@@ -0,0 +1,60 @@
+from copy import copy
+import os
+
+from nikola.plugin_categories import Task
+from nikola import utils
+
+
+class RenderPosts(Task):
+    """Compile post sources into HTML fragments."""
+
+    name = "render_posts"
+
+    def gen_tasks(self):
+        """Yield one compile task per post and language.
+
+        For a language other than DEFAULT_LANG the target path gets a
+        ``.<lang>`` suffix, and ``<source>.<lang>`` is compiled in place
+        of the main source when that file exists.  If the loops produce
+        nothing, a single action-less placeholder task is yielded.
+        """
+        self.site.scan_posts()
+        settings = {
+            "translations": self.site.config["TRANSLATIONS"],
+            "timeline": self.site.timeline,
+            "default_lang": self.site.config["DEFAULT_LANG"],
+        }
+
+        compiled_any = False
+        for language in settings["translations"]:
+            # TODO: timeline is global, get rid of it
+            # The timeline itself stays out of the up-to-date check.
+            check_input = copy(settings)
+            del check_input['timeline']
+            for post in settings['timeline']:
+                src_path = post.source_path
+                fragment_path = post.base_path
+                if language != settings["default_lang"]:
+                    fragment_path += '.' + language
+                    candidate = src_path + '.' + language
+                    if os.path.exists(candidate):
+                        src_path = candidate
+                compiled_any = True
+                yield {
+                    'basename': self.name,
+                    'name': fragment_path.encode('utf-8'),
+                    'file_dep': post.fragment_deps(language),
+                    'targets': [fragment_path],
+                    'actions': [(self.site.get_compiler(post.source_path),
+                                 [src_path, fragment_path])],
+                    'clean': True,
+                    'uptodate': [utils.config_changed(check_input)],
+                }
+        if not compiled_any:
+            # Nothing to compile: yield a dummy task instead.
+            yield {
+                'basename': self.name,
+                'name': 'None',
+                'uptodate': [True],
+                'actions': [],
+            }
diff --git a/nikola/plugins/task_render_rss.plugin b/nikola/plugins/task_render_rss.plugin
new file mode 100644
index 0000000..20caf15
--- /dev/null
+++ b/nikola/plugins/task_render_rss.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = render_rss
+Module = task_render_rss
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Generate RSS feeds.
+
diff --git a/nikola/plugins/task_render_rss.py b/nikola/plugins/task_render_rss.py
new file mode 100644
index 0000000..bee1192
--- /dev/null
+++ b/nikola/plugins/task_render_rss.py
@@ -0,0 +1,49 @@
+import os
+
+from nikola import utils
+from nikola.plugin_categories import Task
+
+
+class RenderRSS(Task):
+    """Build the site-wide RSS feed for each language."""
+
+    name = "render_rss"
+
+    def gen_tasks(self):
+        """Yield one RSS task per language in TRANSLATIONS.
+
+        A feed carries the first ten timeline entries whose
+        ``use_in_feeds`` flag is set; the task's file dependencies are
+        those posts' ``deps(lang)`` lists joined together.
+        """
+        settings = {
+            "translations": self.site.config["TRANSLATIONS"],
+            "filters": self.site.config["FILTERS"],
+            "blog_title": self.site.config["BLOG_TITLE"],
+            "blog_url": self.site.config["BLOG_URL"],
+            "blog_description": self.site.config["BLOG_DESCRIPTION"],
+            "output_folder": self.site.config["OUTPUT_FOLDER"],
+        }
+        self.site.scan_posts()
+        # TODO: timeline is global, kill it
+        for language in settings["translations"]:
+            feed_path = os.path.join(
+                settings['output_folder'],
+                self.site.path("rss", None, language))
+            feed_posts = [entry for entry in self.site.timeline
+                          if entry.use_in_feeds][:10]
+            feed_deps = []
+            for entry in feed_posts:
+                feed_deps.extend(entry.deps(language))
+            renderer_args = (
+                language, settings["blog_title"], settings["blog_url"],
+                settings["blog_description"], feed_posts, feed_path)
+            yield {
+                'basename': 'render_rss',
+                'name': feed_path,
+                'file_dep': feed_deps,
+                'targets': [feed_path],
+                'actions': [(utils.generic_rss_renderer, renderer_args)],
+                'clean': True,
+                'uptodate': [utils.config_changed(settings)],
+            }
diff --git a/nikola/plugins/task_render_sources.plugin b/nikola/plugins/task_render_sources.plugin
new file mode 100644
index 0000000..5b59598
--- /dev/null
+++ b/nikola/plugins/task_render_sources.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = render_sources
+Module = task_render_sources
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Copy page sources into the output.
+
diff --git a/nikola/plugins/task_render_sources.py b/nikola/plugins/task_render_sources.py
new file mode 100644
index 0000000..ae5ce23
--- /dev/null
+++ b/nikola/plugins/task_render_sources.py
@@ -0,0 +1,60 @@
+import os
+
+from nikola.plugin_categories import Task
+from nikola import utils
+
+
+class Sources(Task):
+    """Copy page sources into the output."""
+
+    name = "render_sources"
+
+    def gen_tasks(self):
+        """Publish the page sources into the output.
+
+        Yields one copy task per post and language.  For a language other
+        than DEFAULT_LANG, ``<source>.<lang>`` is copied instead of the
+        main source when that file exists.
+
+        Settings read from the site configuration:
+
+        TRANSLATIONS
+        OUTPUT_FOLDER
+        DEFAULT_LANG
+        """
+        kw = {
+            "translations": self.site.config["TRANSLATIONS"],
+            "output_folder": self.site.config["OUTPUT_FOLDER"],
+            "default_lang": self.site.config["DEFAULT_LANG"],
+        }
+
+        self.site.scan_posts()
+        flag = False
+        for lang in kw["translations"]:
+            for post in self.site.timeline:
+                output_name = os.path.join(kw['output_folder'],
+                    post.destination_path(lang, post.source_ext()))
+                source = post.source_path
+                if lang != kw["default_lang"]:
+                    source_lang = source + '.' + lang
+                    if os.path.exists(source_lang):
+                        source = source_lang
+                # Record that a real task exists; without this the dummy
+                # task below was yielded on every run.
+                flag = True
+                yield {
+                    'basename': self.name,
+                    'name': output_name.encode('utf8'),
+                    'file_dep': [source],
+                    'targets': [output_name],
+                    'actions': [(utils.copy_file, (source, output_name))],
+                    'clean': True,
+                    'uptodate': [utils.config_changed(kw)],
+                }
+        if flag is False:  # No page rendered, yield a dummy task
+            yield {
+                'basename': self.name,
+                'name': 'None',
+                'uptodate': [True],
+                'actions': [],
+            }
diff --git a/nikola/plugins/task_render_tags.plugin b/nikola/plugins/task_render_tags.plugin
new file mode 100644
index 0000000..b826e87
--- /dev/null
+++ b/nikola/plugins/task_render_tags.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = render_tags
+Module = task_render_tags
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Render the tag pages and feeds.
+
diff --git a/nikola/plugins/task_render_tags.py b/nikola/plugins/task_render_tags.py
new file mode 100644
index 0000000..61629ec
--- /dev/null
+++ b/nikola/plugins/task_render_tags.py
@@ -0,0 +1,196 @@
+import os
+
+from nikola.plugin_categories import Task
+from nikola import utils
+
+
+class RenderTags(Task):
+    """Render the tag pages and feeds."""
+
+    name = "render_tags"
+
+    def gen_tasks(self):
+        """Yield the tasks for tag feeds, tag pages and the tag index.
+
+        For every tag and language an RSS task is produced, followed by
+        either a paginated index-style listing (TAG_PAGES_ARE_INDEXES set)
+        or one flat link list.  A last pass produces the page listing all
+        tags, once per language.  If the site has no tags at all, a single
+        action-less placeholder task is yielded.
+        """
+
+        kw = {
+            "translations": self.site.config["TRANSLATIONS"],
+            "blog_title": self.site.config["BLOG_TITLE"],
+            "blog_url": self.site.config["BLOG_URL"],
+            "blog_description": self.site.config["BLOG_DESCRIPTION"],
+            "messages": self.site.MESSAGES,
+            "output_folder": self.site.config['OUTPUT_FOLDER'],
+            "filters": self.site.config['FILTERS'],
+            "tag_pages_are_indexes": self.site.config['TAG_PAGES_ARE_INDEXES'],
+            "index_display_post_count":
+                self.site.config['INDEX_DISPLAY_POST_COUNT'],
+            "index_teasers": self.site.config['INDEX_TEASERS'],
+        }
+
+        self.site.scan_posts()
+
+        if not self.site.posts_per_tag:
+            yield {
+                'basename': self.name,
+                'actions': [],
+            }
+            return
+
+        def page_name(tagname, i, lang):
+            """Return the path of page ``i`` of the listing for a tag."""
+            # Use the argument; this used to read the enclosing loop's
+            # ``tag`` and silently ignore what the caller passed.
+            name = self.site.path("tag", tagname, lang)
+            if i:
+                name = name.replace('.html', '-%s.html' % i)
+            return name
+
+        for tag, posts in self.site.posts_per_tag.items():
+            for lang in kw["translations"]:
+                #Render RSS
+                output_name = os.path.join(kw['output_folder'],
+                    self.site.path("tag_rss", tag, lang))
+                deps = []
+                # NOTE(review): the HTML listings below reuse this
+                # feed-filtered list, so posts with use_in_feeds unset
+                # are left off the tag pages too -- confirm intended.
+                post_list = [self.site.global_data[post] for post in posts
+                    if self.site.global_data[post].use_in_feeds]
+                # Newest first (ascending by date, then reversed).
+                post_list.sort(key=lambda p: p.date)
+                post_list.reverse()
+                for post in post_list:
+                    deps += post.deps(lang)
+                yield {
+                    'name': output_name.encode('utf8'),
+                    'file_dep': deps,
+                    'targets': [output_name],
+                    'actions': [(utils.generic_rss_renderer,
+                        (lang, "%s (%s)" % (kw["blog_title"], tag),
+                        kw["blog_url"], kw["blog_description"],
+                        post_list, output_name))],
+                    'clean': True,
+                    'uptodate': [utils.config_changed(kw)],
+                    'basename': self.name
+                }
+
+                # Render HTML
+                if kw['tag_pages_are_indexes']:
+                    # We render a sort of index page collection using only
+                    # this tag's posts.
+
+                    # FIXME: deduplicate this with render_indexes
+                    template_name = "index.tmpl"
+                    # Split in smaller lists
+                    lists = []
+                    while post_list:
+                        lists.append(post_list[
+                            :kw["index_display_post_count"]])
+                        post_list = post_list[
+                            kw["index_display_post_count"]:]
+                    num_pages = len(lists)
+                    for i, post_list in enumerate(lists):
+                        context = {}
+                        # On a tag page, the feeds include the tag's feeds
+                        # NOTE(review): the template text was missing from
+                        # the copy under review (an empty string with three
+                        # arguments, which raises TypeError); this element
+                        # is a reconstruction -- confirm against upstream.
+                        rss_link = (
+                            '<link rel="alternate" '
+                            'type="application/rss+xml" '
+                            'title="RSS for tag %s (%s)" href="%s">'
+                        ) % (tag, lang, self.site.link("tag_rss", tag, lang))
+                        context['rss_link'] = rss_link
+                        output_name = os.path.join(kw['output_folder'],
+                            page_name(tag, i, lang))
+                        context["title"] = kw["messages"][lang][
+                            u"Posts about %s"] % tag
+                        context["prevlink"] = None
+                        context["nextlink"] = None
+                        context['index_teasers'] = kw['index_teasers']
+                        if i > 1:
+                            context["prevlink"] = os.path.basename(
+                                page_name(tag, i - 1, lang))
+                        if i == 1:
+                            context["prevlink"] = os.path.basename(
+                                page_name(tag, 0, lang))
+                        if i < num_pages - 1:
+                            context["nextlink"] = os.path.basename(
+                                page_name(tag, i + 1, lang))
+                        context["permalink"] = self.site.link("tag", tag, lang)
+                        context["tag"] = tag
+                        for task in self.site.generic_post_list_renderer(
+                            lang,
+                            post_list,
+                            output_name,
+                            template_name,
+                            kw['filters'],
+                            context,
+                        ):
+                            task['uptodate'] = [utils.config_changed({
+                                1: task['uptodate'][0].config,
+                                2: kw})]
+                            task['basename'] = self.name
+                            yield task
+                else:
+                    # We render a single flat link list with this tag's posts
+                    template_name = "tag.tmpl"
+                    output_name = os.path.join(kw['output_folder'],
+                        self.site.path("tag", tag, lang))
+                    context = {}
+                    context["lang"] = lang
+                    context["title"] = kw["messages"][lang][
+                        u"Posts about %s"] % tag
+                    context["items"] = [("[%s] %s" % (post.date,
+                        post.title(lang)),
+                        post.permalink(lang)) for post in post_list]
+                    context["permalink"] = self.site.link("tag", tag, lang)
+                    context["tag"] = tag
+                    for task in self.site.generic_post_list_renderer(
+                        lang,
+                        post_list,
+                        output_name,
+                        template_name,
+                        kw['filters'],
+                        context,
+                    ):
+                        task['uptodate'] = [utils.config_changed({
+                            1: task['uptodate'][0].config,
+                            2: kw})]
+                        task['basename'] = self.name
+                        yield task
+
+        # And global "all your tags" page
+        tags = sorted(self.site.posts_per_tag)
+        template_name = "tags.tmpl"
+        kw['tags'] = tags
+        for lang in kw["translations"]:
+            output_name = os.path.join(
+                kw['output_folder'], self.site.path('tag_index', None, lang))
+            context = {}
+            context["title"] = kw["messages"][lang][u"Tags"]
+            context["items"] = [(tag, self.site.link("tag", tag, lang))
+                for tag in tags]
+            context["permalink"] = self.site.link("tag_index", None, lang)
+            for task in self.site.generic_post_list_renderer(
+                lang,
+                [],
+                output_name,
+                template_name,
+                kw['filters'],
+                context,
+            ):
+                task['uptodate'] = [utils.config_changed({
+                    1: task['uptodate'][0].config,
+                    2: kw})]
+                # Keep the tag index under this plugin's basename, like
+                # every other task yielded above.
+                task['basename'] = self.name
+                yield task
diff --git a/nikola/plugins/task_sitemap.plugin b/nikola/plugins/task_sitemap.plugin
new file mode 100644
index 0000000..f6b01d7
--- /dev/null
+++ b/nikola/plugins/task_sitemap.plugin
@@ -0,0 +1,10 @@
+[Core]
+Name = sitemap
+Module = task_sitemap
+
+[Documentation]
+Author = Roberto Alsina
+Version = 0.1
+Website = http://nikola.ralsina.com.ar
+Description = Generate google sitemap.
+ diff --git a/nikola/plugins/task_sitemap/__init__.py b/nikola/plugins/task_sitemap/__init__.py new file mode 100644 index 0000000..87b72bf --- /dev/null +++ b/nikola/plugins/task_sitemap/__init__.py @@ -0,0 +1,62 @@ +import os +import tempfile + +from nikola.plugin_categories import LateTask +from nikola.utils import config_changed + +import sitemap_gen as smap + + +class Sitemap(LateTask): + """Copy theme assets into output.""" + + name = "sitemap" + + def gen_tasks(self): + """Generate Google sitemap.""" + kw = { + "blog_url": self.site.config["BLOG_URL"], + "output_folder": self.site.config["OUTPUT_FOLDER"], + } + output_path = os.path.abspath(kw['output_folder']) + sitemap_path = os.path.join(output_path, "sitemap.xml.gz") + + def sitemap(): + # Generate config + config_data = """ + + + + + """ % ( + kw["blog_url"], + sitemap_path, + output_path, + kw["blog_url"], + ) + config_file = tempfile.NamedTemporaryFile(delete=False) + config_file.write(config_data) + config_file.close() + + # Generate sitemap + sitemap = smap.CreateSitemapFromFile(config_file.name, True) + if not sitemap: + smap.output.Log('Configuration file errors -- exiting.', 0) + else: + sitemap.Generate() + smap.output.Log('Number of errors: %d' % + smap.output.num_errors, 1) + smap.output.Log('Number of warnings: %d' % + smap.output.num_warns, 1) + os.unlink(config_file.name) + + yield { + "basename": "sitemap", + "targets": [sitemap_path], + "actions": [(sitemap,)], + "uptodate": [config_changed(kw)], + "clean": True, + } diff --git a/nikola/plugins/task_sitemap/sitemap_gen.py b/nikola/plugins/task_sitemap/sitemap_gen.py new file mode 100755 index 0000000..43e7c32 --- /dev/null +++ b/nikola/plugins/task_sitemap/sitemap_gen.py @@ -0,0 +1,2241 @@ +#!/usr/bin/env python +# flake8: noqa +# +# Copyright (c) 2004, 2005 Google Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of Google nor the names of its contributors may +# be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# +# The sitemap_gen.py script is written in Python 2.2 and released to +# the open source community for continuous improvements under the BSD +# 2.0 new license, which can be found at: +# +# http://www.opensource.org/licenses/bsd-license.php +# + +__usage__ = \ +"""A simple script to automatically produce sitemaps for a webserver, +in the Google Sitemap Protocol (GSP). 
+ +Usage: python sitemap_gen.py --config=config.xml [--help] [--testing] + --config=config.xml, specifies config file location + --help, displays usage message + --testing, specified when user is experimenting +""" + +# Please be careful that all syntax used in this file can be parsed on +# Python 1.5 -- this version check is not evaluated until after the +# entire file has been parsed. +import sys +if sys.hexversion < 0x02020000: + print 'This script requires Python 2.2 or later.' + print 'Currently run with version: %s' % sys.version + sys.exit(1) + +import fnmatch +import glob +import gzip +import hashlib +import os +import re +import stat +import time +import types +import urllib +import urlparse +import xml.sax + +# True and False were introduced in Python2.2.2 +try: + testTrue=True + del testTrue +except NameError: + True=1 + False=0 + +# Text encodings +ENC_ASCII = 'ASCII' +ENC_UTF8 = 'UTF-8' +ENC_IDNA = 'IDNA' +ENC_ASCII_LIST = ['ASCII', 'US-ASCII', 'US', 'IBM367', 'CP367', 'ISO646-US' + 'ISO_646.IRV:1991', 'ISO-IR-6', 'ANSI_X3.4-1968', + 'ANSI_X3.4-1986', 'CPASCII' ] +ENC_DEFAULT_LIST = ['ISO-8859-1', 'ISO-8859-2', 'ISO-8859-5'] + +# Maximum number of urls in each sitemap, before next Sitemap is created +MAXURLS_PER_SITEMAP = 50000 + +# Suffix on a Sitemap index file +SITEINDEX_SUFFIX = '_index.xml' + +# Regular expressions tried for extracting URLs from access logs. 
+ACCESSLOG_CLF_PATTERN = re.compile( + r'.+\s+"([^\s]+)\s+([^\s]+)\s+HTTP/\d+\.\d+"\s+200\s+.*' + ) + +# Match patterns for lastmod attributes +LASTMOD_PATTERNS = map(re.compile, [ + r'^\d\d\d\d$', + r'^\d\d\d\d-\d\d$', + r'^\d\d\d\d-\d\d-\d\d$', + r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\dZ$', + r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d[+-]\d\d:\d\d$', + r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?Z$', + r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?[+-]\d\d:\d\d$', + ]) + +# Match patterns for changefreq attributes +CHANGEFREQ_PATTERNS = [ + 'always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never' + ] + +# XML formats +SITEINDEX_HEADER = \ + '\n' \ + '\n' \ + '\n' +SITEINDEX_FOOTER = '\n' +SITEINDEX_ENTRY = \ + ' \n' \ + ' %(loc)s\n' \ + ' %(lastmod)s\n' \ + ' \n' +SITEMAP_HEADER = \ + '\n' \ + '\n' +SITEMAP_FOOTER = '\n' +SITEURL_XML_PREFIX = ' \n' +SITEURL_XML_SUFFIX = ' \n' + +# Search engines to notify with the updated sitemaps +# +# This list is very non-obvious in what's going on. Here's the gist: +# Each item in the list is a 6-tuple of items. The first 5 are "almost" +# the same as the input arguments to urlparse.urlunsplit(): +# 0 - schema +# 1 - netloc +# 2 - path +# 3 - query <-- EXCEPTION: specify a query map rather than a string +# 4 - fragment +# Additionally, add item 5: +# 5 - query attribute that should be set to the new Sitemap URL +# Clear as mud, I know. +NOTIFICATION_SITES = [ + ('http', 'www.google.com', 'webmasters/sitemaps/ping', {}, '', 'sitemap') + ] + + +class Error(Exception): + """ + Base exception class. In this module we tend not to use our own exception + types for very much, but they come in very handy on XML parsing with SAX. + """ + pass +#end class Error + + +class SchemaError(Error): + """Failure to process an XML file according to the schema we know.""" + pass +#end class SchemeError + + +class Encoder: + """ + Manages wide-character/narrow-character conversions for just about all + text that flows into or out of the script. 
+ + You should always use this class for string coercion, as opposed to + letting Python handle coercions automatically. Reason: Python + usually assumes ASCII (7-bit) as a default narrow character encoding, + which is not the kind of data we generally deal with. + + General high-level methodologies used in sitemap_gen: + + [PATHS] + File system paths may be wide or narrow, depending on platform. + This works fine, just be aware of it and be very careful to not + mix them. That is, if you have to pass several file path arguments + into a library call, make sure they are all narrow or all wide. + This class has MaybeNarrowPath() which should be called on every + file system path you deal with. + + [URLS] + URL locations are stored in Narrow form, already escaped. This has the + benefit of keeping escaping and encoding as close as possible to the format + we read them in. The downside is we may end up with URLs that have + intermingled encodings -- the root path may be encoded in one way + while the filename is encoded in another. This is obviously wrong, but + it should hopefully be an issue hit by very few users. The workaround + from the user level (assuming they notice) is to specify a default_encoding + parameter in their config file. + + [OTHER] + Other text, such as attributes of the URL class, configuration options, + etc, are generally stored in Unicode for simplicity. + """ + + def __init__(self): + self._user = None # User-specified default encoding + self._learned = [] # Learned default encodings + self._widefiles = False # File system can be wide + + # Can the file system be Unicode? 
+ try: + self._widefiles = os.path.supports_unicode_filenames + except AttributeError: + try: + self._widefiles = sys.getwindowsversion() == os.VER_PLATFORM_WIN32_NT + except AttributeError: + pass + + # Try to guess a working default + try: + encoding = sys.getfilesystemencoding() + if encoding and not (encoding.upper() in ENC_ASCII_LIST): + self._learned = [ encoding ] + except AttributeError: + pass + + if not self._learned: + encoding = sys.getdefaultencoding() + if encoding and not (encoding.upper() in ENC_ASCII_LIST): + self._learned = [ encoding ] + + # If we had no guesses, start with some European defaults + if not self._learned: + self._learned = ENC_DEFAULT_LIST + #end def __init__ + + def SetUserEncoding(self, encoding): + self._user = encoding + #end def SetUserEncoding + + def NarrowText(self, text, encoding): + """ Narrow a piece of arbitrary text """ + if type(text) != types.UnicodeType: + return text + + # Try the passed in preference + if encoding: + try: + result = text.encode(encoding) + if not encoding in self._learned: + self._learned.append(encoding) + return result + except UnicodeError: + pass + except LookupError: + output.Warn('Unknown encoding: %s' % encoding) + + # Try the user preference + if self._user: + try: + return text.encode(self._user) + except UnicodeError: + pass + except LookupError: + temp = self._user + self._user = None + output.Warn('Unknown default_encoding: %s' % temp) + + # Look through learned defaults, knock any failing ones out of the list + while self._learned: + try: + return text.encode(self._learned[0]) + except: + del self._learned[0] + + # When all other defaults are exhausted, use UTF-8 + try: + return text.encode(ENC_UTF8) + except UnicodeError: + pass + + # Something is seriously wrong if we get to here + return text.encode(ENC_ASCII, 'ignore') + #end def NarrowText + + def MaybeNarrowPath(self, text): + """ Paths may be allowed to stay wide """ + if self._widefiles: + return text + return 
self.NarrowText(text, None) + #end def MaybeNarrowPath + + def WidenText(self, text, encoding): + """ Widen a piece of arbitrary text """ + if type(text) != types.StringType: + return text + + # Try the passed in preference + if encoding: + try: + result = unicode(text, encoding) + if not encoding in self._learned: + self._learned.append(encoding) + return result + except UnicodeError: + pass + except LookupError: + output.Warn('Unknown encoding: %s' % encoding) + + # Try the user preference + if self._user: + try: + return unicode(text, self._user) + except UnicodeError: + pass + except LookupError: + temp = self._user + self._user = None + output.Warn('Unknown default_encoding: %s' % temp) + + # Look through learned defaults, knock any failing ones out of the list + while self._learned: + try: + return unicode(text, self._learned[0]) + except: + del self._learned[0] + + # When all other defaults are exhausted, use UTF-8 + try: + return unicode(text, ENC_UTF8) + except UnicodeError: + pass + + # Getting here means it wasn't UTF-8 and we had no working default. + # We really don't have anything "right" we can do anymore. + output.Warn('Unrecognized encoding in text: %s' % text) + if not self._user: + output.Warn('You may need to set a default_encoding in your ' + 'configuration file.') + return text.decode(ENC_ASCII, 'ignore') + #end def WidenText +#end class Encoder +encoder = Encoder() + + +class Output: + """ + Exposes logging functionality, and tracks how many errors + we have thus output. 
+ + Logging levels should be used as thus: + Fatal -- extremely sparingly + Error -- config errors, entire blocks of user 'intention' lost + Warn -- individual URLs lost + Log(,0) -- Un-suppressable text that's not an error + Log(,1) -- touched files, major actions + Log(,2) -- parsing notes, filtered or duplicated URLs + Log(,3) -- each accepted URL + """ + + def __init__(self): + self.num_errors = 0 # Count of errors + self.num_warns = 0 # Count of warnings + + self._errors_shown = {} # Shown errors + self._warns_shown = {} # Shown warnings + self._verbose = 0 # Level of verbosity + #end def __init__ + + def Log(self, text, level): + """ Output a blurb of diagnostic text, if the verbose level allows it """ + if text: + text = encoder.NarrowText(text, None) + if self._verbose >= level: + print text + #end def Log + + def Warn(self, text): + """ Output and count a warning. Suppress duplicate warnings. """ + if text: + text = encoder.NarrowText(text, None) + hash = hashlib.md5(text).digest() + if not self._warns_shown.has_key(hash): + self._warns_shown[hash] = 1 + print '[WARNING] ' + text + else: + self.Log('(suppressed) [WARNING] ' + text, 3) + self.num_warns = self.num_warns + 1 + #end def Warn + + def Error(self, text): + """ Output and count an error. Suppress duplicate errors. """ + if text: + text = encoder.NarrowText(text, None) + hash = hashlib.md5(text).digest() + if not self._errors_shown.has_key(hash): + self._errors_shown[hash] = 1 + print '[ERROR] ' + text + else: + self.Log('(suppressed) [ERROR] ' + text, 3) + self.num_errors = self.num_errors + 1 + #end def Error + + def Fatal(self, text): + """ Output an error and terminate the program. """ + if text: + text = encoder.NarrowText(text, None) + print '[FATAL] ' + text + else: + print 'Fatal error.' + sys.exit(1) + #end def Fatal + + def SetVerbose(self, level): + """ Sets the verbose level. 
""" + try: + if type(level) != types.IntType: + level = int(level) + if (level >= 0) and (level <= 3): + self._verbose = level + return + except ValueError: + pass + self.Error('Verbose level (%s) must be between 0 and 3 inclusive.' % level) + #end def SetVerbose +#end class Output +output = Output() + + +class URL(object): + """ URL is a smart structure grouping together the properties we + care about for a single web reference. """ + __slots__ = 'loc', 'lastmod', 'changefreq', 'priority' + + def __init__(self): + self.loc = None # URL -- in Narrow characters + self.lastmod = None # ISO8601 timestamp of last modify + self.changefreq = None # Text term for update frequency + self.priority = None # Float between 0 and 1 (inc) + #end def __init__ + + def __cmp__(self, other): + if self.loc < other.loc: + return -1 + if self.loc > other.loc: + return 1 + return 0 + #end def __cmp__ + + def TrySetAttribute(self, attribute, value): + """ Attempt to set the attribute to the value, with a pretty try + block around it. 
""" + if attribute == 'loc': + self.loc = self.Canonicalize(value) + else: + try: + setattr(self, attribute, value) + except AttributeError: + output.Warn('Unknown URL attribute: %s' % attribute) + #end def TrySetAttribute + + def IsAbsolute(loc): + """ Decide if the URL is absolute or not """ + if not loc: + return False + narrow = encoder.NarrowText(loc, None) + (scheme, netloc, path, query, frag) = urlparse.urlsplit(narrow) + if (not scheme) or (not netloc): + return False + return True + #end def IsAbsolute + IsAbsolute = staticmethod(IsAbsolute) + + def Canonicalize(loc): + """ Do encoding and canonicalization on a URL string """ + if not loc: + return loc + + # Let the encoder try to narrow it + narrow = encoder.NarrowText(loc, None) + + # Escape components individually + (scheme, netloc, path, query, frag) = urlparse.urlsplit(narrow) + unr = '-._~' + sub = '!$&\'()*+,;=' + netloc = urllib.quote(netloc, unr + sub + '%:@/[]') + path = urllib.quote(path, unr + sub + '%:@/') + query = urllib.quote(query, unr + sub + '%:@/?') + frag = urllib.quote(frag, unr + sub + '%:@/?') + + # Try built-in IDNA encoding on the netloc + try: + (ignore, widenetloc, ignore, ignore, ignore) = urlparse.urlsplit(loc) + for c in widenetloc: + if c >= unichr(128): + netloc = widenetloc.encode(ENC_IDNA) + netloc = urllib.quote(netloc, unr + sub + '%:@/[]') + break + except UnicodeError: + # urlsplit must have failed, based on implementation differences in the + # library. There is not much we can do here, except ignore it. + pass + except LookupError: + output.Warn('An International Domain Name (IDN) is being used, but this ' + 'version of Python does not have support for IDNA encoding. 
' + ' (IDNA support was introduced in Python 2.3) The encoding ' + 'we have used instead is wrong and will probably not yield ' + 'valid URLs.') + bad_netloc = False + if '%' in netloc: + bad_netloc = True + + # Put it all back together + narrow = urlparse.urlunsplit((scheme, netloc, path, query, frag)) + + # I let '%' through. Fix any that aren't pre-existing escapes. + HEXDIG = '0123456789abcdefABCDEF' + list = narrow.split('%') + narrow = list[0] + del list[0] + for item in list: + if (len(item) >= 2) and (item[0] in HEXDIG) and (item[1] in HEXDIG): + narrow = narrow + '%' + item + else: + narrow = narrow + '%25' + item + + # Issue a warning if this is a bad URL + if bad_netloc: + output.Warn('Invalid characters in the host or domain portion of a URL: ' + + narrow) + + return narrow + #end def Canonicalize + Canonicalize = staticmethod(Canonicalize) + + def Validate(self, base_url, allow_fragment): + """ Verify the data in this URL is well-formed, and override if not. """ + assert type(base_url) == types.StringType + + # Test (and normalize) the ref + if not self.loc: + output.Warn('Empty URL') + return False + if allow_fragment: + self.loc = urlparse.urljoin(base_url, self.loc) + if not self.loc.startswith(base_url): + output.Warn('Discarded URL for not starting with the base_url: %s' % + self.loc) + self.loc = None + return False + + # Test the lastmod + if self.lastmod: + match = False + self.lastmod = self.lastmod.upper() + for pattern in LASTMOD_PATTERNS: + match = pattern.match(self.lastmod) + if match: + break + if not match: + output.Warn('Lastmod "%s" does not appear to be in ISO8601 format on ' + 'URL: %s' % (self.lastmod, self.loc)) + self.lastmod = None + + # Test the changefreq + if self.changefreq: + match = False + self.changefreq = self.changefreq.lower() + for pattern in CHANGEFREQ_PATTERNS: + if self.changefreq == pattern: + match = True + break + if not match: + output.Warn('Changefreq "%s" is not a valid change frequency on URL ' + ': %s' % 
(self.changefreq, self.loc)) + self.changefreq = None + + # Test the priority + if self.priority: + priority = -1.0 + try: + priority = float(self.priority) + except ValueError: + pass + if (priority < 0.0) or (priority > 1.0): + output.Warn('Priority "%s" is not a number between 0 and 1 inclusive ' + 'on URL: %s' % (self.priority, self.loc)) + self.priority = None + + return True + #end def Validate + + def MakeHash(self): + """ Provides a uniform way of hashing URLs """ + if not self.loc: + return None + if self.loc.endswith('/'): + return hashlib.md5(self.loc[:-1]).digest() + return hashlib.md5(self.loc).digest() + #end def MakeHash + + def Log(self, prefix='URL', level=3): + """ Dump the contents, empty or not, to the log. """ + out = prefix + ':' + + for attribute in self.__slots__: + value = getattr(self, attribute) + if not value: + value = '' + out = out + (' %s=[%s]' % (attribute, value)) + + output.Log('%s' % encoder.NarrowText(out, None), level) + #end def Log + + def WriteXML(self, file): + """ Dump non-empty contents to the output file, in XML format. """ + if not self.loc: + return + out = SITEURL_XML_PREFIX + + for attribute in self.__slots__: + value = getattr(self, attribute) + if value: + if type(value) == types.UnicodeType: + value = encoder.NarrowText(value, None) + elif type(value) != types.StringType: + value = str(value) + value = xml.sax.saxutils.escape(value) + out = out + (' <%s>%s\n' % (attribute, value, attribute)) + + out = out + SITEURL_XML_SUFFIX + file.write(out) + #end def WriteXML +#end class URL + + +class Filter: + """ + A filter on the stream of URLs we find. A filter is, in essence, + a wildcard applied to the stream. 
You can think of this as an + operator that returns a tri-state when given a URL: + + True -- this URL is to be included in the sitemap + None -- this URL is undecided + False -- this URL is to be dropped from the sitemap + """ + + def __init__(self, attributes): + self._wildcard = None # Pattern for wildcard match + self._regexp = None # Pattern for regexp match + self._pass = False # "Drop" filter vs. "Pass" filter + + if not ValidateAttributes('FILTER', attributes, + ('pattern', 'type', 'action')): + return + + # Check error count on the way in + num_errors = output.num_errors + + # Fetch the attributes + pattern = attributes.get('pattern') + type = attributes.get('type', 'wildcard') + action = attributes.get('action', 'drop') + if type: + type = type.lower() + if action: + action = action.lower() + + # Verify the attributes + if not pattern: + output.Error('On a filter you must specify a "pattern" to match') + elif (not type) or ((type != 'wildcard') and (type != 'regexp')): + output.Error('On a filter you must specify either \'type="wildcard"\' ' + 'or \'type="regexp"\'') + elif (action != 'pass') and (action != 'drop'): + output.Error('If you specify a filter action, it must be either ' + '\'action="pass"\' or \'action="drop"\'') + + # Set the rule + if action == 'drop': + self._pass = False + elif action == 'pass': + self._pass = True + + if type == 'wildcard': + self._wildcard = pattern + elif type == 'regexp': + try: + self._regexp = re.compile(pattern) + except re.error: + output.Error('Bad regular expression: %s' % pattern) + + # Log the final results iff we didn't add any errors + if num_errors == output.num_errors: + output.Log('Filter: %s any URL that matches %s "%s"' % + (action, type, pattern), 2) + #end def __init__ + + def Apply(self, url): + """ Process the URL, as above. 
""" + if (not url) or (not url.loc): + return None + + if self._wildcard: + if fnmatch.fnmatchcase(url.loc, self._wildcard): + return self._pass + return None + + if self._regexp: + if self._regexp.search(url.loc): + return self._pass + return None + + assert False # unreachable + #end def Apply +#end class Filter + + +class InputURL: + """ + Each Input class knows how to yield a set of URLs from a data source. + + This one handles a single URL, manually specified in the config file. + """ + + def __init__(self, attributes): + self._url = None # The lonely URL + + if not ValidateAttributes('URL', attributes, + ('href', 'lastmod', 'changefreq', 'priority')): + return + + url = URL() + for attr in attributes.keys(): + if attr == 'href': + url.TrySetAttribute('loc', attributes[attr]) + else: + url.TrySetAttribute(attr, attributes[attr]) + + if not url.loc: + output.Error('Url entries must have an href attribute.') + return + + self._url = url + output.Log('Input: From URL "%s"' % self._url.loc, 2) + #end def __init__ + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. """ + if self._url: + consumer(self._url, True) + #end def ProduceURLs +#end class InputURL + + +class InputURLList: + """ + Each Input class knows how to yield a set of URLs from a data source. 
+ + This one handles a text file with a list of URLs + """ + + def __init__(self, attributes): + self._path = None # The file path + self._encoding = None # Encoding of that file + + if not ValidateAttributes('URLLIST', attributes, ('path', 'encoding')): + return + + self._path = attributes.get('path') + self._encoding = attributes.get('encoding', ENC_UTF8) + if self._path: + self._path = encoder.MaybeNarrowPath(self._path) + if os.path.isfile(self._path): + output.Log('Input: From URLLIST "%s"' % self._path, 2) + else: + output.Error('Can not locate file: %s' % self._path) + self._path = None + else: + output.Error('Urllist entries must have a "path" attribute.') + #end def __init__ + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. """ + + # Open the file + (frame, file) = OpenFileForRead(self._path, 'URLLIST') + if not file: + return + + # Iterate lines + linenum = 0 + for line in file.readlines(): + linenum = linenum + 1 + + # Strip comments and empty lines + if self._encoding: + line = encoder.WidenText(line, self._encoding) + line = line.strip() + if (not line) or line[0] == '#': + continue + + # Split the line on space + url = URL() + cols = line.split(' ') + for i in range(0,len(cols)): + cols[i] = cols[i].strip() + url.TrySetAttribute('loc', cols[0]) + + # Extract attributes from the other columns + for i in range(1,len(cols)): + if cols[i]: + try: + (attr_name, attr_val) = cols[i].split('=', 1) + url.TrySetAttribute(attr_name, attr_val) + except ValueError: + output.Warn('Line %d: Unable to parse attribute: %s' % + (linenum, cols[i])) + + # Pass it on + consumer(url, False) + + file.close() + if frame: + frame.close() + #end def ProduceURLs +#end class InputURLList + + +class InputDirectory: + """ + Each Input class knows how to yield a set of URLs from a data source. + + This one handles a directory that acts as base for walking the filesystem. 
+ """ + + def __init__(self, attributes, base_url): + self._path = None # The directory + self._url = None # The URL equivelant + self._default_file = None + + if not ValidateAttributes('DIRECTORY', attributes, ('path', 'url', + 'default_file')): + return + + # Prep the path -- it MUST end in a sep + path = attributes.get('path') + if not path: + output.Error('Directory entries must have both "path" and "url" ' + 'attributes') + return + path = encoder.MaybeNarrowPath(path) + if not path.endswith(os.sep): + path = path + os.sep + if not os.path.isdir(path): + output.Error('Can not locate directory: %s' % path) + return + + # Prep the URL -- it MUST end in a sep + url = attributes.get('url') + if not url: + output.Error('Directory entries must have both "path" and "url" ' + 'attributes') + return + url = URL.Canonicalize(url) + if not url.endswith('/'): + url = url + '/' + if not url.startswith(base_url): + url = urlparse.urljoin(base_url, url) + if not url.startswith(base_url): + output.Error('The directory URL "%s" is not relative to the ' + 'base_url: %s' % (url, base_url)) + return + + # Prep the default file -- it MUST be just a filename + file = attributes.get('default_file') + if file: + file = encoder.MaybeNarrowPath(file) + if os.sep in file: + output.Error('The default_file "%s" can not include path information.' + % file) + file = None + + self._path = path + self._url = url + self._default_file = file + if file: + output.Log('Input: From DIRECTORY "%s" (%s) with default file "%s"' + % (path, url, file), 2) + else: + output.Log('Input: From DIRECTORY "%s" (%s) with no default file' + % (path, url), 2) + #end def __init__ + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. 
""" + if not self._path: + return + + root_path = self._path + root_URL = self._url + root_file = "index.html" + + def DecideFilename(name): + assert "/" not in name + + if name in ( "robots.txt, " ): + return False + + if ".thumbnail." in name: + return False + + if re.match( r"google[a-f0-9]+.html", name ): + return False + + return not re.match( r"^index(\-\d+)?.html$", name ) + + def DecideDirectory(dirpath): + subpath = dirpath[len(root_path):] + + assert not subpath.startswith( "/" ), subpath + + for remove in ( "assets", ): + if subpath == remove or subpath.startswith( remove + os.path.sep ): + return False + else: + return True + + def PerFile(dirpath, name): + """ + Called once per file. + Note that 'name' will occasionally be None -- for a directory itself + """ + if not DecideDirectory(dirpath): + return + + if name is not None and not DecideFilename(name): + return + + # Pull a timestamp + url = URL() + isdir = False + try: + if name: + path = os.path.join(dirpath, name) + else: + path = dirpath + isdir = os.path.isdir(path) + time = None + if isdir and root_file: + file = os.path.join(path, root_file) + try: + time = os.stat(file)[stat.ST_MTIME]; + except OSError: + pass + if not time: + time = os.stat(path)[stat.ST_MTIME]; + url.lastmod = TimestampISO8601(time) + except OSError: + pass + except ValueError: + pass + + # Build a URL + middle = dirpath[len(root_path):] + if os.sep != '/': + middle = middle.replace(os.sep, '/') + if middle: + middle = middle + '/' + if name: + middle = middle + name + if isdir: + middle = middle + '/' + url.TrySetAttribute('loc', root_URL + encoder.WidenText(middle, None)) + + # Suppress default files. (All the way down here so we can log it.) + if name and (root_file == name): + url.Log(prefix='IGNORED (default file)', level=2) + return + + consumer(url, False) + #end def PerFile + + def PerDirectory(ignore, dirpath, namelist): + """ + Called once per directory with a list of all the contained files/dirs. 
+ """ + ignore = ignore # Avoid warnings of an unused parameter + + if not dirpath.startswith(root_path): + output.Warn('Unable to decide what the root path is for directory: ' + '%s' % dirpath) + return + + if not DecideDirectory(dirpath): + return + + for name in namelist: + PerFile(dirpath, name) + #end def PerDirectory + + output.Log('Walking DIRECTORY "%s"' % self._path, 1) + PerFile(self._path, None) + os.path.walk(self._path, PerDirectory, None) + #end def ProduceURLs +#end class InputDirectory + + +class InputAccessLog: + """ + Each Input class knows how to yield a set of URLs from a data source. + + This one handles access logs. It's non-trivial in that we want to + auto-detect log files in the Common Logfile Format (as used by Apache, + for instance) and the Extended Log File Format (as used by IIS, for + instance). + """ + + def __init__(self, attributes): + self._path = None # The file path + self._encoding = None # Encoding of that file + self._is_elf = False # Extended Log File Format? + self._is_clf = False # Common Logfile Format? 
+ self._elf_status = -1 # ELF field: '200' + self._elf_method = -1 # ELF field: 'HEAD' + self._elf_uri = -1 # ELF field: '/foo?bar=1' + self._elf_urifrag1 = -1 # ELF field: '/foo' + self._elf_urifrag2 = -1 # ELF field: 'bar=1' + + if not ValidateAttributes('ACCESSLOG', attributes, ('path', 'encoding')): + return + + self._path = attributes.get('path') + self._encoding = attributes.get('encoding', ENC_UTF8) + if self._path: + self._path = encoder.MaybeNarrowPath(self._path) + if os.path.isfile(self._path): + output.Log('Input: From ACCESSLOG "%s"' % self._path, 2) + else: + output.Error('Can not locate file: %s' % self._path) + self._path = None + else: + output.Error('Accesslog entries must have a "path" attribute.') + #end def __init__ + + def RecognizeELFLine(self, line): + """ Recognize the Fields directive that heads an ELF file """ + if not line.startswith('#Fields:'): + return False + fields = line.split(' ') + del fields[0] + for i in range(0, len(fields)): + field = fields[i].strip() + if field == 'sc-status': + self._elf_status = i + elif field == 'cs-method': + self._elf_method = i + elif field == 'cs-uri': + self._elf_uri = i + elif field == 'cs-uri-stem': + self._elf_urifrag1 = i + elif field == 'cs-uri-query': + self._elf_urifrag2 = i + output.Log('Recognized an Extended Log File Format file.', 2) + return True + #end def RecognizeELFLine + + def GetELFLine(self, line): + """ Fetch the requested URL from an ELF line """ + fields = line.split(' ') + count = len(fields) + + # Verify status was Ok + if self._elf_status >= 0: + if self._elf_status >= count: + return None + if not fields[self._elf_status].strip() == '200': + return None + + # Verify method was HEAD or GET + if self._elf_method >= 0: + if self._elf_method >= count: + return None + if not fields[self._elf_method].strip() in ('HEAD', 'GET'): + return None + + # Pull the full URL if we can + if self._elf_uri >= 0: + if self._elf_uri >= count: + return None + url = fields[self._elf_uri].strip() 
+ if url != '-': + return url + + # Put together a fragmentary URL + if self._elf_urifrag1 >= 0: + if self._elf_urifrag1 >= count or self._elf_urifrag2 >= count: + return None + urlfrag1 = fields[self._elf_urifrag1].strip() + urlfrag2 = None + if self._elf_urifrag2 >= 0: + urlfrag2 = fields[self._elf_urifrag2] + if urlfrag1 and (urlfrag1 != '-'): + if urlfrag2 and (urlfrag2 != '-'): + urlfrag1 = urlfrag1 + '?' + urlfrag2 + return urlfrag1 + + return None + #end def GetELFLine + + def RecognizeCLFLine(self, line): + """ Try to tokenize a logfile line according to CLF pattern and see if + it works. """ + match = ACCESSLOG_CLF_PATTERN.match(line) + recognize = match and (match.group(1) in ('HEAD', 'GET')) + if recognize: + output.Log('Recognized a Common Logfile Format file.', 2) + return recognize + #end def RecognizeCLFLine + + def GetCLFLine(self, line): + """ Fetch the requested URL from a CLF line """ + match = ACCESSLOG_CLF_PATTERN.match(line) + if match: + request = match.group(1) + if request in ('HEAD', 'GET'): + return match.group(2) + return None + #end def GetCLFLine + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. 
""" + + # Open the file + (frame, file) = OpenFileForRead(self._path, 'ACCESSLOG') + if not file: + return + + # Iterate lines + for line in file.readlines(): + if self._encoding: + line = encoder.WidenText(line, self._encoding) + line = line.strip() + + # If we don't know the format yet, try them both + if (not self._is_clf) and (not self._is_elf): + self._is_elf = self.RecognizeELFLine(line) + self._is_clf = self.RecognizeCLFLine(line) + + # Digest the line + match = None + if self._is_elf: + match = self.GetELFLine(line) + elif self._is_clf: + match = self.GetCLFLine(line) + if not match: + continue + + # Pass it on + url = URL() + url.TrySetAttribute('loc', match) + consumer(url, True) + + file.close() + if frame: + frame.close() + #end def ProduceURLs +#end class InputAccessLog + + +class InputSitemap(xml.sax.handler.ContentHandler): + + """ + Each Input class knows how to yield a set of URLs from a data source. + + This one handles Sitemap files and Sitemap index files. For the sake + of simplicity in design (and simplicity in interfacing with the SAX + package), we do not handle these at the same time, recursively. Instead + we read an index file completely and make a list of Sitemap files, then + go back and process each Sitemap. + """ + + class _ContextBase(object): + + """Base class for context handlers in our SAX processing. A context + handler is a class that is responsible for understanding one level of + depth in the XML schema. The class knows what sub-tags are allowed, + and doing any processing specific for the tag we're in. + + This base class is the API filled in by specific context handlers, + all defined below. 
+ """ + + def __init__(self, subtags): + """Initialize with a sequence of the sub-tags that would be valid in + this context.""" + self._allowed_tags = subtags # Sequence of sub-tags we can have + self._last_tag = None # Most recent seen sub-tag + #end def __init__ + + def AcceptTag(self, tag): + """Returns True iff opening a sub-tag is valid in this context.""" + valid = tag in self._allowed_tags + if valid: + self._last_tag = tag + else: + self._last_tag = None + return valid + #end def AcceptTag + + def AcceptText(self, text): + """Returns True iff a blurb of text is valid in this context.""" + return False + #end def AcceptText + + def Open(self): + """The context is opening. Do initialization.""" + pass + #end def Open + + def Close(self): + """The context is closing. Return our result, if any.""" + pass + #end def Close + + def Return(self, result): + """We're returning to this context after handling a sub-tag. This + method is called with the result data from the sub-tag that just + closed. 
Here in _ContextBase, if we ever see a result it means + the derived child class forgot to override this method.""" + if result: + raise NotImplementedError + #end def Return + #end class _ContextBase + + class _ContextUrlSet(_ContextBase): + + """Context handler for the document node in a Sitemap.""" + + def __init__(self): + InputSitemap._ContextBase.__init__(self, ('url',)) + #end def __init__ + #end class _ContextUrlSet + + class _ContextUrl(_ContextBase): + + """Context handler for a URL node in a Sitemap.""" + + def __init__(self, consumer): + """Initialize this context handler with the callable consumer that + wants our URLs.""" + InputSitemap._ContextBase.__init__(self, URL.__slots__) + self._url = None # The URL object we're building + self._consumer = consumer # Who wants to consume it + #end def __init__ + + def Open(self): + """Initialize the URL.""" + assert not self._url + self._url = URL() + #end def Open + + def Close(self): + """Pass the URL to the consumer and reset it to None.""" + assert self._url + self._consumer(self._url, False) + self._url = None + #end def Close + + def Return(self, result): + """A value context has closed, absorb the data it gave us.""" + assert self._url + if result: + self._url.TrySetAttribute(self._last_tag, result) + #end def Return + #end class _ContextUrl + + class _ContextSitemapIndex(_ContextBase): + + """Context handler for the document node in an index file.""" + + def __init__(self): + InputSitemap._ContextBase.__init__(self, ('sitemap',)) + self._loclist = [] # List of accumulated Sitemap URLs + #end def __init__ + + def Open(self): + """Just a quick verify of state.""" + assert not self._loclist + #end def Open + + def Close(self): + """Return our list of accumulated URLs.""" + if self._loclist: + temp = self._loclist + self._loclist = [] + return temp + #end def Close + + def Return(self, result): + """Getting a new loc URL, add it to the collection.""" + if result: + self._loclist.append(result) + #end def 
Return + #end class _ContextSitemapIndex + + class _ContextSitemap(_ContextBase): + + """Context handler for a Sitemap entry in an index file.""" + + def __init__(self): + InputSitemap._ContextBase.__init__(self, ('loc', 'lastmod')) + self._loc = None # The URL to the Sitemap + #end def __init__ + + def Open(self): + """Just a quick verify of state.""" + assert not self._loc + #end def Open + + def Close(self): + """Return our URL to our parent.""" + if self._loc: + temp = self._loc + self._loc = None + return temp + output.Warn('In the Sitemap index file, a "sitemap" entry had no "loc".') + #end def Close + + def Return(self, result): + """A value has closed. If it was a 'loc', absorb it.""" + if result and (self._last_tag == 'loc'): + self._loc = result + #end def Return + #end class _ContextSitemap + + class _ContextValue(_ContextBase): + + """Context handler for a single value. We return just the value. The + higher level context has to remember what tag led into us.""" + + def __init__(self): + InputSitemap._ContextBase.__init__(self, ()) + self._text = None + #end def __init__ + + def AcceptText(self, text): + """Allow all text, adding it to our buffer.""" + if self._text: + self._text = self._text + text + else: + self._text = text + return True + #end def AcceptText + + def Open(self): + """Initialize our buffer.""" + self._text = None + #end def Open + + def Close(self): + """Return what's in our buffer.""" + text = self._text + self._text = None + if text: + text = text.strip() + return text + #end def Close + #end class _ContextValue + + def __init__(self, attributes): + """Initialize with a dictionary of attributes from our entry in the + config file.""" + xml.sax.handler.ContentHandler.__init__(self) + self._pathlist = None # A list of files + self._current = -1 # Current context in _contexts + self._contexts = None # The stack of contexts we allow + self._contexts_idx = None # ...contexts for index files + self._contexts_stm = None # ...contexts for 
Sitemap files + + if not ValidateAttributes('SITEMAP', attributes, ['path']): + return + + # Init the first file path + path = attributes.get('path') + if path: + path = encoder.MaybeNarrowPath(path) + if os.path.isfile(path): + output.Log('Input: From SITEMAP "%s"' % path, 2) + self._pathlist = [path] + else: + output.Error('Can not locate file "%s"' % path) + else: + output.Error('Sitemap entries must have a "path" attribute.') + #end def __init__ + + def ProduceURLs(self, consumer): + """In general: Produces URLs from our data source, hand them to the + callable consumer. + + In specific: Iterate over our list of paths and delegate the actual + processing to helper methods. This is a complexity no other data source + needs to suffer. We are unique in that we can have files that tell us + to bring in other files. + + Note the decision to allow an index file or not is made in this method. + If we call our parser with (self._contexts == None) the parser will + grab whichever context stack can handle the file. IE: index is allowed. + If instead we set (self._contexts = ...) before parsing, the parser + will only use the stack we specify. IE: index not allowed. 
+ """ + # Set up two stacks of contexts + self._contexts_idx = [InputSitemap._ContextSitemapIndex(), + InputSitemap._ContextSitemap(), + InputSitemap._ContextValue()] + + self._contexts_stm = [InputSitemap._ContextUrlSet(), + InputSitemap._ContextUrl(consumer), + InputSitemap._ContextValue()] + + # Process the first file + assert self._pathlist + path = self._pathlist[0] + self._contexts = None # We allow an index file here + self._ProcessFile(path) + + # Iterate over remaining files + self._contexts = self._contexts_stm # No index files allowed + for path in self._pathlist[1:]: + self._ProcessFile(path) + #end def ProduceURLs + + def _ProcessFile(self, path): + """Do per-file reading/parsing/consuming for the file path passed in.""" + assert path + + # Open our file + (frame, file) = OpenFileForRead(path, 'SITEMAP') + if not file: + return + + # Rev up the SAX engine + try: + self._current = -1 + xml.sax.parse(file, self) + except SchemaError: + output.Error('An error in file "%s" made us abort reading the Sitemap.' + % path) + except IOError: + output.Error('Cannot read from file "%s"' % path) + except xml.sax._exceptions.SAXParseException, e: + output.Error('XML error in the file "%s" (line %d, column %d): %s' % + (path, e._linenum, e._colnum, e.getMessage())) + + # Clean up + file.close() + if frame: + frame.close() + #end def _ProcessFile + + def _MungeLocationListIntoFiles(self, urllist): + """Given a list of URLs, munge them into our self._pathlist property. + We do this by assuming all the files live in the same directory as + the first file in the existing pathlist. That is, we assume a + Sitemap index points to Sitemaps only in the same directory. This + is not true in general, but will be true for any output produced + by this script. 
+ """ + assert self._pathlist + path = self._pathlist[0] + path = os.path.normpath(path) + dir = os.path.dirname(path) + wide = False + if type(path) == types.UnicodeType: + wide = True + + for url in urllist: + url = URL.Canonicalize(url) + output.Log('Index points to Sitemap file at: %s' % url, 2) + (scheme, netloc, path, query, frag) = urlparse.urlsplit(url) + file = os.path.basename(path) + file = urllib.unquote(file) + if wide: + file = encoder.WidenText(file) + if dir: + file = dir + os.sep + file + if file: + self._pathlist.append(file) + output.Log('Will attempt to read Sitemap file: %s' % file, 1) + #end def _MungeLocationListIntoFiles + + def startElement(self, tag, attributes): + """SAX processing, called per node in the config stream. + As long as the new tag is legal in our current context, this + becomes an Open call on one context deeper. + """ + # If this is the document node, we may have to look for a context stack + if (self._current < 0) and not self._contexts: + assert self._contexts_idx and self._contexts_stm + if tag == 'urlset': + self._contexts = self._contexts_stm + elif tag == 'sitemapindex': + self._contexts = self._contexts_idx + output.Log('File is a Sitemap index.', 2) + else: + output.Error('The document appears to be neither a Sitemap nor a ' + 'Sitemap index.') + raise SchemaError + + # Display a kinder error on a common mistake + if (self._current < 0) and (self._contexts == self._contexts_stm) and ( + tag == 'sitemapindex'): + output.Error('A Sitemap index can not refer to another Sitemap index.') + raise SchemaError + + # Verify no unexpected attributes + if attributes: + text = '' + for attr in attributes.keys(): + # The document node will probably have namespaces + if self._current < 0: + if attr.find('xmlns') >= 0: + continue + if attr.find('xsi') >= 0: + continue + if text: + text = text + ', ' + text = text + attr + if text: + output.Warn('Did not expect any attributes on any tag, instead tag ' + '"%s" had attributes: %s' % 
(tag, text)) + + # Switch contexts + if (self._current < 0) or (self._contexts[self._current].AcceptTag(tag)): + self._current = self._current + 1 + assert self._current < len(self._contexts) + self._contexts[self._current].Open() + else: + output.Error('Can not accept tag "%s" where it appears.' % tag) + raise SchemaError + #end def startElement + + def endElement(self, tag): + """SAX processing, called per node in the config stream. + This becomes a call to Close on one context followed by a call + to Return on the previous. + """ + tag = tag # Avoid warning on unused argument + assert self._current >= 0 + retval = self._contexts[self._current].Close() + self._current = self._current - 1 + if self._current >= 0: + self._contexts[self._current].Return(retval) + elif retval and (self._contexts == self._contexts_idx): + self._MungeLocationListIntoFiles(retval) + #end def endElement + + def characters(self, text): + """SAX processing, called when text values are read. Important to + note that one single text value may be split across multiple calls + of this method. + """ + if (self._current < 0) or ( + not self._contexts[self._current].AcceptText(text)): + if text.strip(): + output.Error('Can not accept text "%s" where it appears.' % text) + raise SchemaError + #end def characters +#end class InputSitemap + + +class FilePathGenerator: + """ + This class generates filenames in a series, upon request. + You can request any iteration number at any time, you don't + have to go in order. + + Example of iterations for '/path/foo.xml.gz': + 0 --> /path/foo.xml.gz + 1 --> /path/foo1.xml.gz + 2 --> /path/foo2.xml.gz + _index.xml --> /path/foo_index.xml + """ + + def __init__(self): + self.is_gzip = False # Is this a GZIP file? + + self._path = None # '/path/' + self._prefix = None # 'foo' + self._suffix = None # '.xml.gz' + #end def __init__ + + def Preload(self, path): + """ Splits up a path into forms ready for recombination. 
""" + path = encoder.MaybeNarrowPath(path) + + # Get down to a base name + path = os.path.normpath(path) + base = os.path.basename(path).lower() + if not base: + output.Error('Couldn\'t parse the file path: %s' % path) + return False + lenbase = len(base) + + # Recognize extension + lensuffix = 0 + compare_suffix = ['.xml', '.xml.gz', '.gz'] + for suffix in compare_suffix: + if base.endswith(suffix): + lensuffix = len(suffix) + break + if not lensuffix: + output.Error('The path "%s" doesn\'t end in a supported file ' + 'extension.' % path) + return False + self.is_gzip = suffix.endswith('.gz') + + # Split the original path + lenpath = len(path) + self._path = path[:lenpath-lenbase] + self._prefix = path[lenpath-lenbase:lenpath-lensuffix] + self._suffix = path[lenpath-lensuffix:] + + return True + #end def Preload + + def GeneratePath(self, instance): + """ Generates the iterations, as described above. """ + prefix = self._path + self._prefix + if type(instance) == types.IntType: + if instance: + return '%s%d%s' % (prefix, instance, self._suffix) + return prefix + self._suffix + return prefix + instance + #end def GeneratePath + + def GenerateURL(self, instance, root_url): + """ Generates iterations, but as a URL instead of a path. """ + prefix = root_url + self._prefix + retval = None + if type(instance) == types.IntType: + if instance: + retval = '%s%d%s' % (prefix, instance, self._suffix) + else: + retval = prefix + self._suffix + else: + retval = prefix + instance + return URL.Canonicalize(retval) + #end def GenerateURL + + def GenerateWildURL(self, root_url): + """ Generates a wildcard that should match all our iterations """ + prefix = URL.Canonicalize(root_url + self._prefix) + temp = URL.Canonicalize(prefix + self._suffix) + suffix = temp[len(prefix):] + return prefix + '*' + suffix + #end def GenerateURL +#end class FilePathGenerator + + +class PerURLStatistics: + """ Keep track of some simple per-URL statistics, like file extension. 
""" + + def __init__(self): + self._extensions = {} # Count of extension instances + #end def __init__ + + def Consume(self, url): + """ Log some stats for the URL. At the moment, that means extension. """ + if url and url.loc: + (scheme, netloc, path, query, frag) = urlparse.urlsplit(url.loc) + if not path: + return + + # Recognize directories + if path.endswith('/'): + if self._extensions.has_key('/'): + self._extensions['/'] = self._extensions['/'] + 1 + else: + self._extensions['/'] = 1 + return + + # Strip to a filename + i = path.rfind('/') + if i >= 0: + assert i < len(path) + path = path[i:] + + # Find extension + i = path.rfind('.') + if i > 0: + assert i < len(path) + ext = path[i:].lower() + if self._extensions.has_key(ext): + self._extensions[ext] = self._extensions[ext] + 1 + else: + self._extensions[ext] = 1 + else: + if self._extensions.has_key('(no extension)'): + self._extensions['(no extension)'] = self._extensions[ + '(no extension)'] + 1 + else: + self._extensions['(no extension)'] = 1 + #end def Consume + + def Log(self): + """ Dump out stats to the output. """ + if len(self._extensions): + output.Log('Count of file extensions on URLs:', 1) + set = self._extensions.keys() + set.sort() + for ext in set: + output.Log(' %7d %s' % (self._extensions[ext], ext), 1) + #end def Log + +class Sitemap(xml.sax.handler.ContentHandler): + """ + This is the big workhorse class that processes your inputs and spits + out sitemap files. It is built as a SAX handler for set up purposes. + That is, it processes an XML stream to bring itself up. 
+ """ + + def __init__(self, suppress_notify): + xml.sax.handler.ContentHandler.__init__(self) + self._filters = [] # Filter objects + self._inputs = [] # Input objects + self._urls = {} # Maps URLs to count of dups + self._set = [] # Current set of URLs + self._filegen = None # Path generator for output files + self._wildurl1 = None # Sitemap URLs to filter out + self._wildurl2 = None # Sitemap URLs to filter out + self._sitemaps = 0 # Number of output files + # We init _dup_max to 2 so the default priority is 0.5 instead of 1.0 + self._dup_max = 2 # Max number of duplicate URLs + self._stat = PerURLStatistics() # Some simple stats + self._in_site = False # SAX: are we in a Site node? + self._in_Site_ever = False # SAX: were we ever in a Site? + + self._default_enc = None # Best encoding to try on URLs + self._base_url = None # Prefix to all valid URLs + self._store_into = None # Output filepath + self._suppress = suppress_notify # Suppress notify of servers + #end def __init__ + + def ValidateBasicConfig(self): + """ Verifies (and cleans up) the basic user-configurable options. 
""" + all_good = True + + if self._default_enc: + encoder.SetUserEncoding(self._default_enc) + + # Canonicalize the base_url + if all_good and not self._base_url: + output.Error('A site needs a "base_url" attribute.') + all_good = False + if all_good and not URL.IsAbsolute(self._base_url): + output.Error('The "base_url" must be absolute, not relative: %s' % + self._base_url) + all_good = False + if all_good: + self._base_url = URL.Canonicalize(self._base_url) + if not self._base_url.endswith('/'): + self._base_url = self._base_url + '/' + output.Log('BaseURL is set to: %s' % self._base_url, 2) + + # Load store_into into a generator + if all_good: + if self._store_into: + self._filegen = FilePathGenerator() + if not self._filegen.Preload(self._store_into): + all_good = False + else: + output.Error('A site needs a "store_into" attribute.') + all_good = False + + # Ask the generator for patterns on what its output will look like + if all_good: + self._wildurl1 = self._filegen.GenerateWildURL(self._base_url) + self._wildurl2 = self._filegen.GenerateURL(SITEINDEX_SUFFIX, + self._base_url) + + # Unify various forms of False + if all_good: + if self._suppress: + if (type(self._suppress) == types.StringType) or (type(self._suppress) + == types.UnicodeType): + if (self._suppress == '0') or (self._suppress.lower() == 'false'): + self._suppress = False + + # Done + if not all_good: + output.Log('See "example_config.xml" for more information.', 0) + return all_good + #end def ValidateBasicConfig + + def Generate(self): + """ Run over all the Inputs and ask them to Produce """ + # Run the inputs + for input in self._inputs: + input.ProduceURLs(self.ConsumeURL) + + # Do last flushes + if len(self._set): + self.FlushSet() + if not self._sitemaps: + output.Warn('No URLs were recorded, writing an empty sitemap.') + self.FlushSet() + + # Write an index as needed + if self._sitemaps > 1: + self.WriteIndex() + + # Notify + self.NotifySearch() + + # Dump stats + self._stat.Log() + #end 
def Generate + + def ConsumeURL(self, url, allow_fragment): + """ + All per-URL processing comes together here, regardless of Input. + Here we run filters, remove duplicates, spill to disk as needed, etc. + """ + if not url: + return + + # Validate + if not url.Validate(self._base_url, allow_fragment): + return + + # Run filters + accept = None + for filter in self._filters: + accept = filter.Apply(url) + if accept != None: + break + if not (accept or (accept == None)): + url.Log(prefix='FILTERED', level=2) + return + + # Ignore our out output URLs + if fnmatch.fnmatchcase(url.loc, self._wildurl1) or fnmatch.fnmatchcase( + url.loc, self._wildurl2): + url.Log(prefix='IGNORED (output file)', level=2) + return + + # Note the sighting + hash = url.MakeHash() + if self._urls.has_key(hash): + dup = self._urls[hash] + if dup > 0: + dup = dup + 1 + self._urls[hash] = dup + if self._dup_max < dup: + self._dup_max = dup + url.Log(prefix='DUPLICATE') + return + + # Acceptance -- add to set + self._urls[hash] = 1 + self._set.append(url) + self._stat.Consume(url) + url.Log() + + # Flush the set if needed + if len(self._set) >= MAXURLS_PER_SITEMAP: + self.FlushSet() + #end def ConsumeURL + + def FlushSet(self): + """ + Flush the current set of URLs to the output. This is a little + slow because we like to sort them all and normalize the priorities + before dumping. 
+ """ + + # Sort and normalize + output.Log('Sorting and normalizing collected URLs.', 1) + self._set.sort() + for url in self._set: + hash = url.MakeHash() + dup = self._urls[hash] + if dup > 0: + self._urls[hash] = -1 + if not url.priority: + url.priority = '%.4f' % (float(dup) / float(self._dup_max)) + + # Get the filename we're going to write to + filename = self._filegen.GeneratePath(self._sitemaps) + if not filename: + output.Fatal('Unexpected: Couldn\'t generate output filename.') + self._sitemaps = self._sitemaps + 1 + output.Log('Writing Sitemap file "%s" with %d URLs' % + (filename, len(self._set)), 1) + + # Write to it + frame = None + file = None + + try: + if self._filegen.is_gzip: + basename = os.path.basename(filename); + frame = open(filename, 'wb') + file = gzip.GzipFile(fileobj=frame, filename=basename, mode='wt') + else: + file = open(filename, 'wt') + + file.write(SITEMAP_HEADER) + for url in self._set: + url.WriteXML(file) + file.write(SITEMAP_FOOTER) + + file.close() + if frame: + frame.close() + + frame = None + file = None + except IOError: + output.Fatal('Couldn\'t write out to file: %s' % filename) + os.chmod(filename, 0644) + + # Flush + self._set = [] + #end def FlushSet + + def WriteIndex(self): + """ Write the master index of all Sitemap files """ + # Make a filename + filename = self._filegen.GeneratePath(SITEINDEX_SUFFIX) + if not filename: + output.Fatal('Unexpected: Couldn\'t generate output index filename.') + output.Log('Writing index file "%s" with %d Sitemaps' % + (filename, self._sitemaps), 1) + + # Make a lastmod time + lastmod = TimestampISO8601(time.time()) + + # Write to it + try: + fd = open(filename, 'wt') + fd.write(SITEINDEX_HEADER) + + for mapnumber in range(0,self._sitemaps): + # Write the entry + mapurl = self._filegen.GenerateURL(mapnumber, self._base_url) + mapattributes = { 'loc' : mapurl, 'lastmod' : lastmod } + fd.write(SITEINDEX_ENTRY % mapattributes) + + fd.write(SITEINDEX_FOOTER) + + fd.close() + fd = None + 
except IOError: + output.Fatal('Couldn\'t write out to file: %s' % filename) + os.chmod(filename, 0644) + #end def WriteIndex + + def NotifySearch(self): + """ Send notification of the new Sitemap(s) to the search engines. """ + if self._suppress: + output.Log('Search engine notification is suppressed.', 1) + return + + output.Log('Notifying search engines.', 1) + + # Override the urllib's opener class with one that doesn't ignore 404s + class ExceptionURLopener(urllib.FancyURLopener): + def http_error_default(self, url, fp, errcode, errmsg, headers): + output.Log('HTTP error %d: %s' % (errcode, errmsg), 2) + raise IOError + #end def http_error_default + #end class ExceptionURLOpener + old_opener = urllib._urlopener + urllib._urlopener = ExceptionURLopener() + + # Build the URL we want to send in + if self._sitemaps > 1: + url = self._filegen.GenerateURL(SITEINDEX_SUFFIX, self._base_url) + else: + url = self._filegen.GenerateURL(0, self._base_url) + + # Test if we can hit it ourselves + try: + u = urllib.urlopen(url) + u.close() + except IOError: + output.Error('When attempting to access our generated Sitemap at the ' + 'following URL:\n %s\n we failed to read it. Please ' + 'verify the store_into path you specified in\n' + ' your configuration file is web-accessable. Consult ' + 'the FAQ for more\n information.' 
% url) + output.Warn('Proceeding to notify with an unverifyable URL.') + + # Cycle through notifications + # To understand this, see the comment near the NOTIFICATION_SITES comment + for ping in NOTIFICATION_SITES: + query_map = ping[3] + query_attr = ping[5] + query_map[query_attr] = url + query = urllib.urlencode(query_map) + notify = urlparse.urlunsplit((ping[0], ping[1], ping[2], query, ping[4])) + + # Send the notification + output.Log('Notifying: %s' % ping[1], 1) + output.Log('Notification URL: %s' % notify, 2) + try: + u = urllib.urlopen(notify) + u.read() + u.close() + except IOError: + output.Warn('Cannot contact: %s' % ping[1]) + + if old_opener: + urllib._urlopener = old_opener + #end def NotifySearch + + def startElement(self, tag, attributes): + """ SAX processing, called per node in the config stream. """ + + if tag == 'site': + if self._in_site: + output.Error('Can not nest Site entries in the configuration.') + else: + self._in_site = True + + if not ValidateAttributes('SITE', attributes, + ('verbose', 'default_encoding', 'base_url', 'store_into', + 'suppress_search_engine_notify')): + return + + verbose = attributes.get('verbose', 0) + if verbose: + output.SetVerbose(verbose) + + self._default_enc = attributes.get('default_encoding') + self._base_url = attributes.get('base_url') + self._store_into = attributes.get('store_into') + if not self._suppress: + self._suppress = attributes.get('suppress_search_engine_notify', + False) + self.ValidateBasicConfig() + + elif tag == 'filter': + self._filters.append(Filter(attributes)) + + elif tag == 'url': + self._inputs.append(InputURL(attributes)) + + elif tag == 'urllist': + for attributeset in ExpandPathAttribute(attributes, 'path'): + self._inputs.append(InputURLList(attributeset)) + + elif tag == 'directory': + self._inputs.append(InputDirectory(attributes, self._base_url)) + + elif tag == 'accesslog': + for attributeset in ExpandPathAttribute(attributes, 'path'): + 
self._inputs.append(InputAccessLog(attributeset)) + + elif tag == 'sitemap': + for attributeset in ExpandPathAttribute(attributes, 'path'): + self._inputs.append(InputSitemap(attributeset)) + + else: + output.Error('Unrecognized tag in the configuration: %s' % tag) + #end def startElement + + def endElement(self, tag): + """ SAX processing, called per node in the config stream. """ + if tag == 'site': + assert self._in_site + self._in_site = False + self._in_site_ever = True + #end def endElement + + def endDocument(self): + """ End of SAX, verify we can proceed. """ + if not self._in_site_ever: + output.Error('The configuration must specify a "site" element.') + else: + if not self._inputs: + output.Warn('There were no inputs to generate a sitemap from.') + #end def endDocument +#end class Sitemap + + +def ValidateAttributes(tag, attributes, goodattributes): + """ Makes sure 'attributes' does not contain any attribute not + listed in 'goodattributes' """ + all_good = True + for attr in attributes.keys(): + if not attr in goodattributes: + output.Error('Unknown %s attribute: %s' % (tag, attr)) + all_good = False + return all_good +#end def ValidateAttributes + +def ExpandPathAttribute(src, attrib): + """ Given a dictionary of attributes, return a list of dictionaries + with all the same attributes except for the one named attrib. + That one, we treat as a file path and expand into all its possible + variations. """ + # Do the path expansion. On any error, just return the source dictionary. 
+ path = src.get(attrib) + if not path: + return [src] + path = encoder.MaybeNarrowPath(path); + pathlist = glob.glob(path) + if not pathlist: + return [src] + + # If this isn't actually a dictionary, make it one + if type(src) != types.DictionaryType: + tmp = {} + for key in src.keys(): + tmp[key] = src[key] + src = tmp + + # Create N new dictionaries + retval = [] + for path in pathlist: + dst = src.copy() + dst[attrib] = path + retval.append(dst) + + return retval +#end def ExpandPathAttribute + +def OpenFileForRead(path, logtext): + """ Opens a text file, be it GZip or plain """ + + frame = None + file = None + + if not path: + return (frame, file) + + try: + if path.endswith('.gz'): + frame = open(path, 'rb') + file = gzip.GzipFile(fileobj=frame, mode='rt') + else: + file = open(path, 'rt') + + if logtext: + output.Log('Opened %s file: %s' % (logtext, path), 1) + else: + output.Log('Opened file: %s' % path, 1) + except IOError: + output.Error('Can not open file: %s' % path) + + return (frame, file) +#end def OpenFileForRead + +def TimestampISO8601(t): + """Seconds since epoch (1970-01-01) --> ISO 8601 time string.""" + return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(t)) +#end def TimestampISO8601 + +def CreateSitemapFromFile(configpath, suppress_notify): + """ Sets up a new Sitemap object from the specified configuration file. 
""" + + # Remember error count on the way in + num_errors = output.num_errors + + # Rev up SAX to parse the config + sitemap = Sitemap(suppress_notify) + try: + output.Log('Reading configuration file: %s' % configpath, 0) + xml.sax.parse(configpath, sitemap) + except IOError: + output.Error('Cannot read configuration file: %s' % configpath) + except xml.sax._exceptions.SAXParseException, e: + output.Error('XML error in the config file (line %d, column %d): %s' % + (e._linenum, e._colnum, e.getMessage())) + except xml.sax._exceptions.SAXReaderNotAvailable: + output.Error('Some installs of Python 2.2 did not include complete support' + ' for XML.\n Please try upgrading your version of Python' + ' and re-running the script.') + + # If we added any errors, return no sitemap + if num_errors == output.num_errors: + return sitemap + return None +#end def CreateSitemapFromFile + +def ProcessCommandFlags(args): + """ + Parse command line flags per specified usage, pick off key, value pairs + All flags of type "--key=value" will be processed as __flags[key] = value, + "--option" will be processed as __flags[option] = option + """ + + flags = {} + rkeyval = '--(?P\S*)[=](?P\S*)' # --key=val + roption = '--(?P