| author | 2013-02-13 18:35:39 -0300 |
|---|---|
| committer | 2013-02-13 18:35:39 -0300 |
| commit | a40930043121a4b60de8526d58417761a54ab718 |
| tree | 383c5cf8e320761ee942619282fe51be625179a7 /nikola |
| parent | 9c5708cc92af894e414bc76ee35ec2230de5d288 |
Imported Upstream version 5.2 (upstream/5.2)
Diffstat (limited to 'nikola')
101 files changed, 4329 insertions(+), 3414 deletions(-)
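The single largest change in this import is the deletion of the bundled `nikola/PyRSS2Gen.py` (442 lines). For reference on the API that disappears here, the following is a minimal usage sketch reconstructed from the deleted file itself — the feed values are hypothetical, and the module is Python 2-era code (it still checks `basestring`), so this assumes a pre-commit tree running under Python 2:

```python
# Hypothetical feed built with the RSS2/RSSItem/Guid classes defined in the
# nikola/PyRSS2Gen.py that this commit removes.
import datetime
from nikola.PyRSS2Gen import RSS2, RSSItem, Guid

feed = RSS2(
    title="Demo feed",
    link="http://example.com/",
    description="A minimal feed using the bundled PyRSS2Gen.",
    # Per _format_date's docstring, datetimes must already be in GMT.
    lastBuildDate=datetime.datetime(2013, 2, 13, 21, 35, 39),
    items=[
        RSSItem(
            # RSSItem requires at least one of 'title' or 'description'.
            title="Hello world",
            link="http://example.com/posts/hello.html",
            description="First post.",
            guid=Guid("http://example.com/posts/hello.html"),  # isPermaLink defaults on
            pubDate=datetime.datetime(2013, 2, 13, 21, 35, 39),
        ),
    ],
)

# write_xml comes from the WriteXmlMixin and drives a SAX XMLGenerator.
with open("rss.xml", "w") as outfile:
    feed.write_xml(outfile, encoding="utf-8")
```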
diff --git a/nikola/PyRSS2Gen.py b/nikola/PyRSS2Gen.py deleted file mode 100644 index 198ebb5..0000000 --- a/nikola/PyRSS2Gen.py +++ /dev/null @@ -1,442 +0,0 @@ -"""PyRSS2Gen - A Python library for generating RSS 2.0 feeds.""" - -# flake8: noqa - -__name__ = "PyRSS2Gen" -__version__ = (1, 0, 0) -__author__ = "Andrew Dalke <dalke@dalkescientific.com>" - -_generator_name = __name__ + "-" + ".".join(map(str, __version__)) - -import datetime -import io - -# Could make this the base class; will need to add 'publish' -class WriteXmlMixin: - def write_xml(self, outfile, encoding = "iso-8859-1"): - from xml.sax import saxutils - handler = saxutils.XMLGenerator(outfile, encoding) - handler.startDocument() - self.publish(handler) - handler.endDocument() - - def to_xml(self, encoding = "iso-8859-1"): - f = io.StringIO() - self.write_xml(f, encoding) - return f.getvalue() - - -def _element(handler, name, obj, d = {}): - if isinstance(obj, basestring) or obj is None: - # special-case handling to make the API easier - # to use for the common case. - handler.startElement(name, d) - if obj is not None: - handler.characters(obj) - handler.endElement(name) - else: - # It better know how to emit the correct XML. - obj.publish(handler) - -def _opt_element(handler, name, obj): - if obj is None: - return - _element(handler, name, obj) - - -def _format_date(dt): - """convert a datetime into an RFC 822 formatted date - - Input date must be in GMT. - """ - # Looks like: - # Sat, 07 Sep 2002 00:00:01 GMT - # Can't use strftime because that's locale dependent - # - # Isn't there a standard way to do this for Python? The - # rfc822 and email.Utils modules assume a timestamp. The - # following is based on the rfc822 module. - return "%s, %02d %s %04d %02d:%02d:%02d GMT" % ( - ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][dt.weekday()], - dt.day, - ["Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][dt.month-1], - dt.year, dt.hour, dt.minute, dt.second) - - -## -# A couple simple wrapper objects for the fields which -# take a simple value other than a string. -class IntElement: - """implements the 'publish' API for integers - - Takes the tag name and the integer value to publish. - - (Could be used for anything which uses str() to be published - to text for XML.) - """ - element_attrs = {} - def __init__(self, name, val): - self.name = name - self.val = val - def publish(self, handler): - handler.startElement(self.name, self.element_attrs) - handler.characters(str(self.val)) - handler.endElement(self.name) - -class DateElement: - """implements the 'publish' API for a datetime.datetime - - Takes the tag name and the datetime to publish. - - Converts the datetime to RFC 2822 timestamp (4-digit year). 
- """ - def __init__(self, name, dt): - self.name = name - self.dt = dt - def publish(self, handler): - _element(handler, self.name, _format_date(self.dt)) -#### - -class Category: - """Publish a category element""" - def __init__(self, category, domain = None): - self.category = category - self.domain = domain - def publish(self, handler): - d = {} - if self.domain is not None: - d["domain"] = self.domain - _element(handler, "category", self.category, d) - -class Cloud: - """Publish a cloud""" - def __init__(self, domain, port, path, - registerProcedure, protocol): - self.domain = domain - self.port = port - self.path = path - self.registerProcedure = registerProcedure - self.protocol = protocol - def publish(self, handler): - _element(handler, "cloud", None, { - "domain": self.domain, - "port": str(self.port), - "path": self.path, - "registerProcedure": self.registerProcedure, - "protocol": self.protocol}) - -class Image: - """Publish a channel Image""" - element_attrs = {} - def __init__(self, url, title, link, - width = None, height = None, description = None): - self.url = url - self.title = title - self.link = link - self.width = width - self.height = height - self.description = description - - def publish(self, handler): - handler.startElement("image", self.element_attrs) - - _element(handler, "url", self.url) - _element(handler, "title", self.title) - _element(handler, "link", self.link) - - width = self.width - if isinstance(width, int): - width = IntElement("width", width) - _opt_element(handler, "width", width) - - height = self.height - if isinstance(height, int): - height = IntElement("height", height) - _opt_element(handler, "height", height) - - _opt_element(handler, "description", self.description) - - handler.endElement("image") - -class Guid: - """Publish a guid - - Defaults to being a permalink, which is the assumption if it's - omitted. Hence strings are always permalinks. - """ - def __init__(self, guid, isPermaLink = 1): - self.guid = guid - self.isPermaLink = isPermaLink - def publish(self, handler): - d = {} - if self.isPermaLink: - d["isPermaLink"] = "true" - else: - d["isPermaLink"] = "false" - _element(handler, "guid", self.guid, d) - -class TextInput: - """Publish a textInput - - Apparently this is rarely used. - """ - element_attrs = {} - def __init__(self, title, description, name, link): - self.title = title - self.description = description - self.name = name - self.link = link - - def publish(self, handler): - handler.startElement("textInput", self.element_attrs) - _element(handler, "title", self.title) - _element(handler, "description", self.description) - _element(handler, "name", self.name) - _element(handler, "link", self.link) - handler.endElement("textInput") - - -class Enclosure: - """Publish an enclosure""" - def __init__(self, url, length, type): - self.url = url - self.length = length - self.type = type - def publish(self, handler): - _element(handler, "enclosure", None, - {"url": self.url, - "length": str(self.length), - "type": self.type, - }) - -class Source: - """Publish the item's original source, used by aggregators""" - def __init__(self, name, url): - self.name = name - self.url = url - def publish(self, handler): - _element(handler, "source", self.name, {"url": self.url}) - -class SkipHours: - """Publish the skipHours - - This takes a list of hours, as integers. 
- """ - element_attrs = {} - def __init__(self, hours): - self.hours = hours - def publish(self, handler): - if self.hours: - handler.startElement("skipHours", self.element_attrs) - for hour in self.hours: - _element(handler, "hour", str(hour)) - handler.endElement("skipHours") - -class SkipDays: - """Publish the skipDays - - This takes a list of days as strings. - """ - element_attrs = {} - def __init__(self, days): - self.days = days - def publish(self, handler): - if self.days: - handler.startElement("skipDays", self.element_attrs) - for day in self.days: - _element(handler, "day", day) - handler.endElement("skipDays") - -class RSS2(WriteXmlMixin): - """The main RSS class. - - Stores the channel attributes, with the "category" elements under - ".categories" and the RSS items under ".items". - """ - - rss_attrs = {"version": "2.0"} - element_attrs = {} - def __init__(self, - title, - link, - description, - - language = None, - copyright = None, - managingEditor = None, - webMaster = None, - pubDate = None, # a datetime, *in* *GMT* - lastBuildDate = None, # a datetime - - categories = None, # list of strings or Category - generator = _generator_name, - docs = "http://blogs.law.harvard.edu/tech/rss", - cloud = None, # a Cloud - ttl = None, # integer number of minutes - - image = None, # an Image - rating = None, # a string; I don't know how it's used - textInput = None, # a TextInput - skipHours = None, # a SkipHours with a list of integers - skipDays = None, # a SkipDays with a list of strings - - items = None, # list of RSSItems - ): - self.title = title - self.link = link - self.description = description - self.language = language - self.copyright = copyright - self.managingEditor = managingEditor - - self.webMaster = webMaster - self.pubDate = pubDate - self.lastBuildDate = lastBuildDate - - if categories is None: - categories = [] - self.categories = categories - self.generator = generator - self.docs = docs - self.cloud = cloud - self.ttl = ttl - self.image = image - self.rating = rating - self.textInput = textInput - self.skipHours = skipHours - self.skipDays = skipDays - - if items is None: - items = [] - self.items = items - - def publish(self, handler): - handler.startElement("rss", self.rss_attrs) - handler.startElement("channel", self.element_attrs) - _element(handler, "title", self.title) - _element(handler, "link", self.link) - _element(handler, "description", self.description) - - self.publish_extensions(handler) - - _opt_element(handler, "language", self.language) - _opt_element(handler, "copyright", self.copyright) - _opt_element(handler, "managingEditor", self.managingEditor) - _opt_element(handler, "webMaster", self.webMaster) - - pubDate = self.pubDate - if isinstance(pubDate, datetime.datetime): - pubDate = DateElement("pubDate", pubDate) - _opt_element(handler, "pubDate", pubDate) - - lastBuildDate = self.lastBuildDate - if isinstance(lastBuildDate, datetime.datetime): - lastBuildDate = DateElement("lastBuildDate", lastBuildDate) - _opt_element(handler, "lastBuildDate", lastBuildDate) - - for category in self.categories: - if isinstance(category, basestring): - category = Category(category) - category.publish(handler) - - _opt_element(handler, "generator", self.generator) - _opt_element(handler, "docs", self.docs) - - if self.cloud is not None: - self.cloud.publish(handler) - - ttl = self.ttl - if isinstance(self.ttl, int): - ttl = IntElement("ttl", ttl) - _opt_element(handler, "tt", ttl) - - if self.image is not None: - self.image.publish(handler) - - 
_opt_element(handler, "rating", self.rating) - if self.textInput is not None: - self.textInput.publish(handler) - if self.skipHours is not None: - self.skipHours.publish(handler) - if self.skipDays is not None: - self.skipDays.publish(handler) - - for item in self.items: - item.publish(handler) - - handler.endElement("channel") - handler.endElement("rss") - - def publish_extensions(self, handler): - # Derived classes can hook into this to insert - # output after the three required fields. - pass - - - -class RSSItem(WriteXmlMixin): - """Publish an RSS Item""" - element_attrs = {} - def __init__(self, - title = None, # string - link = None, # url as string - description = None, # string - author = None, # email address as string - categories = None, # list of string or Category - comments = None, # url as string - enclosure = None, # an Enclosure - guid = None, # a unique string - pubDate = None, # a datetime - source = None, # a Source - ): - - if title is None and description is None: - raise TypeError( - "must define at least one of 'title' or 'description'") - self.title = title - self.link = link - self.description = description - self.author = author - if categories is None: - categories = [] - self.categories = categories - self.comments = comments - self.enclosure = enclosure - self.guid = guid - self.pubDate = pubDate - self.source = source - # It sure does get tedious typing these names three times... - - def publish(self, handler): - handler.startElement("item", self.element_attrs) - _opt_element(handler, "title", self.title) - _opt_element(handler, "link", self.link) - self.publish_extensions(handler) - _opt_element(handler, "description", self.description) - _opt_element(handler, "author", self.author) - - for category in self.categories: - if isinstance(category, basestring): - category = Category(category) - category.publish(handler) - - _opt_element(handler, "comments", self.comments) - if self.enclosure is not None: - self.enclosure.publish(handler) - _opt_element(handler, "guid", self.guid) - - pubDate = self.pubDate - if isinstance(pubDate, datetime.datetime): - pubDate = DateElement("pubDate", pubDate) - _opt_element(handler, "pubDate", pubDate) - - if self.source is not None: - self.source.publish(handler) - - handler.endElement("item") - - def publish_extensions(self, handler): - # Derived classes can hook into this to insert - # output after the title and link elements - pass diff --git a/nikola/__init__.py b/nikola/__init__.py index 94031c9..e6a5dd3 100644 --- a/nikola/__init__.py +++ b/nikola/__init__.py @@ -1,5 +1,4 @@ from __future__ import absolute_import from .nikola import Nikola # NOQA -from . import plugins - +from . import plugins # NOQA diff --git a/nikola/conf.py.in b/nikola/conf.py.in index 897a941..b723744 100755..100644 --- a/nikola/conf.py.in +++ b/nikola/conf.py.in @@ -5,9 +5,9 @@ from __future__ import unicode_literals import os import time -######################################## +############################################## # Configuration, please edit -######################################## +############################################## </%text> # Data about this site @@ -17,6 +17,51 @@ BLOG_URL = "${BLOG_URL}" BLOG_EMAIL = "${BLOG_EMAIL}" BLOG_DESCRIPTION = "${BLOG_DESCRIPTION}" +# Nikola is multilingual! 
+# +# Currently supported languages are: +# English -> en +# Greek -> gr +# German -> de +# French -> fr +# Polish -> pl +# Russian -> ru +# Spanish -> es +# Italian -> it +# Simplified Chinese -> zh-cn +# +# If you want to use Nikola with a non-supported language you have to provide +# a module containing the necessary translations +# (p.e. look at the modules at: ./nikola/data/themes/default/messages/fr.py). +# If a specific post is not translated to a language, then the version +# in the default language will be shown instead. + +# What is the default language? +DEFAULT_LANG = "${DEFAULT_LANG}" + +# What other languages do you have? +# The format is {"translationcode" : "path/to/translation" } +# the path will be used as a prefix for the generated pages location +TRANSLATIONS = { + "${DEFAULT_LANG}": "", + # Example for another language: + # "es": "./es", + } + +# Links for the sidebar / navigation bar. +# You should provide a key-value pair for each used language. +SIDEBAR_LINKS = { + DEFAULT_LANG: ( + ('/archive.html', 'Archives'), + ('/categories/index.html', 'Tags'), + ), +} + +<%text> +############################################## +# Below this point, everything is optional +############################################## +</%text> # post_pages contains (wildcard, destination, template, use_in_feed) tuples. # @@ -56,38 +101,6 @@ post_pages = ${POST_PAGES} # 'html' assumes the file is html and just copies it post_compilers = ${POST_COMPILERS} -# Nikola is multilingual! -# -# Currently supported languages are: -# English -> en -# Greek -> gr -# German -> de -# French -> fr -# Russian -> ru -# Spanish -> es -# Italian -> it -# -# If you want to use Nikola with a non-supported language you have to provide -# a module containing the necessary translations -# (p.e. look at the modules at: ./nikola/data/themes/default/messages/fr.py). -# If a specific post is not translated to a language, then the version -# in the default language will be shown instead. - -# What is the default language? -DEFAULT_LANG = "${DEFAULT_LANG}" - -# What other languages do you have? -# The format is {"translationcode" : "path/to/translation" } -# the path will be used as a prefix for the generated pages location -TRANSLATIONS = { - "${DEFAULT_LANG}": "", - #"gr": "./gr", - #"de": "./de", - #"fr": "./fr", - #"ru": "./ru", - #"es": "./es", - } - # Paths for different autogenerated bits. These are combined with the # translation paths. @@ -95,26 +108,26 @@ TRANSLATIONS = { # output / TRANSLATION[lang] / TAG_PATH / index.html (list of tags) # output / TRANSLATION[lang] / TAG_PATH / tag.html (list of posts for a tag) # output / TRANSLATION[lang] / TAG_PATH / tag.xml (RSS feed for a tag) -TAG_PATH = "categories" +# TAG_PATH = "categories" # If TAG_PAGES_ARE_INDEXES is set to True, each tag's page will contain # the posts themselves. If set to False, it will be just a list of links. 
-TAG_PAGES_ARE_INDEXES = True +# TAG_PAGES_ARE_INDEXES = True # Final location is output / TRANSLATION[lang] / INDEX_PATH / index-*.html -INDEX_PATH = "" +# INDEX_PATH = "" # Final locations for the archives are: # output / TRANSLATION[lang] / ARCHIVE_PATH / ARCHIVE_FILENAME # output / TRANSLATION[lang] / ARCHIVE_PATH / YEAR / index.html -ARCHIVE_PATH = "" -ARCHIVE_FILENAME = "archive.html" +# ARCHIVE_PATH = "" +# ARCHIVE_FILENAME = "archive.html" # Final locations are: # output / TRANSLATION[lang] / RSS_PATH / rss.xml -RSS_PATH = "" +# RSS_PATH = "" # Slug the Tag URL easier for users to type, special characters are # often removed or replaced as well. -SLUG_TAG_PATH = True +# SLUG_TAG_PATH = True # A list of redirection tuples, [("foo/from.html", "/bar/to.html")]. # @@ -123,25 +136,23 @@ SLUG_TAG_PATH = True # relative URL. # # If you don't need any of these, just set to [] - -REDIRECTIONS = ${REDIRECTIONS} +# REDIRECTIONS = ${REDIRECTIONS} # Commands to execute to deploy. Can be anything, for example, # you may use rsync: # "rsync -rav output/* joe@my.site:/srv/www/site" # And then do a backup, or ping pingomatic. # To do manual deployment, set it to [] -DEPLOY_COMMANDS = [] +# DEPLOY_COMMANDS = [] # Where the output site should be located # If you don't use an absolute path, it will be considered as relative # to the location of conf.py - -OUTPUT_FOLDER = 'output' +# OUTPUT_FOLDER = 'output' # where the "cache" of partial generated content should be located # default: 'cache' -CACHE_FOLDER = 'cache' +# CACHE_FOLDER = 'cache' # Filters to apply to the output. # A directory where the keys are either: a file extensions, or @@ -161,9 +172,9 @@ CACHE_FOLDER = 'cache' # argument. # # By default, there are no filters. -FILTERS = { +# FILTERS = { # ".jpg": ["jpegoptim --strip-all -m75 -v %s"], -} +# } # ############################################################################# # Image Gallery Options @@ -171,38 +182,48 @@ FILTERS = { # Galleries are folders in galleries/ # Final location of galleries will be output / GALLERY_PATH / gallery_name -GALLERY_PATH = "galleries" -THUMBNAIL_SIZE = 180 -MAX_IMAGE_SIZE = 1280 -USE_FILENAME_AS_TITLE = True +# GALLERY_PATH = "galleries" +# THUMBNAIL_SIZE = 180 +# MAX_IMAGE_SIZE = 1280 +# USE_FILENAME_AS_TITLE = True # ############################################################################# # HTML fragments and diverse things that are used by the templates # ############################################################################# # Data about post-per-page indexes -INDEXES_TITLE = "" # If this is empty, the default is BLOG_TITLE -INDEXES_PAGES = "" # If this is empty, the default is 'old posts page %d' translated +# INDEXES_TITLE = "" # If this is empty, the default is BLOG_TITLE +# INDEXES_PAGES = "" # If this is empty, the default is 'old posts page %d' translated # Name of the theme to use. Themes are located in themes/theme_name -THEME = 'site' +# THEME = 'site' # date format used to display post dates. (str used by datetime.datetime.strftime) -DATE_FORMAT = '%Y-%m-%d %H:%M' +# DATE_FORMAT = '%Y-%m-%d %H:%M' + +# FAVICONS contains (name, file, size) tuples. +# Used for create favicon link like this: +# <link rel="name" href="file" sizes="size"/> +# about favicons, see: http://www.netmagazine.com/features/create-perfect-favicon +# FAVICONS = { +# ("icon", "/favicon.ico", "16x16"), +# ("icon", "/icon_128x128.png", "128x128"), +# } # Show only teasers in the index pages? Defaults to False. 
# INDEX_TEASERS = False -# A HTML fragment describing the license, for the sidebar. -# I recomment using the Creative Commons' wizard: +# A HTML fragment describing the license, for the sidebar. Default is "". +# I recommend using the Creative Commons' wizard: # http://creativecommons.org/choose/ -LICENSE = """ -<a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/2.5/ar/"> -<img alt="Creative Commons License BY-NC-SA" -style="border-width:0; margin-bottom:12px;" -src="http://i.creativecommons.org/l/by-nc-sa/2.5/ar/88x31.png"></a>""" - -# A small copyright notice for the page footer (in HTML) +# LICENSE = """ +# <a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/2.5/ar/"> +# <img alt="Creative Commons License BY-NC-SA" +# style="border-width:0; margin-bottom:12px;" +# src="http://i.creativecommons.org/l/by-nc-sa/2.5/ar/88x31.png"></a>""" + +# A small copyright notice for the page footer (in HTML). +# Default is '' CONTENT_FOOTER = 'Contents © {date} <a href="mailto:{email}">{author}</a> - Powered by <a href="http://nikola.ralsina.com.ar">Nikola</a>' CONTENT_FOOTER = CONTENT_FOOTER.format(email=BLOG_EMAIL, author=BLOG_AUTHOR, @@ -211,7 +232,15 @@ CONTENT_FOOTER = CONTENT_FOOTER.format(email=BLOG_EMAIL, # To enable comments via Disqus, you need to create a forum at # http://disqus.com, and set DISQUS_FORUM to the short name you selected. # If you want to disable comments, set it to False. -DISQUS_FORUM = "nikolademo" +# Default is "nikolademo", used by the demo sites +# DISQUS_FORUM = "nikolademo" + +# Create index.html for story folders? +# STORY_INDEX = False +# Enable comments on story pages? +# COMMENTS_IN_STORIES = False +# Enable comments on picture gallery pages? +# COMMENTS_IN_GALLERIES = False # Enable Addthis social buttons? # Defaults to true @@ -224,14 +253,18 @@ DISQUS_FORUM = "nikolademo" # RSS_LINK is a HTML fragment to link the RSS or Atom feeds. If set to None, # the base.tmpl will use the feed Nikola generates. However, you may want to # change it for a feedburner feed or something else. -RSS_LINK = None +# RSS_LINK = None + +# Show only teasers in the RSS feed? Default to True +# RSS_TEASERS = True # A search form to search this site, for the sidebar. You can use a google # custom search (http://www.google.com/cse/) # Or a duckduckgo search: https://duckduckgo.com/search_box.html -# This example should work for pretty much any site we generate. -SEARCH_FORM = "" -# This search form is better for the "site" theme where it +# Default is no search form. +# SEARCH_FORM = "" +# +# This search form works for any site and looks good in the "site" theme where it # appears on the navigation bar #SEARCH_FORM = """ #<!-- Custom search --> @@ -247,37 +280,28 @@ SEARCH_FORM = "" #</form> #<!-- End of custom search --> #""" % BLOG_URL +# +# Also, there is a local search plugin you can use. # Google analytics or whatever else you use. Added to the bottom of <body> # in the default template (base.tmpl). -ANALYTICS = """ - """ +# ANALYTICS = "" + +# The possibility to extract metadata from the filename by using a +# regular expression. +# To make it work you need to name parts of your regular expression. +# The following names will be used to extract metadata: +# - title +# - slug +# - date +# - tags +# - link +# - description +# +# An example re is the following: +# '(?P<date>\d{4}-\d{2}-\d{2})-(?P<slug>.*)-(?P<title>.*)\.md' +# FILE_METADATA_REGEXP = None # Put in global_context things you want available on all your templates. 
# It can be anything, data, functions, modules, etc. -GLOBAL_CONTEXT = { - 'analytics': ANALYTICS, - 'blog_author': BLOG_AUTHOR, - 'blog_title': BLOG_TITLE, - 'blog_url': BLOG_URL, - 'blog_desc': BLOG_DESCRIPTION, - 'date_format': DATE_FORMAT, - 'translations': TRANSLATIONS, - 'license': LICENSE, - 'search_form': SEARCH_FORM, - 'disqus_forum': DISQUS_FORUM, - 'content_footer': CONTENT_FOOTER, - 'rss_path': RSS_PATH, - 'rss_link': RSS_LINK, - # Locale-dependent links for the sidebar - # You should provide a key-value pair for each used language. - 'sidebar_links': { - DEFAULT_LANG: ( - ('/' + os.path.join(ARCHIVE_PATH, ARCHIVE_FILENAME), 'Archives'), - ('/categories/index.html', 'Tags'), - ('/stories/about-nikola.html', 'About Nikola'), - ('/stories/handbook.html', 'The Nikola Handbook'), - ('http://nikola.ralsina.com.ar', 'Powered by Nikola!'), - ), - } - } +GLOBAL_CONTEXT = {} diff --git a/nikola/console.py b/nikola/console.py index 939b611..ad36010 100644 --- a/nikola/console.py +++ b/nikola/console.py @@ -4,4 +4,5 @@ from nikola import Nikola import conf SITE = Nikola(**conf.__dict__) SITE.scan_posts() -print("You can now access your configuration as conf and your site engine as SITE") +print("You can now access your configuration as conf and your site engine " + "as SITE") diff --git a/nikola/data/samplesite/posts/1.txt b/nikola/data/samplesite/posts/1.txt index 5741e05..2e6c3ba 100644 --- a/nikola/data/samplesite/posts/1.txt +++ b/nikola/data/samplesite/posts/1.txt @@ -8,7 +8,7 @@ and build a site using it. Congratulations! * You can read the manual `here </stories/handbook.html>`__ * You can learn more about Nikola at http://nikola.ralsina.com.ar -* You can see a demo photo gallery `here </galleries/demo/>`__ +* You can see a demo photo gallery `here </galleries/demo/index.html>`__ * Demo usage of listings `here </stories/listings-demo.html>`__ * Demo of slideshows `here </stories/slides-demo.html>`__ diff --git a/nikola/data/samplesite/stories/configsample.txt b/nikola/data/samplesite/stories/configsample.txt index 89d296e..a148942 100755..100644 --- a/nikola/data/samplesite/stories/configsample.txt +++ b/nikola/data/samplesite/stories/configsample.txt @@ -218,4 +218,4 @@ # You can also replace the provided tasks with your own by redefining them # below this point. For a list of current tasks, run "doit list", and for - # help on their syntax, refer to the doit handbook at http://python-doit.sf.net + # help on their syntax, refer to the doit handbook at http://pydoit.org diff --git a/nikola/data/themes/default/assets/css/rst.css b/nikola/data/themes/default/assets/css/rst.css index 1f0edcb..cf73111 100644 --- a/nikola/data/themes/default/assets/css/rst.css +++ b/nikola/data/themes/default/assets/css/rst.css @@ -1,8 +1,6 @@ /* -:Author: David Goodger -:Contact: goodger@users.sourceforge.net -:Date: $Date: 2005-12-18 01:56:14 +0100 (Sun, 18 Dec 2005) $ -:Revision: $Revision: 4224 $ +:Author: David Goodger (goodger@python.org) +:Id: $Id: html4css1.css 7514 2012-09-14 14:27:12Z milde $ :Copyright: This stylesheet has been placed in the public domain. Default cascading style sheet for the HTML output of Docutils. @@ -35,11 +33,15 @@ a.toc-backref { color: black } blockquote.epigraph { - margin: 2em 1em ; } + margin: 2em 5em ; } dl.docutils dd { margin-bottom: 0.5em } +object[type="image/svg+xml"], object[type="application/x-shockwave-flash"] { + overflow: hidden; +} + /* Uncomment (and remove this text!) 
to get bold-faced definition list terms dl.docutils dt { font-weight: bold } @@ -54,16 +56,9 @@ div.abstract p.topic-title { div.admonition, div.attention, div.caution, div.danger, div.error, div.hint, div.important, div.note, div.tip, div.warning { - padding: 8px 35px 8px 14px; - margin-bottom: 18px; - text-shadow: 0 1px 0 rgba(255, 255, 255, 0.5); - background-color: #d9edf7; - color: #3a87ad; - border: 1px solid #bce8f1; - -webkit-border-radius: 4px; - -moz-border-radius: 4px; - border-radius: 4px; -} + margin: 2em ; + border: medium outset ; + padding: 1em } div.admonition p.admonition-title, div.hint p.admonition-title, div.important p.admonition-title, div.note p.admonition-title, @@ -73,7 +68,7 @@ div.tip p.admonition-title { div.attention p.admonition-title, div.caution p.admonition-title, div.danger p.admonition-title, div.error p.admonition-title, -div.warning p.admonition-title { +div.warning p.admonition-title, .code .error { color: red ; font-weight: bold ; font-family: sans-serif } @@ -97,7 +92,6 @@ div.dedication p.topic-title { font-style: normal } div.figure { - text-align: center; margin-left: 2em ; margin-right: 2em } @@ -116,7 +110,7 @@ div.line-block div.line-block { margin-left: 1.5em } div.sidebar { - margin-left: 1em ; + margin: 0 0 0.5em 1em ; border: medium outset ; padding: 1em ; background-color: #ffffee ; @@ -135,21 +129,11 @@ div.system-messages h1 { color: red } div.system-message { - padding: 8px 35px 8px 14px; - margin-bottom: 18px; - text-shadow: 0 1px 0 rgba(255, 255, 255, 0.5); - border: 1px solid #eed3d7; - -webkit-border-radius: 4px; - -moz-border-radius: 4px; - border-radius: 4px; - padding: 1em; - background-color: #f2dede; - color: #b94a48; - -} + border: medium outset ; + padding: 1em } div.system-message p.system-message-title { - color: inherit ; + color: red ; font-weight: bold } div.topic { @@ -168,11 +152,38 @@ h2.subtitle { hr.docutils { width: 75% } -img.align-left { - clear: left } +img.align-left, .figure.align-left, object.align-left { + clear: left ; + float: left ; + margin-right: 1em } -img.align-right { - clear: right } +img.align-right, .figure.align-right, object.align-right { + clear: right ; + float: right ; + margin-left: 1em } + +img.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left } + +.align-center { + clear: both ; + text-align: center } + +.align-right { + text-align: right } + +/* reset inner alignment in figures */ +div.align-right { + text-align: inherit } + +/* div.align-center * { */ +/* text-align: left } */ ol.simple, ul.simple { margin-bottom: 1em } @@ -227,16 +238,20 @@ p.topic-title { pre.address { margin-bottom: 0 ; margin-top: 0 ; - font-family: serif ; - font-size: 100% } + font: inherit } -pre.literal-block, pre.doctest-block { - margin: 0 0 0 0 ; - background-color: #eeeeee; - padding: 1em; - overflow: auto; -/* font-family: "Courier New", Courier, monospace;*/ -} +pre.literal-block, pre.doctest-block, pre.math, pre.code { + margin-left: 2em ; + margin-right: 2em } + +pre.code .ln { color: grey; } /* line numbers */ +pre.code, code { background-color: #eeeeee } +pre.code .comment, code .comment { color: #5C6576 } +pre.code .keyword, code .keyword { color: #3B0D06; font-weight: bold } +pre.code .literal.string, code .literal.string { color: #0C5404 } +pre.code .name.builtin, code .name.builtin { color: #352B84 } +pre.code .deleted, code .deleted { background-color: #DEB0A1} +pre.code .inserted, code 
.inserted { background-color: #A3D289} span.classifier { font-family: sans-serif ; @@ -293,23 +308,5 @@ h1 tt.docutils, h2 tt.docutils, h3 tt.docutils, h4 tt.docutils, h5 tt.docutils, h6 tt.docutils { font-size: 100% } -tt.docutils { - background-color: #eeeeee } - ul.auto-toc { list-style-type: none } - -#blog-title { - font-size: 34pt; - /*margin:0 0.3em -14px;*/ - background-color: #FFF; - font-family: "courier"; - text-align: right; - margin-top: 20px; - margin-bottom: 10px; -} - -img { - margin-top: 12px; - margin-bottom: 12px; -} diff --git a/nikola/data/themes/default/bundles b/nikola/data/themes/default/bundles index ea9fba9..8fe858b 100644 --- a/nikola/data/themes/default/bundles +++ b/nikola/data/themes/default/bundles @@ -1,2 +1,2 @@ -assets/css/all.css=bootstrap.css,bootstrap-responsive.css,rst.css,code.css,colorbox.css,slides.js,theme.css,custom.css -assets/js/all.js=jquery-1.7.2.min.js,jquery.colorbox-min.js,slides.min.jquery.js +assets/css/all.css=bootstrap.css,bootstrap-responsive.css,rst.css,code.css,colorbox.css,slides.css,theme.css,custom.css +assets/js/all.js=bootstrap.min.js,jquery-1.7.2.min.js,jquery.colorbox-min.js,slides.min.jquery.js diff --git a/nikola/data/themes/default/messages/messages_ca.py b/nikola/data/themes/default/messages/messages_ca.py new file mode 100644 index 0000000..8e7186f --- /dev/null +++ b/nikola/data/themes/default/messages/messages_ca.py @@ -0,0 +1,22 @@ +# -*- encoding:utf-8 -*- +from __future__ import unicode_literals + +MESSAGES = { + "LANGUAGE": "Català", + "Posts for year %s": "Entrades de l'any %s", + "Archive": "Arxiu", + "Posts about %s": "Entrades sobre %s", + "Tags": "Etiquetes", + "Also available in": "També disponibles en", + "More posts about": "Més entrades sobre", + "Posted": "Publicat", + "Original site": "Lloc original", + "Read in English": "Llegeix-ho en català", + "Older posts": "Entrades anteriors", + "Newer posts": "Entrades posteriors", + "Previous post": "Entrada anterior", + "Next post": "Entrada següent", + "old posts page %d": "entrades antigues pàgina %d", + "Read more": "Llegeix-ne més", + "Source": "Codi", +} diff --git a/nikola/data/themes/default/messages/messages_de.py b/nikola/data/themes/default/messages/messages_de.py index cafbcbb..5da3b2b 100644 --- a/nikola/data/themes/default/messages/messages_de.py +++ b/nikola/data/themes/default/messages/messages_de.py @@ -3,20 +3,20 @@ from __future__ import unicode_literals MESSAGES = { "LANGUAGE": "Deutsch", - "Posts for year %s": "Einträge aus dem Jahr %s", + "Posts for year %s": "Einträge aus dem Jahr %s", "Archive": "Archiv", - "Posts about %s": "Einträge über %s", + "Posts about %s": "Einträge über %s", "Tags": "Tags", - "Also available in": "Auch verfügbar in", - "More posts about": "Weitere Einträge über", - "Posted": "Veröffentlicht", + "Also available in": "Auch verfügbar in", + "More posts about": "Weitere Einträge über", + "Posted": "Veröffentlicht", "Original site": "Original-Seite", "Read in English": "Auf Deutsch lesen", - "Older posts": "Ältere Einträge", - "Newer posts": "Neuere Einträge", + "Older posts": "Ältere Einträge", + "Newer posts": "Neuere Einträge", "Previous post": "Vorheriger Eintrag", - "Next post": "Nächster Eintrag", + "Next post": "Nächster Eintrag", "Source": "Source", "Read more": "Weiterlesen", - "old posts page %d": "Vorherige Einträge %d" + "old posts page %d": "Vorherige Einträge %d" } diff --git a/nikola/data/themes/default/messages/messages_en.py b/nikola/data/themes/default/messages/messages_en.py index 6d39047..9fc77ef 
100644 --- a/nikola/data/themes/default/messages/messages_en.py +++ b/nikola/data/themes/default/messages/messages_en.py @@ -1,27 +1,22 @@ +# -*- encoding:utf-8 -*- from __future__ import unicode_literals -MESSAGES = [ - "Posts for year %s", - "Archive", - "Posts about %s", - "Tags", - "Also available in", - "More posts about", - "Posted", - "Original site", - "Read in English", - "Newer posts", - "Older posts", - "Previous post", - "Next post", - "old posts page %d", - "Read more", - "Source", -] - -# In english things are not translated -msg_dict = {} -for msg in MESSAGES: - msg_dict[msg] = msg -MESSAGES = msg_dict -MESSAGES["LANGUAGE"] = "English" +MESSAGES = { + "LANGUAGE": "English", + "Posts for year %s": "Posts for year %s", + "Archive": "Archive", + "Posts about %s": "Posts about %s", + "Tags": "Tags", + "Also available in": "Also available in", + "More posts about": "More posts about", + "Posted": "Posted", + "Original site": "Original site", + "Read in English": "Read in English", + "Newer posts": "Newer posts", + "Older posts": "Older posts", + "Previous post": "Previous post", + "Next post": "Next post", + "old posts page %d": "old posts page %d", + "Read more": "Read more", + "Source": "Source", +} diff --git a/nikola/data/themes/default/messages/messages_fr.py b/nikola/data/themes/default/messages/messages_fr.py index 776147b..74eecb8 100644 --- a/nikola/data/themes/default/messages/messages_fr.py +++ b/nikola/data/themes/default/messages/messages_fr.py @@ -14,5 +14,8 @@ MESSAGES = { "Read in English": "Lire en français", "Newer posts": "Billets récents", "Older posts": "Anciens billets", + "Previous post": "Previous post", + "Next post": "Next post", + "Read more": "Read more", "Source": "Source", } diff --git a/nikola/data/themes/default/messages/messages_gr.py b/nikola/data/themes/default/messages/messages_gr.py index 5965bc3..c6135f3 100644 --- a/nikola/data/themes/default/messages/messages_gr.py +++ b/nikola/data/themes/default/messages/messages_gr.py @@ -17,5 +17,6 @@ MESSAGES = { "Previous post": "Προηγούμενη ανάρτηση", "Next post": "Επόμενη ανάρτηση", "old posts page %d": "σελίδα παλαιότερων αναρτήσεων %d", + "Read more": "Read more", "Source": "Source", } diff --git a/nikola/data/themes/default/messages/messages_pl.py b/nikola/data/themes/default/messages/messages_pl.py new file mode 100644 index 0000000..7172ebc --- /dev/null +++ b/nikola/data/themes/default/messages/messages_pl.py @@ -0,0 +1,22 @@ +# -*- encoding:utf-8 -*- +from __future__ import unicode_literals + +MESSAGES = { + "LANGUAGE": "polski", + "Posts for year %s": "Posty z roku %s", + "Archive": "Archiwum", + "Posts about %s": "Posty o %s", + "Tags": "Tags", + "Also available in": "Również dostępny w", + "More posts about": "Więcej postów o", + "Posted": "Opublikowany", + "Original site": "Oryginalna strona", + "Read in English": "Czytaj po polsku", + "Older posts": "Starsze posty", + "Newer posts": "Nowsze posty", + "Previous post": "Poprzedni post", + "Next post": "Następny post", + "Source": "Źródło", + "Read more": "Czytaj więcej", + "old posts page %d": "stare posty, strona %d" +} diff --git a/nikola/data/themes/default/messages/messages_zh-cn.py b/nikola/data/themes/default/messages/messages_zh-cn.py new file mode 100644 index 0000000..2f4b64e --- /dev/null +++ b/nikola/data/themes/default/messages/messages_zh-cn.py @@ -0,0 +1,22 @@ +# -*- encoding:utf-8 -*-
+from __future__ import unicode_literals
+
+MESSAGES = {
+ "LANGUAGE": "简体中文",
+ "Posts for year %s": "%s年文章",
+ "Archive": "文章存档",
+ "Posts about %s": "文章分类:%s",
+ "Tags": "标签",
+ "Also available in": "其他语言版本",
+ "More posts about": "更多相关文章:",
+ "Posted": "发表于",
+ "Original site": "原文地址",
+ "Read in English": "中文版",
+ "Older posts": "旧一篇",
+ "Newer posts": "新一篇",
+ "Previous post": "前一篇",
+ "Next post": "后一篇",
+ "old posts page %d": "旧文章页 %d",
+ "Read more": "更多",
+ "Source": "源代码",
+}
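The new translation modules added in this commit (`messages_ca.py`, `messages_pl.py`, `messages_zh-cn.py`) all share one shape: a `MESSAGES` dict keyed by the English strings the themes look up. As a sketch, a skeleton for a hypothetical additional language would mirror the keys of `messages_en.py` from this same diff, with only the values translated:

```python
# -*- encoding:utf-8 -*-
# Hypothetical nikola/data/themes/default/messages/messages_eo.py skeleton.
# Every key the default themes reference must be present; replace the values
# with real translations (here they simply echo the English strings).
from __future__ import unicode_literals

MESSAGES = {
    "LANGUAGE": "Esperanto",
    "Posts for year %s": "Posts for year %s",
    "Archive": "Archive",
    "Posts about %s": "Posts about %s",
    "Tags": "Tags",
    "Also available in": "Also available in",
    "More posts about": "More posts about",
    "Posted": "Posted",
    "Original site": "Original site",
    "Read in English": "Read in English",
    "Newer posts": "Newer posts",
    "Older posts": "Older posts",
    "Previous post": "Previous post",
    "Next post": "Next post",
    "old posts page %d": "old posts page %d",
    "Read more": "Read more",
    "Source": "Source",
}
```

Per the conf.py.in comments later in this diff, wiring such a language in is then a matter of setting `DEFAULT_LANG` and adding an entry to `TRANSLATIONS`.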
diff --git a/nikola/data/themes/default/templates/base_helper.tmpl b/nikola/data/themes/default/templates/base_helper.tmpl index 3f27f23..170dee1 100644 --- a/nikola/data/themes/default/templates/base_helper.tmpl +++ b/nikola/data/themes/default/templates/base_helper.tmpl @@ -1,3 +1,4 @@ +## -*- coding: utf-8 -*- <%def name="html_head()"> <meta charset="utf-8"> <meta name="title" content="${title} | ${blog_title}" > @@ -10,7 +11,6 @@ <script src="/assets/js/all.js" type="text/javascript"></script> %else: <link href="/assets/css/bootstrap.css" rel="stylesheet" type="text/css"> - <link href="/assets/css/bootstrap-responsive.css" rel="stylesheet" type="text/css"> <link href="/assets/css/rst.css" rel="stylesheet" type="text/css"> <link href="/assets/css/code.css" rel="stylesheet" type="text/css"> <link href="/assets/css/colorbox.css" rel="stylesheet" type="text/css"/> @@ -19,9 +19,11 @@ %if has_custom_css: <link href="/assets/css/custom.css" rel="stylesheet" type="text/css"> %endif + <link href="/assets/css/bootstrap-responsive.css" rel="stylesheet" type="text/css"> <script src="/assets/js/jquery-1.7.2.min.js" type="text/javascript"></script> <script src="/assets/js/jquery.colorbox-min.js" type="text/javascript"></script> <script src="/assets/js/slides.min.jquery.js" type="text/javascript"></script> + <script src="/assets/js/bootstrap.min.js" type="text/javascript"></script> %endif <!-- Le HTML5 shim, for IE6-8 support of HTML5 elements --> <!--[if lt IE 9]> @@ -34,6 +36,11 @@ <link rel="alternate" type="application/rss+xml" title="RSS (${language})" href="${_link('rss', None, lang)}"> %endfor %endif + %if favicons: + %for name, file, size in favicons: + <link rel="${name}" href="${file}" sizes="${size}"/> + %endfor + %endif </%def> diff --git a/nikola/data/themes/default/templates/disqus_helper.tmpl b/nikola/data/themes/default/templates/disqus_helper.tmpl new file mode 100644 index 0000000..674e20e --- /dev/null +++ b/nikola/data/themes/default/templates/disqus_helper.tmpl @@ -0,0 +1,40 @@ +## -*- coding: utf-8 -*- +<%! 
+ import json +%> +<%def name="html_disqus(url, title, identifier)"> + %if disqus_forum: + <div id="disqus_thread"></div> + <script type="text/javascript"> + var disqus_shortname ="${disqus_forum}"; + %if url: + var disqus_url="${url}"; + %endif + var disqus_title=${json.dumps(title)}; + var disqus_identifier="${identifier}"; + var disqus_config = function () { + this.language = "${lang}"; + }; + (function() { + var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; + dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js'; + (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); + })(); + </script> + <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> + %endif +</%def> + +<%def name="html_disqus_link(link, identifier)"> + <p> + %if disqus_forum: + <a href="${link}" data-disqus-identifier="${identifier}">Comments</a> + %endif +</%def> + + +<%def name="html_disqus_script()"> + %if disqus_forum: + <script type="text/javascript">var disqus_shortname="${disqus_forum}";(function(){var a=document.createElement("script");a.async=true;a.type="text/javascript";a.src="http://"+disqus_shortname+".disqus.com/count.js";(document.getElementsByTagName("HEAD")[0]||document.getElementsByTagName("BODY")[0]).appendChild(a)}());</script> + %endif +</%def> diff --git a/nikola/data/themes/default/templates/gallery.tmpl b/nikola/data/themes/default/templates/gallery.tmpl index 37d749f..3186cc8 100644 --- a/nikola/data/themes/default/templates/gallery.tmpl +++ b/nikola/data/themes/default/templates/gallery.tmpl @@ -1,5 +1,6 @@ ## -*- coding: utf-8 -*- <%inherit file="base.tmpl"/> +<%namespace name="disqus" file="disqus_helper.tmpl"/> <%block name="sourcelink"></%block> <%block name="content"> @@ -24,4 +25,7 @@ <img src="${image[1]}" /></a></li> %endfor </ul> +%if enable_comments: + ${disqus.html_disqus(None, permalink, title)} +%endif </%block> diff --git a/nikola/data/themes/default/templates/index.tmpl b/nikola/data/themes/default/templates/index.tmpl index 03dd1f8..1a436e2 100644 --- a/nikola/data/themes/default/templates/index.tmpl +++ b/nikola/data/themes/default/templates/index.tmpl @@ -1,5 +1,6 @@ ## -*- coding: utf-8 -*- <%namespace name="helper" file="index_helper.tmpl"/> +<%namespace name="disqus" file="disqus_helper.tmpl"/> <%inherit file="base.tmpl"/> <%block name="content"> % for post in posts: @@ -10,9 +11,9 @@ </small></h1> <hr> ${post.text(lang, index_teasers)} - ${helper.html_disqus_link(post)} + ${disqus.html_disqus_link(post.permalink()+"#disqus_thread", post.base_path)} </div> % endfor ${helper.html_pager()} - ${helper.html_disqus_script()} + ${disqus.html_disqus_script()} </%block> diff --git a/nikola/data/themes/default/templates/index_helper.tmpl b/nikola/data/themes/default/templates/index_helper.tmpl index cfecdf3..114a730 100644 --- a/nikola/data/themes/default/templates/index_helper.tmpl +++ b/nikola/data/themes/default/templates/index_helper.tmpl @@ -1,3 +1,4 @@ +## -*- coding: utf-8 -*- <%def name="html_pager()"> <div> <ul class="pager"> @@ -14,18 +15,3 @@ </ul> </div> </%def> - - -<%def name="html_disqus_link(post)"> - <p> - %if disqus_forum: - <a href="${post.permalink()}#disqus_thread">Comments</a> - %endif -</%def> - - -<%def name="html_disqus_script()"> - %if disqus_forum: - <script type="text/javascript">var disqus_shortname="${disqus_forum}";(function(){var 
a=document.createElement("script");a.async=true;a.type="text/javascript";a.src="http://"+disqus_shortname+".disqus.com/count.js";(document.getElementsByTagName("HEAD")[0]||document.getElementsByTagName("BODY")[0]).appendChild(a)}());</script> - %endif -</%def> diff --git a/nikola/data/themes/default/templates/post.tmpl b/nikola/data/themes/default/templates/post.tmpl index 306192d..672d4f6 100644 --- a/nikola/data/themes/default/templates/post.tmpl +++ b/nikola/data/themes/default/templates/post.tmpl @@ -1,5 +1,6 @@ ## -*- coding: utf-8 -*- <%namespace name="helper" file="post_helper.tmpl"/> +<%namespace name="disqus" file="disqus_helper.tmpl"/> <%inherit file="base.tmpl"/> <%block name="content"> <div class="postbox"> @@ -9,12 +10,12 @@ ${messages[lang]["Posted"]}: ${post.date.strftime(date_format)} ${helper.html_translations(post)} | - <a href="${post.pagenames[lang]+'.txt'}">${messages[lang]["Source"]}</a> + <a href="${post.pagenames[lang]+'.txt'}" id="sourcelink">${messages[lang]["Source"]}</a> ${helper.html_tags(post)} </small> <hr> ${post.text(lang)} ${helper.html_pager(post)} - ${helper.html_disqus(post)} + ${disqus.html_disqus(post.permalink(absolute=True), post.title(lang), post.base_path)} </div> </%block> diff --git a/nikola/data/themes/default/templates/post_helper.tmpl b/nikola/data/themes/default/templates/post_helper.tmpl index 3e874e9..ab08359 100644 --- a/nikola/data/themes/default/templates/post_helper.tmpl +++ b/nikola/data/themes/default/templates/post_helper.tmpl @@ -1,3 +1,4 @@ +## -*- coding: utf-8 -*- <%def name="html_title()"> <h1>${title}</h1> % if link: @@ -9,7 +10,7 @@ <%def name="html_translations(post)"> %if len(translations) > 1: %for langname in translations.keys(): - %if langname != lang: + %if langname != lang and post.is_translation_available(langname): | <a href="${post.permalink(langname)}">${messages[langname]["Read in English"]}</a> %endif @@ -27,17 +28,6 @@ %endif </%def> - -<%def name="html_disqus(post)"> - %if disqus_forum: - <div id="disqus_thread"></div> - <script type="text/javascript">var disqus_shortname="${disqus_forum}";var disqus_url="${post.permalink(absolute=True)}";(function(){var a=document.createElement("script");a.type="text/javascript";a.async=true;a.src="http://"+disqus_shortname+".disqus.com/embed.js";(document.getElementsByTagName("head")[0]||document.getElementsByTagName("body")[0]).appendChild(a)})(); </script> - <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> - <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a> - %endif -</%def> - - <%def name="html_pager(post)"> <ul class="pager"> %if post.prev_post: diff --git a/nikola/data/themes/default/templates/story.tmpl b/nikola/data/themes/default/templates/story.tmpl index deb0a46..d5c2f44 100644 --- a/nikola/data/themes/default/templates/story.tmpl +++ b/nikola/data/themes/default/templates/story.tmpl @@ -1,8 +1,12 @@ ## -*- coding: utf-8 -*- <%inherit file="post.tmpl"/> +<%namespace name="disqus" file="disqus_helper.tmpl"/> <%block name="content"> %if title: <h1>${title}</h1> %endif ${post.text(lang)} +%if enable_comments: + ${disqus.html_disqus(post.permalink(absolute=True), post.title(lang), post.base_path)} +%endif </%block> diff --git a/nikola/data/themes/jinja-default/templates/gallery.tmpl b/nikola/data/themes/jinja-default/templates/gallery.tmpl index dcd8a43..34ff439 100644 --- 
a/nikola/data/themes/jinja-default/templates/gallery.tmpl +++ b/nikola/data/themes/jinja-default/templates/gallery.tmpl @@ -23,5 +23,25 @@ <img src="{{image[1]}}" /></a></li> {% endfor %} </ul> + {%if enable_comments %} + {%if disqus_forum %} + <div id="disqus_thread"></div> + <script type="text/javascript"> + var disqus_shortname ="{{disqus_forum}}"; + var disqus_title={{title|tojson}}; + var disqus_identifier="{{permalink}}"; + var disqus_config = function () { + this.language = "{{lang}}"; + }; + (function() { + var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; + dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js'; + (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); + })(); + </script> + <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> + {% endif %} + {% endif %} + {% endblock %} diff --git a/nikola/data/themes/jinja-default/templates/index.tmpl b/nikola/data/themes/jinja-default/templates/index.tmpl index c068417..ad54c19 100644 --- a/nikola/data/themes/jinja-default/templates/index.tmpl +++ b/nikola/data/themes/jinja-default/templates/index.tmpl @@ -10,7 +10,7 @@ {{post.text(lang, index_teasers)}} <p> {% if disqus_forum %} - <a href="{{post.permalink()}}#disqus_thread">Comments</a> + <a href="{{post.permalink()}}#disqus_thread" data-disqus-identifier="{{post.base_path}}">Comments</a> {% endif %} </div> {% endfor %} diff --git a/nikola/data/themes/jinja-default/templates/post.tmpl b/nikola/data/themes/jinja-default/templates/post.tmpl index 3ce6abe..2a356c5 100644 --- a/nikola/data/themes/jinja-default/templates/post.tmpl +++ b/nikola/data/themes/jinja-default/templates/post.tmpl @@ -11,14 +11,14 @@ {% if translations|length > 1 %} {% for langname in translations.keys() %} - {% if langname != lang %} + {% if langname != lang and post.is_translation_available(langname) %} <a href="{{post.permalink(langname)}}">{{messages[langname]["Read in English"]}}</a> | {% endif %} {% endfor %} {% endif %} - <a href="{{post.pagenames[lang]+".txt"}}">{{messages[lang]["Source"]}}</a> + <a href="{{post.pagenames[lang]+".txt"}}" id="sourcelink">{{messages[lang]["Source"]}}</a> {% if post.tags %} | {{messages[lang]["More posts about"]}} {% for tag in post.tags %} @@ -42,9 +42,21 @@ </ul> {% if disqus_forum %} <div id="disqus_thread"></div> - <script type="text/javascript"> var disqus_shortname = '{{disqus_forum}}'; var disqus_url = '{{post.permalink(absolute=True)}}'; (function() { var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js'; (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); })(); </script> - <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> - <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a> + <script type="text/javascript"> + var disqus_shortname ="{{disqus_forum}}"; + var disqus_url="{{post.permalink(absolute=True)}}"; + var disqus_title={{post.title(lang)|tojson }}; + var disqus_identifier="{{post.base_path}}"; + var disqus_config = function () { + this.language = "{{lang}}"; + }; + (function() { + var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; + dsq.src = 
'http://' + disqus_shortname + '.disqus.com/embed.js'; + (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); + })(); + </script> + <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> {% endif %} </div> {% endblock %} diff --git a/nikola/data/themes/jinja-default/templates/story.tmpl b/nikola/data/themes/jinja-default/templates/story.tmpl index 411c269..ccaac91 100644 --- a/nikola/data/themes/jinja-default/templates/story.tmpl +++ b/nikola/data/themes/jinja-default/templates/story.tmpl @@ -4,4 +4,7 @@ <h1>{{title}}</h1> {% endif %} {{post.text(lang)}} +{%if enable_comments %} + {{disqus.html_disqus(post.permalink(absolute=True), post.title(lang), post.base_path)}} +{%endif%} {% endblock %} diff --git a/nikola/data/themes/monospace/assets/css/rst.css b/nikola/data/themes/monospace/assets/css/rst.css index 1f0edcb..cf73111 100644 --- a/nikola/data/themes/monospace/assets/css/rst.css +++ b/nikola/data/themes/monospace/assets/css/rst.css @@ -1,8 +1,6 @@ /* -:Author: David Goodger -:Contact: goodger@users.sourceforge.net -:Date: $Date: 2005-12-18 01:56:14 +0100 (Sun, 18 Dec 2005) $ -:Revision: $Revision: 4224 $ +:Author: David Goodger (goodger@python.org) +:Id: $Id: html4css1.css 7514 2012-09-14 14:27:12Z milde $ :Copyright: This stylesheet has been placed in the public domain. Default cascading style sheet for the HTML output of Docutils. @@ -35,11 +33,15 @@ a.toc-backref { color: black } blockquote.epigraph { - margin: 2em 1em ; } + margin: 2em 5em ; } dl.docutils dd { margin-bottom: 0.5em } +object[type="image/svg+xml"], object[type="application/x-shockwave-flash"] { + overflow: hidden; +} + /* Uncomment (and remove this text!) 
to get bold-faced definition list terms dl.docutils dt { font-weight: bold } @@ -54,16 +56,9 @@ div.abstract p.topic-title { div.admonition, div.attention, div.caution, div.danger, div.error, div.hint, div.important, div.note, div.tip, div.warning { - padding: 8px 35px 8px 14px; - margin-bottom: 18px; - text-shadow: 0 1px 0 rgba(255, 255, 255, 0.5); - background-color: #d9edf7; - color: #3a87ad; - border: 1px solid #bce8f1; - -webkit-border-radius: 4px; - -moz-border-radius: 4px; - border-radius: 4px; -} + margin: 2em ; + border: medium outset ; + padding: 1em } div.admonition p.admonition-title, div.hint p.admonition-title, div.important p.admonition-title, div.note p.admonition-title, @@ -73,7 +68,7 @@ div.tip p.admonition-title { div.attention p.admonition-title, div.caution p.admonition-title, div.danger p.admonition-title, div.error p.admonition-title, -div.warning p.admonition-title { +div.warning p.admonition-title, .code .error { color: red ; font-weight: bold ; font-family: sans-serif } @@ -97,7 +92,6 @@ div.dedication p.topic-title { font-style: normal } div.figure { - text-align: center; margin-left: 2em ; margin-right: 2em } @@ -116,7 +110,7 @@ div.line-block div.line-block { margin-left: 1.5em } div.sidebar { - margin-left: 1em ; + margin: 0 0 0.5em 1em ; border: medium outset ; padding: 1em ; background-color: #ffffee ; @@ -135,21 +129,11 @@ div.system-messages h1 { color: red } div.system-message { - padding: 8px 35px 8px 14px; - margin-bottom: 18px; - text-shadow: 0 1px 0 rgba(255, 255, 255, 0.5); - border: 1px solid #eed3d7; - -webkit-border-radius: 4px; - -moz-border-radius: 4px; - border-radius: 4px; - padding: 1em; - background-color: #f2dede; - color: #b94a48; - -} + border: medium outset ; + padding: 1em } div.system-message p.system-message-title { - color: inherit ; + color: red ; font-weight: bold } div.topic { @@ -168,11 +152,38 @@ h2.subtitle { hr.docutils { width: 75% } -img.align-left { - clear: left } +img.align-left, .figure.align-left, object.align-left { + clear: left ; + float: left ; + margin-right: 1em } -img.align-right { - clear: right } +img.align-right, .figure.align-right, object.align-right { + clear: right ; + float: right ; + margin-left: 1em } + +img.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left } + +.align-center { + clear: both ; + text-align: center } + +.align-right { + text-align: right } + +/* reset inner alignment in figures */ +div.align-right { + text-align: inherit } + +/* div.align-center * { */ +/* text-align: left } */ ol.simple, ul.simple { margin-bottom: 1em } @@ -227,16 +238,20 @@ p.topic-title { pre.address { margin-bottom: 0 ; margin-top: 0 ; - font-family: serif ; - font-size: 100% } + font: inherit } -pre.literal-block, pre.doctest-block { - margin: 0 0 0 0 ; - background-color: #eeeeee; - padding: 1em; - overflow: auto; -/* font-family: "Courier New", Courier, monospace;*/ -} +pre.literal-block, pre.doctest-block, pre.math, pre.code { + margin-left: 2em ; + margin-right: 2em } + +pre.code .ln { color: grey; } /* line numbers */ +pre.code, code { background-color: #eeeeee } +pre.code .comment, code .comment { color: #5C6576 } +pre.code .keyword, code .keyword { color: #3B0D06; font-weight: bold } +pre.code .literal.string, code .literal.string { color: #0C5404 } +pre.code .name.builtin, code .name.builtin { color: #352B84 } +pre.code .deleted, code .deleted { background-color: #DEB0A1} +pre.code .inserted, code 
.inserted { background-color: #A3D289} span.classifier { font-family: sans-serif ; @@ -293,23 +308,5 @@ h1 tt.docutils, h2 tt.docutils, h3 tt.docutils, h4 tt.docutils, h5 tt.docutils, h6 tt.docutils { font-size: 100% } -tt.docutils { - background-color: #eeeeee } - ul.auto-toc { list-style-type: none } - -#blog-title { - font-size: 34pt; - /*margin:0 0.3em -14px;*/ - background-color: #FFF; - font-family: "courier"; - text-align: right; - margin-top: 20px; - margin-bottom: 10px; -} - -img { - margin-top: 12px; - margin-bottom: 12px; -} diff --git a/nikola/data/themes/monospace/assets/css/theme.css b/nikola/data/themes/monospace/assets/css/theme.css index b66d7bd..b9c0cf2 100644 --- a/nikola/data/themes/monospace/assets/css/theme.css +++ b/nikola/data/themes/monospace/assets/css/theme.css @@ -11,4 +11,4 @@ body { margin:0px; padding:20px 0px; text-align:center; font-family:Monospace; c #sidebar .description { display:block; width:100%; height:auto; margin:0px 0px 10px 0px; } h1, h2, h3, h4, h5, h6, h7 { margin:0px; text-transform:uppercase; } h4, h5, h6 { font-size:14px; } -h1 { padding:0px 0px 15px; margin:0px 0px 15px 0px; } +#blog-title { margin-top: 0; line-height:48px;} diff --git a/nikola/data/themes/monospace/messages b/nikola/data/themes/monospace/messages deleted file mode 120000 index 3047ea2..0000000 --- a/nikola/data/themes/monospace/messages +++ /dev/null @@ -1 +0,0 @@ -../default/messages/
\ No newline at end of file diff --git a/nikola/data/themes/monospace/templates/base_helper.tmpl b/nikola/data/themes/monospace/templates/base_helper.tmpl index f5fe80c..170dee1 100644 --- a/nikola/data/themes/monospace/templates/base_helper.tmpl +++ b/nikola/data/themes/monospace/templates/base_helper.tmpl @@ -1,22 +1,34 @@ +## -*- coding: utf-8 -*- <%def name="html_head()"> <meta charset="utf-8"> <meta name="title" content="${title} | ${blog_title}" > <meta name="description" content="${description}" > <meta name="author" content="${blog_author}"> <title>${title} | ${blog_title}</title> + <!-- Le styles --> %if use_bundles: -<!-- CSS and JS Bundles here --> <link href="/assets/css/all.css" rel="stylesheet" type="text/css"> + <script src="/assets/js/all.js" type="text/javascript"></script> %else: -<!-- CSS and JS here --> + <link href="/assets/css/bootstrap.css" rel="stylesheet" type="text/css"> <link href="/assets/css/rst.css" rel="stylesheet" type="text/css"> <link href="/assets/css/code.css" rel="stylesheet" type="text/css"> + <link href="/assets/css/colorbox.css" rel="stylesheet" type="text/css"/> + <link href="/assets/css/slides.css" rel="stylesheet" type="text/css"/> <link href="/assets/css/theme.css" rel="stylesheet" type="text/css"/> %if has_custom_css: -<!-- Custom CSS here --> <link href="/assets/css/custom.css" rel="stylesheet" type="text/css"> %endif + <link href="/assets/css/bootstrap-responsive.css" rel="stylesheet" type="text/css"> + <script src="/assets/js/jquery-1.7.2.min.js" type="text/javascript"></script> + <script src="/assets/js/jquery.colorbox-min.js" type="text/javascript"></script> + <script src="/assets/js/slides.min.jquery.js" type="text/javascript"></script> + <script src="/assets/js/bootstrap.min.js" type="text/javascript"></script> %endif + <!-- Le HTML5 shim, for IE6-8 support of HTML5 elements --> + <!--[if lt IE 9]> + <script src="http://html5shim.googlecode.com/svn/trunk/html5.js" type="text/javascript"></script> + <![endif]--> %if rss_link: ${rss_link} %else: @@ -24,6 +36,11 @@ <link rel="alternate" type="application/rss+xml" title="RSS (${language})" href="${_link('rss', None, lang)}"> %endfor %endif + %if favicons: + %for name, file, size in favicons: + <link rel="${name}" href="${file}" sizes="${size}"/> + %endfor + %endif </%def> diff --git a/nikola/data/themes/monospace/templates/disqus_helper.tmpl b/nikola/data/themes/monospace/templates/disqus_helper.tmpl new file mode 100644 index 0000000..674e20e --- /dev/null +++ b/nikola/data/themes/monospace/templates/disqus_helper.tmpl @@ -0,0 +1,40 @@ +## -*- coding: utf-8 -*- +<%! 
+ import json +%> +<%def name="html_disqus(url, title, identifier)"> + %if disqus_forum: + <div id="disqus_thread"></div> + <script type="text/javascript"> + var disqus_shortname ="${disqus_forum}"; + %if url: + var disqus_url="${url}"; + %endif + var disqus_title=${json.dumps(title)}; + var disqus_identifier="${identifier}"; + var disqus_config = function () { + this.language = "${lang}"; + }; + (function() { + var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; + dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js'; + (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); + })(); + </script> + <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> + %endif +</%def> + +<%def name="html_disqus_link(link, identifier)"> + <p> + %if disqus_forum: + <a href="${link}" data-disqus-identifier="${identifier}">Comments</a> + %endif +</%def> + + +<%def name="html_disqus_script()"> + %if disqus_forum: + <script type="text/javascript">var disqus_shortname="${disqus_forum}";(function(){var a=document.createElement("script");a.async=true;a.type="text/javascript";a.src="http://"+disqus_shortname+".disqus.com/count.js";(document.getElementsByTagName("HEAD")[0]||document.getElementsByTagName("BODY")[0]).appendChild(a)}());</script> + %endif +</%def> diff --git a/nikola/data/themes/monospace/templates/gallery.tmpl b/nikola/data/themes/monospace/templates/gallery.tmpl index 37d749f..3186cc8 100644 --- a/nikola/data/themes/monospace/templates/gallery.tmpl +++ b/nikola/data/themes/monospace/templates/gallery.tmpl @@ -1,5 +1,6 @@ ## -*- coding: utf-8 -*- <%inherit file="base.tmpl"/> +<%namespace name="disqus" file="disqus_helper.tmpl"/> <%block name="sourcelink"></%block> <%block name="content"> @@ -24,4 +25,7 @@ <img src="${image[1]}" /></a></li> %endfor </ul> +%if enable_comments: + ${disqus.html_disqus(None, permalink, title)} +%endif </%block> diff --git a/nikola/data/themes/monospace/templates/index.tmpl b/nikola/data/themes/monospace/templates/index.tmpl index bbf5529..ee57d26 100644 --- a/nikola/data/themes/monospace/templates/index.tmpl +++ b/nikola/data/themes/monospace/templates/index.tmpl @@ -1,5 +1,6 @@ ## -*- coding: utf-8 -*- <%namespace name="helper" file="index_helper.tmpl"/> +<%namespace name="disqus" file="disqus_helper.tmpl"/> <%inherit file="base.tmpl"/> <%block name="content"> % for post in posts: @@ -19,9 +20,9 @@ </span> </div> ${post.text(lang, index_teasers)} - ${helper.html_disqus_link(post)} + ${disqus.html_disqus_link(post.permalink()+"#disqus_thread", post.base_path)} </div> % endfor ${helper.html_pager()} - ${helper.html_disqus_script()} + ${disqus.html_disqus_script()} </%block> diff --git a/nikola/data/themes/monospace/templates/index_helper.tmpl b/nikola/data/themes/monospace/templates/index_helper.tmpl index cfecdf3..114a730 100644 --- a/nikola/data/themes/monospace/templates/index_helper.tmpl +++ b/nikola/data/themes/monospace/templates/index_helper.tmpl @@ -1,3 +1,4 @@ +## -*- coding: utf-8 -*- <%def name="html_pager()"> <div> <ul class="pager"> @@ -14,18 +15,3 @@ </ul> </div> </%def> - - -<%def name="html_disqus_link(post)"> - <p> - %if disqus_forum: - <a href="${post.permalink()}#disqus_thread">Comments</a> - %endif -</%def> - - -<%def name="html_disqus_script()"> - %if disqus_forum: - <script type="text/javascript">var disqus_shortname="${disqus_forum}";(function(){var 
a=document.createElement("script");a.async=true;a.type="text/javascript";a.src="http://"+disqus_shortname+".disqus.com/count.js";(document.getElementsByTagName("HEAD")[0]||document.getElementsByTagName("BODY")[0]).appendChild(a)}());</script> - %endif -</%def> diff --git a/nikola/data/themes/monospace/templates/post.tmpl b/nikola/data/themes/monospace/templates/post.tmpl index 94a74f8..2ba27f1 100644 --- a/nikola/data/themes/monospace/templates/post.tmpl +++ b/nikola/data/themes/monospace/templates/post.tmpl @@ -1,12 +1,13 @@ ## -*- coding: utf-8 -*- <%namespace name="helper" file="post_helper.tmpl"/> +<%namespace name="disqus" file="disqus_helper.tmpl"/> <%inherit file="base.tmpl"/> <%block name="content"> <div class="post"> ${helper.html_title()} <div class="meta" style="background-color: rgb(234, 234, 234); "> <span class="authordate"> - ${messages[lang]["Posted"]}: ${post.date.strftime(date_format)} [<a href="${post.pagenames[lang]+'.txt'}">${messages[lang]["Source"]}</a>] + ${messages[lang]["Posted"]}: ${post.date.strftime(date_format)} [<a href="${post.pagenames[lang]+'.txt'}" id="sourcelink">${messages[lang]["Source"]}</a>] </span> <br> %if post.tags: @@ -23,6 +24,6 @@ </div> ${post.text(lang)} ${helper.html_pager(post)} - ${helper.html_disqus(post)} + ${disqus.html_disqus(post.permalink(absolute=True), post.title(lang), post.base_path)} </div> </%block> diff --git a/nikola/data/themes/monospace/templates/post_helper.tmpl b/nikola/data/themes/monospace/templates/post_helper.tmpl index 3e874e9..8651c65 100644 --- a/nikola/data/themes/monospace/templates/post_helper.tmpl +++ b/nikola/data/themes/monospace/templates/post_helper.tmpl @@ -1,3 +1,4 @@ +## -*- coding: utf-8 -*- <%def name="html_title()"> <h1>${title}</h1> % if link: @@ -9,7 +10,7 @@ <%def name="html_translations(post)"> %if len(translations) > 1: %for langname in translations.keys(): - %if langname != lang: + %if langname != lang and post.is_translation_available(langname): | <a href="${post.permalink(langname)}">${messages[langname]["Read in English"]}</a> %endif @@ -28,16 +29,6 @@ </%def> -<%def name="html_disqus(post)"> - %if disqus_forum: - <div id="disqus_thread"></div> - <script type="text/javascript">var disqus_shortname="${disqus_forum}";var disqus_url="${post.permalink(absolute=True)}";(function(){var a=document.createElement("script");a.type="text/javascript";a.async=true;a.src="http://"+disqus_shortname+".disqus.com/embed.js";(document.getElementsByTagName("head")[0]||document.getElementsByTagName("body")[0]).appendChild(a)})(); </script> - <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> - <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a> - %endif -</%def> - - <%def name="html_pager(post)"> <ul class="pager"> %if post.prev_post: diff --git a/nikola/data/themes/monospace/templates/story.tmpl b/nikola/data/themes/monospace/templates/story.tmpl index deb0a46..30d263b 100644 --- a/nikola/data/themes/monospace/templates/story.tmpl +++ b/nikola/data/themes/monospace/templates/story.tmpl @@ -5,4 +5,7 @@ <h1>${title}</h1> %endif ${post.text(lang)} +%if enable_comments: + ${disqus.html_disqus(post.permalink(absolute=True), post.title(lang), post.base_path)} +%endif </%block> diff --git a/nikola/data/themes/orphan/messages b/nikola/data/themes/orphan/messages deleted file mode 120000 index 3047ea2..0000000 --- a/nikola/data/themes/orphan/messages +++ /dev/null @@ -1 +0,0 @@ 
-../default/messages/
\ No newline at end of file diff --git a/nikola/data/themes/orphan/templates/base_helper.tmpl b/nikola/data/themes/orphan/templates/base_helper.tmpl index f5fe80c..170dee1 100644 --- a/nikola/data/themes/orphan/templates/base_helper.tmpl +++ b/nikola/data/themes/orphan/templates/base_helper.tmpl @@ -1,22 +1,34 @@ +## -*- coding: utf-8 -*- <%def name="html_head()"> <meta charset="utf-8"> <meta name="title" content="${title} | ${blog_title}" > <meta name="description" content="${description}" > <meta name="author" content="${blog_author}"> <title>${title} | ${blog_title}</title> + <!-- Le styles --> %if use_bundles: -<!-- CSS and JS Bundles here --> <link href="/assets/css/all.css" rel="stylesheet" type="text/css"> + <script src="/assets/js/all.js" type="text/javascript"></script> %else: -<!-- CSS and JS here --> + <link href="/assets/css/bootstrap.css" rel="stylesheet" type="text/css"> <link href="/assets/css/rst.css" rel="stylesheet" type="text/css"> <link href="/assets/css/code.css" rel="stylesheet" type="text/css"> + <link href="/assets/css/colorbox.css" rel="stylesheet" type="text/css"/> + <link href="/assets/css/slides.css" rel="stylesheet" type="text/css"/> <link href="/assets/css/theme.css" rel="stylesheet" type="text/css"/> %if has_custom_css: -<!-- Custom CSS here --> <link href="/assets/css/custom.css" rel="stylesheet" type="text/css"> %endif + <link href="/assets/css/bootstrap-responsive.css" rel="stylesheet" type="text/css"> + <script src="/assets/js/jquery-1.7.2.min.js" type="text/javascript"></script> + <script src="/assets/js/jquery.colorbox-min.js" type="text/javascript"></script> + <script src="/assets/js/slides.min.jquery.js" type="text/javascript"></script> + <script src="/assets/js/bootstrap.min.js" type="text/javascript"></script> %endif + <!-- Le HTML5 shim, for IE6-8 support of HTML5 elements --> + <!--[if lt IE 9]> + <script src="http://html5shim.googlecode.com/svn/trunk/html5.js" type="text/javascript"></script> + <![endif]--> %if rss_link: ${rss_link} %else: @@ -24,6 +36,11 @@ <link rel="alternate" type="application/rss+xml" title="RSS (${language})" href="${_link('rss', None, lang)}"> %endfor %endif + %if favicons: + %for name, file, size in favicons: + <link rel="${name}" href="${file}" sizes="${size}"/> + %endfor + %endif </%def> diff --git a/nikola/data/themes/orphan/templates/disqus_helper.tmpl b/nikola/data/themes/orphan/templates/disqus_helper.tmpl new file mode 100644 index 0000000..674e20e --- /dev/null +++ b/nikola/data/themes/orphan/templates/disqus_helper.tmpl @@ -0,0 +1,40 @@ +## -*- coding: utf-8 -*- +<%! 
+ import json +%> +<%def name="html_disqus(url, title, identifier)"> + %if disqus_forum: + <div id="disqus_thread"></div> + <script type="text/javascript"> + var disqus_shortname ="${disqus_forum}"; + %if url: + var disqus_url="${url}"; + %endif + var disqus_title=${json.dumps(title)}; + var disqus_identifier="${identifier}"; + var disqus_config = function () { + this.language = "${lang}"; + }; + (function() { + var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; + dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js'; + (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); + })(); + </script> + <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> + %endif +</%def> + +<%def name="html_disqus_link(link, identifier)"> + <p> + %if disqus_forum: + <a href="${link}" data-disqus-identifier="${identifier}">Comments</a> + %endif +</%def> + + +<%def name="html_disqus_script()"> + %if disqus_forum: + <script type="text/javascript">var disqus_shortname="${disqus_forum}";(function(){var a=document.createElement("script");a.async=true;a.type="text/javascript";a.src="http://"+disqus_shortname+".disqus.com/count.js";(document.getElementsByTagName("HEAD")[0]||document.getElementsByTagName("BODY")[0]).appendChild(a)}());</script> + %endif +</%def> diff --git a/nikola/data/themes/orphan/templates/gallery.tmpl b/nikola/data/themes/orphan/templates/gallery.tmpl index 37d749f..3186cc8 100644 --- a/nikola/data/themes/orphan/templates/gallery.tmpl +++ b/nikola/data/themes/orphan/templates/gallery.tmpl @@ -1,5 +1,6 @@ ## -*- coding: utf-8 -*- <%inherit file="base.tmpl"/> +<%namespace name="disqus" file="disqus_helper.tmpl"/> <%block name="sourcelink"></%block> <%block name="content"> @@ -24,4 +25,7 @@ <img src="${image[1]}" /></a></li> %endfor </ul> +%if enable_comments: + ${disqus.html_disqus(None, permalink, title)} +%endif </%block> diff --git a/nikola/data/themes/orphan/templates/index.tmpl b/nikola/data/themes/orphan/templates/index.tmpl index 03dd1f8..1a436e2 100644 --- a/nikola/data/themes/orphan/templates/index.tmpl +++ b/nikola/data/themes/orphan/templates/index.tmpl @@ -1,5 +1,6 @@ ## -*- coding: utf-8 -*- <%namespace name="helper" file="index_helper.tmpl"/> +<%namespace name="disqus" file="disqus_helper.tmpl"/> <%inherit file="base.tmpl"/> <%block name="content"> % for post in posts: @@ -10,9 +11,9 @@ </small></h1> <hr> ${post.text(lang, index_teasers)} - ${helper.html_disqus_link(post)} + ${disqus.html_disqus_link(post.permalink()+"#disqus_thread", post.base_path)} </div> % endfor ${helper.html_pager()} - ${helper.html_disqus_script()} + ${disqus.html_disqus_script()} </%block> diff --git a/nikola/data/themes/orphan/templates/index_helper.tmpl b/nikola/data/themes/orphan/templates/index_helper.tmpl index cfecdf3..114a730 100644 --- a/nikola/data/themes/orphan/templates/index_helper.tmpl +++ b/nikola/data/themes/orphan/templates/index_helper.tmpl @@ -1,3 +1,4 @@ +## -*- coding: utf-8 -*- <%def name="html_pager()"> <div> <ul class="pager"> @@ -14,18 +15,3 @@ </ul> </div> </%def> - - -<%def name="html_disqus_link(post)"> - <p> - %if disqus_forum: - <a href="${post.permalink()}#disqus_thread">Comments</a> - %endif -</%def> - - -<%def name="html_disqus_script()"> - %if disqus_forum: - <script type="text/javascript">var disqus_shortname="${disqus_forum}";(function(){var 
a=document.createElement("script");a.async=true;a.type="text/javascript";a.src="http://"+disqus_shortname+".disqus.com/count.js";(document.getElementsByTagName("HEAD")[0]||document.getElementsByTagName("BODY")[0]).appendChild(a)}());</script> - %endif -</%def> diff --git a/nikola/data/themes/orphan/templates/post.tmpl b/nikola/data/themes/orphan/templates/post.tmpl index 306192d..672d4f6 100644 --- a/nikola/data/themes/orphan/templates/post.tmpl +++ b/nikola/data/themes/orphan/templates/post.tmpl @@ -1,5 +1,6 @@ ## -*- coding: utf-8 -*- <%namespace name="helper" file="post_helper.tmpl"/> +<%namespace name="disqus" file="disqus_helper.tmpl"/> <%inherit file="base.tmpl"/> <%block name="content"> <div class="postbox"> @@ -9,12 +10,12 @@ ${messages[lang]["Posted"]}: ${post.date.strftime(date_format)} ${helper.html_translations(post)} | - <a href="${post.pagenames[lang]+'.txt'}">${messages[lang]["Source"]}</a> + <a href="${post.pagenames[lang]+'.txt'}" id="sourcelink">${messages[lang]["Source"]}</a> ${helper.html_tags(post)} </small> <hr> ${post.text(lang)} ${helper.html_pager(post)} - ${helper.html_disqus(post)} + ${disqus.html_disqus(post.permalink(absolute=True), post.title(lang), post.base_path)} </div> </%block> diff --git a/nikola/data/themes/orphan/templates/post_helper.tmpl b/nikola/data/themes/orphan/templates/post_helper.tmpl index 3e874e9..a3dc75f 100644 --- a/nikola/data/themes/orphan/templates/post_helper.tmpl +++ b/nikola/data/themes/orphan/templates/post_helper.tmpl @@ -1,3 +1,4 @@ +## -*- coding: utf-8 -*- <%def name="html_title()"> <h1>${title}</h1> % if link: @@ -9,7 +10,7 @@ <%def name="html_translations(post)"> %if len(translations) > 1: %for langname in translations.keys(): - %if langname != lang: + %if langname != lang and post.is_translation_available(langname): | <a href="${post.permalink(langname)}">${messages[langname]["Read in English"]}</a> %endif @@ -28,16 +29,6 @@ </%def> -<%def name="html_disqus(post)"> - %if disqus_forum: - <div id="disqus_thread"></div> - <script type="text/javascript">var disqus_shortname="${disqus_forum}";var disqus_url="${post.permalink(absolute=True)}";(function(){var a=document.createElement("script");a.type="text/javascript";a.async=true;a.src="http://"+disqus_shortname+".disqus.com/embed.js";(document.getElementsByTagName("head")[0]||document.getElementsByTagName("body")[0]).appendChild(a)})(); </script> - <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> - <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a> - %endif -</%def> - - <%def name="html_pager(post)"> <ul class="pager"> %if post.prev_post: diff --git a/nikola/data/themes/orphan/templates/story.tmpl b/nikola/data/themes/orphan/templates/story.tmpl index deb0a46..30d263b 100644 --- a/nikola/data/themes/orphan/templates/story.tmpl +++ b/nikola/data/themes/orphan/templates/story.tmpl @@ -5,4 +5,7 @@ <h1>${title}</h1> %endif ${post.text(lang)} +%if enable_comments: + ${disqus.html_disqus(post.permalink(absolute=True), post.title(lang), post.base_path)} +%endif </%block> diff --git a/nikola/data/themes/site/templates/base.tmpl b/nikola/data/themes/site/templates/base.tmpl index 784f29a..2094d9a 100644 --- a/nikola/data/themes/site/templates/base.tmpl +++ b/nikola/data/themes/site/templates/base.tmpl @@ -3,6 +3,7 @@ <!DOCTYPE html> <html lang="${lang}"> <head> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> ${html_head()} 
<%block name="extra_head"> </%block> @@ -12,34 +13,50 @@ <div class="navbar navbar-fixed-top"> <div class="navbar-inner"> <div class="container"> + + <!-- .btn-navbar is used as the toggle for collapsed navbar content --> + <a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse"> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </a> + <a class="brand" href="${abs_link('/')}"> ${blog_title} </a> - <ul class="nav"> - ${html_sidebar_links()} - </ul> - %if search_form: - ${search_form} - %endif - <ul class="nav pull-right"> - <%block name="belowtitle"> - %if len(translations) > 1: - <li>${html_translations()}</li> - %endif - </%block> - <%block name="sourcelink"> </%block> - </ul> + <!-- Everything you want hidden at 940px or less, place within here --> + <div class="nav-collapse collapse"> + <ul class="nav"> + ${html_sidebar_links()} + </ul> + %if search_form: + ${search_form} + %endif + <ul class="nav pull-right"> + <%block name="belowtitle"> + %if len(translations) > 1: + <li>${html_translations()}</li> + %endif + </%block> + <%block name="sourcelink"> </%block> + </ul> + </div> </div> </div> </div> <!-- End of Menubar --> -<div class="container" id="container"> +<div class="container-fluid" id="container-fluid"> <!--Body content--> + <div class="row-fluid"> + <div class="span2"></div> + <div class="span8"> <%block name="content"></%block> + </div> + </div> <!--End of body content--> - <div class="footerbox"> +</div> +<div class="footerbox"> ${content_footer} - </div> </div> ${html_social()} ${analytics} diff --git a/nikola/data/themes/site/templates/post.tmpl b/nikola/data/themes/site/templates/post.tmpl index 06ca10f..785385f 100644 --- a/nikola/data/themes/site/templates/post.tmpl +++ b/nikola/data/themes/site/templates/post.tmpl @@ -1,5 +1,6 @@ ## -*- coding: utf-8 -*- <%namespace name="helper" file="post_helper.tmpl"/> +<%namespace name="disqus" file="disqus_helper.tmpl"/> <%inherit file="base.tmpl"/> <%block name="content"> <div class="postbox"> @@ -13,12 +14,12 @@ <hr> ${post.text(lang)} ${helper.html_pager(post)} - ${helper.html_disqus(post)} + ${disqus.html_disqus(post.permalink(absolute=True), post.title(lang), post.base_path)} </div> </%block> <%block name="sourcelink"> <li> - <a href="${post.pagenames[lang]+post.source_ext()}">${messages[lang]["Source"]}</a> + <a href="${post.pagenames[lang]+post.source_ext()}" id="sourcelink">${messages[lang]["Source"]}</a> </li> </%block> diff --git a/nikola/filters.py b/nikola/filters.py index 15696f1..4a63cb4 100644 --- a/nikola/filters.py +++ b/nikola/filters.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. 
-# 
+#
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
 # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
 # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
@@ -51,7 +51,7 @@ def runinplace(command, infile):
     command = command.replace('%1', "'%s'" % infile)
 
     needs_tmp = "%2" in command
-    command = command.replace('%2', "'%s'"% tmpfname)
+    command = command.replace('%2', "'%s'" % tmpfname)
 
     subprocess.check_call(command, shell=True)
 
@@ -62,22 +62,29 @@ def runinplace(command, infile):
 def yui_compressor(infile):
     return runinplace(r'yui-compressor --nomunge %1 -o %2', infile)
 
+
 def optipng(infile):
     return runinplace(r"optipng -preserve -o2 -quiet %1", infile)
 
+
 def jpegoptim(infile):
-    return runinplace(r"jpegoptim -p --strip-all -q %1",infile)
+    return runinplace(r"jpegoptim -p --strip-all -q %1", infile)
+
 
 def tidy(inplace):
     # Google site verification files are not HTML
     if re.match(r"google[a-f0-9]+.html", os.path.basename(inplace)) \
-            and open(inplace).readline().startswith("google-site-verification:"):
+            and open(inplace).readline().startswith(
+                "google-site-verification:"):
         return
 
     # Tidy will give error exits, that we will ignore.
-    output = subprocess.check_output( "tidy -m -w 90 --indent no --quote-marks no --keep-time yes --tidy-mark no '%s'; exit 0" % inplace, stderr = subprocess.STDOUT, shell = True )
+    output = subprocess.check_output("tidy -m -w 90 --indent no --quote-marks "
+                                     "no --keep-time yes --tidy-mark no '%s'; "
+                                     "exit 0" % inplace,
+                                     stderr=subprocess.STDOUT, shell=True)
 
-    for line in output.split( "\n" ):
+    for line in output.split("\n"):
         if "Warning:" in line:
             if '<meta> proprietary attribute "charset"' in line:
                 # We want to set it though.
@@ -95,6 +102,6 @@ def tidy(inplace):
             # Happens for tables, TODO: Check this is normal.
continue else: - assert False, (inplace,line) + assert False, (inplace, line) elif "Error:" in line: assert False, line diff --git a/nikola/nikola.py b/nikola/nikola.py index 4ce6f61..e10c84e 100644 --- a/nikola/nikola.py +++ b/nikola/nikola.py @@ -32,7 +32,7 @@ import sys try: from urlparse import urlparse, urlsplit, urljoin except ImportError: - from urllib.parse import urlparse, urlsplit, urljoin + from urllib.parse import urlparse, urlsplit, urljoin # NOQA import lxml.html from yapsy.PluginManager import PluginManager @@ -79,39 +79,70 @@ class Nikola(object): # This is the default config # TODO: fill it self.config = { + 'ADD_THIS_BUTTONS': True, + 'ANALYTICS': '', 'ARCHIVE_PATH': "", 'ARCHIVE_FILENAME': "archive.html", - 'DEFAULT_LANG': "en", - 'OUTPUT_FOLDER': 'output', 'CACHE_FOLDER': 'cache', + 'COMMENTS_IN_GALLERIES': False, + 'COMMENTS_IN_STORIES': False, + 'CONTENT_FOOTER': '', + 'DATE_FORMAT': '%Y-%m-%d %H:%M', + 'DEFAULT_LANG': "en", + 'DEPLOY_COMMANDS': [], + 'DISQUS_FORUM': 'nikolademo', + 'FAVICONS': {}, + 'FILE_METADATA_REGEXP': None, 'FILES_FOLDERS': {'files': ''}, - 'LISTINGS_FOLDER': 'listings', - 'ADD_THIS_BUTTONS': True, + 'FILTERS': {}, + 'GALLERY_PATH': 'galleries', 'INDEX_DISPLAY_POST_COUNT': 10, 'INDEX_TEASERS': False, - 'MAX_IMAGE_SIZE': 1280, - 'USE_FILENAME_AS_TITLE': True, - 'SLUG_TAG_PATH': False, 'INDEXES_TITLE': "", 'INDEXES_PAGES': "", - 'FILTERS': {}, - 'USE_BUNDLES': True, - 'TAG_PAGES_ARE_INDEXES': False, - 'THEME': 'default', + 'INDEX_PATH': '', + 'LICENSE': '', + 'LISTINGS_FOLDER': 'listings', + 'MAX_IMAGE_SIZE': 1280, + 'OUTPUT_FOLDER': 'output', 'post_compilers': { - "rest": ['.txt', '.rst'], - "markdown": ['.md', '.mdown', '.markdown'], - "html": ['.html', '.htm'], + "rest": ('.txt', '.rst'), + "markdown": ('.md', '.mdown', '.markdown'), + "textile": ('.textile',), + "txt2tags": ('.t2t',), + "bbcode": ('.bb',), + "wiki": ('.wiki',), + "ipynb": ('.ipynb',), + "html": ('.html', '.htm') }, + 'POST_PAGES': ( + ("posts/*.txt", "posts", "post.tmpl", True), + ("stories/*.txt", "stories", "story.tmpl", False), + ), + 'REDIRECTIONS': [], + 'RSS_LINK': None, + 'RSS_PATH': '', + 'RSS_TEASERS': True, + 'SEARCH_FORM': '', + 'SLUG_TAG_PATH': True, + 'STORY_INDEX': False, + 'TAG_PATH': 'categories', + 'TAG_PAGES_ARE_INDEXES': False, + 'THEME': 'site', + 'THUMBNAIL_SIZE': 180, + 'USE_FILENAME_AS_TITLE': True, + 'USE_BUNDLES': True, } + self.config.update(config) self.config['TRANSLATIONS'] = self.config.get('TRANSLATIONS', - {self.config['DEFAULT_LANG']: ''}) + {self.config['DEFAULT_' + 'LANG']: ''}) self.THEMES = utils.get_theme_chain(self.config['THEME']) self.MESSAGES = utils.load_messages(self.THEMES, - self.config['TRANSLATIONS']) + self.config['TRANSLATIONS']) self.plugin_manager = PluginManager(categories_filter={ "Command": Command, @@ -124,7 +155,7 @@ class Nikola(object): self.plugin_manager.setPluginPlaces([ str(os.path.join(os.path.dirname(__file__), 'plugins')), str(os.path.join(os.getcwd(), 'plugins')), - ]) + ]) self.plugin_manager.collectPlugins() self.commands = {} @@ -145,20 +176,40 @@ class Nikola(object): pluginInfo.plugin_object.set_site(self) # set global_context for template rendering - self.GLOBAL_CONTEXT = self.config.get('GLOBAL_CONTEXT', {}) + self.GLOBAL_CONTEXT = { + } + self.GLOBAL_CONTEXT['messages'] = self.MESSAGES self.GLOBAL_CONTEXT['_link'] = self.link self.GLOBAL_CONTEXT['rel_link'] = self.rel_link self.GLOBAL_CONTEXT['abs_link'] = self.abs_link self.GLOBAL_CONTEXT['exists'] = self.file_exists + 
self.GLOBAL_CONTEXT['add_this_buttons'] = self.config[ 'ADD_THIS_BUTTONS'] self.GLOBAL_CONTEXT['index_display_post_count'] = self.config[ 'INDEX_DISPLAY_POST_COUNT'] self.GLOBAL_CONTEXT['use_bundles'] = self.config['USE_BUNDLES'] + self.GLOBAL_CONTEXT['favicons'] = self.config['FAVICONS'] if 'date_format' not in self.GLOBAL_CONTEXT: self.GLOBAL_CONTEXT['date_format'] = '%Y-%m-%d %H:%M' + self.GLOBAL_CONTEXT['blog_author'] = self.config.get('BLOG_AUTHOR') + self.GLOBAL_CONTEXT['blog_title'] = self.config.get('BLOG_TITLE') + self.GLOBAL_CONTEXT['blog_url'] = self.config.get('BLOG_URL') + self.GLOBAL_CONTEXT['blog_desc'] = self.config.get('BLOG_DESCRIPTION') + self.GLOBAL_CONTEXT['analytics'] = self.config.get('ANALYTICS') + self.GLOBAL_CONTEXT['translations'] = self.config.get('TRANSLATIONS') + self.GLOBAL_CONTEXT['license'] = self.config.get('LICENSE') + self.GLOBAL_CONTEXT['search_form'] = self.config.get('SEARCH_FORM') + self.GLOBAL_CONTEXT['disqus_forum'] = self.config.get('DISQUS_FORUM') + self.GLOBAL_CONTEXT['content_footer'] = self.config.get('CONTENT_FOOTER') + self.GLOBAL_CONTEXT['rss_path'] = self.config.get('RSS_PATH') + self.GLOBAL_CONTEXT['rss_link'] = self.config.get('RSS_LINK') + self.GLOBAL_CONTEXT['sidebar_links'] = self.config.get('SIDEBAR_LINKS') + + self.GLOBAL_CONTEXT.update(self.config.get('GLOBAL_CONTEXT', {})) + # check if custom css exist and is not empty for files_path in list(self.config['FILES_FOLDERS'].keys()): custom_css_path = os.path.join(files_path, 'assets/css/custom.css') @@ -173,8 +224,8 @@ class Nikola(object): pi = self.plugin_manager.getPluginByName( template_sys_name, "TemplateSystem") if pi is None: - sys.stderr.write("Error loading %s template system plugin\n" - % template_sys_name) + sys.stderr.write("Error loading %s template system plugin\n" % + template_sys_name) sys.exit(1) self.template_system = pi.plugin_object lookup_dirs = [os.path.join(utils.get_theme_path(name), "templates") @@ -228,13 +279,16 @@ class Nikola(object): def render_template(self, template_name, output_name, context): local_context = {} local_context["template_name"] = template_name - local_context.update(self.config['GLOBAL_CONTEXT']) + local_context.update(self.GLOBAL_CONTEXT) local_context.update(context) data = self.template_system.render_template( template_name, None, local_context) - assert output_name.startswith(self.config["OUTPUT_FOLDER"]) - url_part = output_name[len(self.config["OUTPUT_FOLDER"]) + 1:] + assert isinstance(output_name, bytes) + assert output_name.startswith( + self.config["OUTPUT_FOLDER"].encode('utf8')) + url_part = output_name.decode('utf8')[len(self.config["OUTPUT_FOLDER"]) + + 1:] # This is to support windows paths url_part = "/".join(url_part.split(os.sep)) @@ -250,7 +304,7 @@ class Nikola(object): if dst_url.netloc: if dst_url.scheme == 'link': # Magic link dst = self.link(dst_url.netloc, dst_url.path.lstrip('/'), - context['lang']) + context['lang']) else: return dst @@ -273,7 +327,7 @@ class Nikola(object): break # Now i is the longest common prefix result = '/'.join(['..'] * (len(src_elems) - i - 1) + - dst_elems[i:]) + dst_elems[i:]) if not result: result = "." 
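The hunk above merely re-wraps Nikola's relative-link computation, but the algorithm is easier to follow in one piece. A minimal standalone sketch (the name rel_link and the bare-path signature are illustrative only; the real method parses full URLs first and, as here, assumes source and destination already share scheme and host):

    def rel_link(src_path, dst_path):
        # Drop the empty element produced by the leading "/".
        src_elems = src_path.split('/')[1:]
        dst_elems = dst_path.split('/')[1:]
        i = 0
        for (i, s), d in zip(enumerate(src_elems), dst_elems):
            if s != d:
                break
        else:
            i += 1  # no mismatch: paths agree up to the shorter one
        # i now marks the longest common prefix; climb out of the part of
        # src that differs, then descend into the destination.
        result = '/'.join(['..'] * (len(src_elems) - i - 1) + dst_elems[i:])
        return result or "."

    # rel_link('/posts/foo.html', '/galleries/bar/index.html')
    # returns '../galleries/bar/index.html'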
@@ -309,6 +363,7 @@ class Nikola(object): * rss (name is ignored) * gallery (name is the gallery name) * listing (name is the source code file name) + * post_path (name is 1st element in a post_pages tuple) The returned value is always a path relative to output, like "categories/whatever.html" @@ -324,38 +379,49 @@ class Nikola(object): if kind == "tag_index": path = [_f for _f in [self.config['TRANSLATIONS'][lang], - self.config['TAG_PATH'], 'index.html'] if _f] + self.config['TAG_PATH'], 'index.html'] if _f] elif kind == "tag": if self.config['SLUG_TAG_PATH']: name = utils.slugify(name) path = [_f for _f in [self.config['TRANSLATIONS'][lang], - self.config['TAG_PATH'], name + ".html"] if _f] + self.config['TAG_PATH'], name + ".html"] if + _f] elif kind == "tag_rss": if self.config['SLUG_TAG_PATH']: name = utils.slugify(name) path = [_f for _f in [self.config['TRANSLATIONS'][lang], - self.config['TAG_PATH'], name + ".xml"] if _f] + self.config['TAG_PATH'], name + ".xml"] if + _f] elif kind == "index": - if name > 0: + if name not in [None, 0]: path = [_f for _f in [self.config['TRANSLATIONS'][lang], - self.config['INDEX_PATH'], 'index-%s.html' % name] if _f] + self.config['INDEX_PATH'], + 'index-%s.html' % name] if _f] else: path = [_f for _f in [self.config['TRANSLATIONS'][lang], - self.config['INDEX_PATH'], 'index.html'] if _f] + self.config['INDEX_PATH'], 'index.html'] + if _f] + elif kind == "post_path": + path = [_f for _f in [self.config['TRANSLATIONS'][lang], + os.path.dirname(name), "index.html"] if _f] elif kind == "rss": path = [_f for _f in [self.config['TRANSLATIONS'][lang], - self.config['RSS_PATH'], 'rss.xml'] if _f] + self.config['RSS_PATH'], 'rss.xml'] if _f] elif kind == "archive": if name: path = [_f for _f in [self.config['TRANSLATIONS'][lang], - self.config['ARCHIVE_PATH'], name, 'index.html'] if _f] + self.config['ARCHIVE_PATH'], name, + 'index.html'] if _f] else: path = [_f for _f in [self.config['TRANSLATIONS'][lang], - self.config['ARCHIVE_PATH'], self.config['ARCHIVE_FILENAME']] if _f] + self.config['ARCHIVE_PATH'], + self.config['ARCHIVE_FILENAME']] if _f] elif kind == "gallery": - path = [_f for _f in [self.config['GALLERY_PATH'], name, 'index.html'] if _f] + path = [_f for _f in [self.config['GALLERY_PATH'], name, + 'index.html'] if _f] elif kind == "listing": - path = [_f for _f in [self.config['LISTINGS_FOLDER'], name + '.html'] if _f] + path = [_f for _f in [self.config['LISTINGS_FOLDER'], name + + '.html'] if _f] if is_link: return '/' + ('/'.join(path)) else: @@ -426,36 +492,48 @@ class Nikola(object): def scan_posts(self): """Scan all the posts.""" if not self._scanned: - print("Scanning posts ") + print("Scanning posts", end='') targets = set([]) - for wildcard, destination, _, use_in_feeds in \ + for wildcard, destination, template_name, use_in_feeds in \ self.config['post_pages']: - print (".") - for base_path in glob.glob(wildcard): - post = Post( - base_path, - self.config['CACHE_FOLDER'], - destination, - use_in_feeds, - self.config['TRANSLATIONS'], - self.config['DEFAULT_LANG'], - self.config['BLOG_URL'], - self.MESSAGES) - for lang, langpath in list(self.config['TRANSLATIONS'].items()): - dest = (destination, langpath, post.pagenames[lang]) - if dest in targets: - raise Exception( - 'Duplicated output path %r in post %r' % - (post.pagenames[lang], base_path)) - targets.add(dest) - self.global_data[post.post_name] = post - if post.use_in_feeds: - self.posts_per_year[ - str(post.date.year)].append(post.post_name) - for tag in post.tags: - 
self.posts_per_tag[tag].append(post.post_name) - else: - self.pages.append(post) + print(".", end='') + base_len = len(destination.split(os.sep)) + dirname = os.path.dirname(wildcard) + for dirpath, _, _ in os.walk(dirname): + dir_glob = os.path.join(dirpath, + os.path.basename(wildcard)) + dest_dir = os.path.join(*([destination] + + dirpath.split( + os.sep)[base_len:])) + for base_path in glob.glob(dir_glob): + post = Post( + base_path, + self.config['CACHE_FOLDER'], + dest_dir, + use_in_feeds, + self.config['TRANSLATIONS'], + self.config['DEFAULT_LANG'], + self.config['BLOG_URL'], + self.MESSAGES, + template_name, + self.config['FILE_METADATA_REGEXP']) + for lang, langpath in list( + self.config['TRANSLATIONS'].items()): + dest = (destination, langpath, dir_glob, + post.pagenames[lang]) + if dest in targets: + raise Exception( + 'Duplicated output path %r in post %r' % + (post.pagenames[lang], base_path)) + targets.add(dest) + self.global_data[post.post_name] = post + if post.use_in_feeds: + self.posts_per_year[ + str(post.date.year)].append(post.post_name) + for tag in post.tags: + self.posts_per_tag[tag].append(post.post_name) + else: + self.pages.append(post) for name, post in list(self.global_data.items()): self.timeline.append(post) self.timeline.sort(key=lambda p: p.date) @@ -468,52 +546,53 @@ class Nikola(object): self._scanned = True print("done!") - def generic_page_renderer(self, lang, wildcard, - template_name, destination, filters): + def generic_page_renderer(self, lang, post, filters): """Render post fragments to final HTML pages.""" - for post in glob.glob(wildcard): - post_name = os.path.splitext(post)[0] - context = {} - post = self.global_data[post_name] - deps = post.deps(lang) + \ - self.template_system.template_deps(template_name) - context['post'] = post - context['lang'] = lang - context['title'] = post.title(lang) - context['description'] = post.description(lang) - context['permalink'] = post.permalink(lang) - context['page_list'] = self.pages - output_name = os.path.join( - self.config['OUTPUT_FOLDER'], - self.config['TRANSLATIONS'][lang], - destination, - post.pagenames[lang] + ".html") - deps_dict = copy(context) - deps_dict.pop('post') - if post.prev_post: - deps_dict['PREV_LINK'] = [post.prev_post.permalink(lang)] - if post.next_post: - deps_dict['NEXT_LINK'] = [post.next_post.permalink(lang)] - deps_dict['OUTPUT_FOLDER'] = self.config['OUTPUT_FOLDER'] - deps_dict['TRANSLATIONS'] = self.config['TRANSLATIONS'] - deps_dict['global'] = self.config['GLOBAL_CONTEXT'] - - task = { - 'name': output_name.encode('utf-8'), - 'file_dep': deps, - 'targets': [output_name], - 'actions': [(self.render_template, - [template_name, output_name, context])], - 'clean': True, - 'uptodate': [config_changed(deps_dict)], - } - - yield utils.apply_filters(task, filters) - - def generic_post_list_renderer(self, lang, posts, - output_name, template_name, filters, extra_context): + context = {} + deps = post.deps(lang) + \ + self.template_system.template_deps(post.template_name) + context['post'] = post + context['lang'] = lang + context['title'] = post.title(lang) + context['description'] = post.description(lang) + context['permalink'] = post.permalink(lang) + context['page_list'] = self.pages + if post.use_in_feeds: + context['enable_comments'] = True + else: + context['enable_comments'] = self.config['COMMENTS_IN_STORIES'] + output_name = os.path.join(self.config['OUTPUT_FOLDER'], + post.destination_path(lang)).encode('utf8') + deps_dict = copy(context) + deps_dict.pop('post') + if 
post.prev_post: + deps_dict['PREV_LINK'] = [post.prev_post.permalink(lang)] + if post.next_post: + deps_dict['NEXT_LINK'] = [post.next_post.permalink(lang)] + deps_dict['OUTPUT_FOLDER'] = self.config['OUTPUT_FOLDER'] + deps_dict['TRANSLATIONS'] = self.config['TRANSLATIONS'] + deps_dict['global'] = self.GLOBAL_CONTEXT + deps_dict['comments'] = context['enable_comments'] + + task = { + 'name': output_name, + 'file_dep': deps, + 'targets': [output_name], + 'actions': [(self.render_template, [post.template_name, + output_name, context])], + 'clean': True, + 'uptodate': [config_changed(deps_dict)], + } + + yield utils.apply_filters(task, filters) + + def generic_post_list_renderer(self, lang, posts, output_name, + template_name, filters, extra_context): """Renders pages with lists of posts.""" + # This is a name on disk, has to be bytes + assert isinstance(output_name, bytes) + deps = self.template_system.template_deps(template_name) for post in posts: deps += post.deps(lang) @@ -526,15 +605,15 @@ class Nikola(object): context["nextlink"] = None context.update(extra_context) deps_context = copy(context) - deps_context["posts"] = [(p.titles[lang], p.permalink(lang)) - for p in posts] - deps_context["global"] = self.config['GLOBAL_CONTEXT'] + deps_context["posts"] = [(p.titles[lang], p.permalink(lang)) for p in + posts] + deps_context["global"] = self.GLOBAL_CONTEXT task = { - 'name': output_name.encode('utf8'), + 'name': output_name, 'targets': [output_name], 'file_dep': deps, - 'actions': [(self.render_template, - [template_name, output_name, context])], + 'actions': [(self.render_template, [template_name, output_name, + context])], 'clean': True, 'uptodate': [config_changed(deps_context)] } diff --git a/nikola/plugin_categories.py b/nikola/plugin_categories.py index bf810e3..8c69dec 100644 --- a/nikola/plugin_categories.py +++ b/nikola/plugin_categories.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -107,3 +107,8 @@ class PageCompiler(object): def compile_html(self, source, dest): """Compile the source, save it on dest.""" raise Exception("Implement Me First") + + def create_post(self, path, onefile=False, title="", slug="", date="", + tags=""): + """Create post file with optional metadata.""" + raise Exception("Implement Me First") diff --git a/nikola/plugins/__init__.py b/nikola/plugins/__init__.py index ec6c8f5..b1de7f1 100644 --- a/nikola/plugins/__init__.py +++ b/nikola/plugins/__init__.py @@ -1,4 +1,3 @@ from __future__ import absolute_import -from . import command_import_wordpress - +from . 
import command_import_wordpress # NOQA diff --git a/nikola/plugins/command_bootswatch_theme.py b/nikola/plugins/command_bootswatch_theme.py index 185717f..6c1061f 100644 --- a/nikola/plugins/command_bootswatch_theme.py +++ b/nikola/plugins/command_bootswatch_theme.py @@ -29,7 +29,7 @@ import os try: import requests except ImportError: - requests = None + requests = None # NOQA from nikola.plugin_categories import Command @@ -42,16 +42,19 @@ class CommandBootswatchTheme(Command): def run(self, *args): """Given a swatch name and a parent theme, creates a custom theme.""" if requests is None: - print('To use the install_theme command, you need to install the "requests" package.') + print('To use the install_theme command, you need to install the ' + '"requests" package.') return parser = OptionParser(usage="nikola %s [options]" % self.name) parser.add_option("-n", "--name", dest="name", - help="New theme name (default: custom)", default='custom') + help="New theme name (default: custom)", + default='custom') parser.add_option("-s", "--swatch", dest="swatch", - help="Name of the swatch from bootswatch.com (default: slate)", - default='slate') + help="Name of the swatch from bootswatch.com " + "(default: slate)", default='slate') parser.add_option("-p", "--parent", dest="parent", - help="Parent theme name (default: site)", default='site') + help="Parent theme name (default: site)", + default='site') (options, args) = parser.parse_args(list(args)) name = options.name @@ -68,11 +71,11 @@ class CommandBootswatchTheme(Command): url = 'http://bootswatch.com/%s/%s' % (swatch, fname) print("Downloading: ", url) data = requests.get(url).text - with open(os.path.join( - 'themes', name, 'assets', 'css', fname), 'wb+') as output: + with open(os.path.join('themes', name, 'assets', 'css', fname), + 'wb+') as output: output.write(data) with open(os.path.join('themes', name, 'parent'), 'wb+') as output: output.write(parent) - print('Theme created. Change the THEME setting to "%s" to use it.' - % name) + print('Theme created. Change the THEME setting to "%s" to use it.' % + name) diff --git a/nikola/plugins/command_build.py b/nikola/plugins/command_build.py index 867cbf9..29d4c9d 100644 --- a/nikola/plugins/command_build.py +++ b/nikola/plugins/command_build.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -60,5 +60,5 @@ def task_render_site(): args = args[1:] else: cmd = 'run' - os.system('doit %s -f %s -d . %s' % (cmd, dodo.name, ' '.join(args))) - + os.system('doit %s -f %s -d . 
%s' % (cmd, dodo.name, + ''.join(args))) diff --git a/nikola/plugins/command_check.py b/nikola/plugins/command_check.py index 5fc8bfe..ae19c41 100644 --- a/nikola/plugins/command_check.py +++ b/nikola/plugins/command_check.py @@ -30,7 +30,7 @@ try: from urllib import unquote from urlparse import urlparse except ImportError: - from urllib.parse import unquote, urlparse + from urllib.parse import unquote, urlparse # NOQA import lxml.html @@ -46,11 +46,10 @@ class CommandCheck(Command): """Check the generated site.""" parser = OptionParser(usage="nikola %s [options]" % self.name) parser.add_option('-l', '--check-links', dest='links', - action='store_true', - help='Check for dangling links.') + action='store_true', + help='Check for dangling links.') parser.add_option('-f', '--check-files', dest='files', - action='store_true', - help='Check for unknown files.') + action='store_true', help='Check for unknown files.') (options, args) = parser.parse_args(list(args)) if options.links: @@ -75,8 +74,7 @@ def analize(task): if parsed.fragment: target = target.split('#')[0] target_filename = os.path.abspath( - os.path.join(os.path.dirname(filename), - unquote(target))) + os.path.join(os.path.dirname(filename), unquote(target))) if target_filename not in existing_targets: if os.path.exists(target_filename): existing_targets.add(target_filename) @@ -96,13 +94,10 @@ def scan_links(): print("Checking Links:\n===============\n") for task in os.popen('nikola build list --all', 'r').readlines(): task = task.strip() - if task.split(':')[0] in ( - 'render_tags', - 'render_archive', - 'render_galleries', - 'render_indexes', - 'render_pages', - 'render_site') and '.html' in task: + if task.split(':')[0] in ('render_tags', 'render_archive', + 'render_galleries', 'render_indexes', + 'render_pages', + 'render_site') and '.html' in task: analize(task) diff --git a/nikola/plugins/command_console.py b/nikola/plugins/command_console.py index ea517a0..7a009fd 100644 --- a/nikola/plugins/command_console.py +++ b/nikola/plugins/command_console.py @@ -32,4 +32,4 @@ class Deploy(Command): name = "console" def run(self, *args): - os.system('python -i -c "from nikola.console import *"') + os.system('python -i -c "from nikola.console import *"') diff --git a/nikola/plugins/command_import_blogger.plugin b/nikola/plugins/command_import_blogger.plugin new file mode 100644 index 0000000..b275a7f --- /dev/null +++ b/nikola/plugins/command_import_blogger.plugin @@ -0,0 +1,10 @@ +[Core] +Name = import_blogger +Module = command_import_blogger + +[Documentation] +Author = Roberto Alsina +Version = 0.2 +Website = http://nikola.ralsina.com.ar +Description = Import a blogger site from a XML dump. + diff --git a/nikola/plugins/command_import_blogger.py b/nikola/plugins/command_import_blogger.py new file mode 100644 index 0000000..aea210a --- /dev/null +++ b/nikola/plugins/command_import_blogger.py @@ -0,0 +1,300 @@ +# Copyright (c) 2012 Roberto Alsina y otros. 
+ +# Permission is hereby granted, free of charge, to any +# person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the +# Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the +# Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice +# shall be included in all copies or substantial portions of +# the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS +# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +from __future__ import unicode_literals, print_function +import codecs +import csv +import datetime +import os +from optparse import OptionParser +import time + +try: + from urlparse import urlparse +except ImportError: + from urllib.parse import urlparse # NOQA + +try: + import feedparser +except ImportError: + feedparser = None # NOQA +from lxml import html +from mako.template import Template + +from nikola.plugin_categories import Command +from nikola import utils + +links = {} + + +class CommandImportBlogger(Command): + """Import a blogger dump.""" + + name = "import_blogger" + + @classmethod + def get_channel_from_file(cls, filename): + return feedparser.parse(filename) + + @staticmethod + def configure_redirections(url_map): + redirections = [] + for k, v in url_map.items(): + # remove the initial "/" because src is a relative file path + src = (urlparse(k).path + 'index.html')[1:] + dst = (urlparse(v).path) + if src == 'index.html': + print("Can't do a redirect for: %r" % k) + else: + redirections.append((src, dst)) + + return redirections + + def generate_base_site(self): + if not os.path.exists(self.output_folder): + os.system('nikola init --empty %s' % (self.output_folder, )) + else: + self.import_into_existing_site = True + print('The folder %s already exists - assuming that this is a ' + 'already existing nikola site.' 
% self.output_folder) + + conf_template = Template(filename=os.path.join( + os.path.dirname(utils.__file__), 'conf.py.in')) + + return conf_template + + @staticmethod + def populate_context(channel): + context = {} + context['DEFAULT_LANG'] = 'en' # blogger doesn't include the language + # in the dump + context['BLOG_TITLE'] = channel.feed.title + + context['BLOG_DESCRIPTION'] = '' # Missing in the dump + context['BLOG_URL'] = channel.feed.link.rstrip('/') + context['BLOG_EMAIL'] = channel.feed.author_detail.email + context['BLOG_AUTHOR'] = channel.feed.author_detail.name + context['POST_PAGES'] = '''( + ("posts/*.html", "posts", "post.tmpl", True), + ("stories/*.html", "stories", "story.tmpl", False), + )''' + context['POST_COMPILERS'] = '''{ + "rest": ('.txt', '.rst'), + "markdown": ('.md', '.mdown', '.markdown', '.wp'), + "html": ('.html', '.htm') + } + ''' + + return context + + @classmethod + def transform_content(cls, content): + # No transformations yet + return content + + @classmethod + def write_content(cls, filename, content): + doc = html.document_fromstring(content) + doc.rewrite_links(replacer) + + with open(filename, "wb+") as fd: + fd.write(html.tostring(doc, encoding='utf8')) + + @staticmethod + def write_metadata(filename, title, slug, post_date, description, tags): + with codecs.open(filename, "w+", "utf8") as fd: + fd.write('%s\n' % title) + fd.write('%s\n' % slug) + fd.write('%s\n' % post_date) + fd.write('%s\n' % ','.join(tags)) + fd.write('\n') + fd.write('%s\n' % description) + + def import_item(self, item, out_folder=None): + """Takes an item from the feed and creates a post file.""" + if out_folder is None: + out_folder = 'posts' + + # link is something like http://foo.com/2012/09/01/hello-world/ + # So, take the path, utils.slugify it, and that's our slug + link = item.link + link_path = urlparse(link).path + + title = item.title + + # blogger supports empty titles, which Nikola doesn't + if not title: + print("Warning: Empty title in post with URL %s. Using NO_TITLE " + "as placeholder, please fix." % link) + title = "NO_TITLE" + + if link_path.lower().endswith('.html'): + link_path = link_path[:-5] + + slug = utils.slugify(link_path) + + if not slug: # should never happen + print("Error converting post:", title) + return + + description = '' + post_date = datetime.datetime.fromtimestamp(time.mktime( + item.published_parsed)) + + for candidate in item.content: + if candidate.type == 'text/html': + content = candidate.value + break + # FIXME: handle attachments + + tags = [] + for tag in item.tags: + if tag.scheme == 'http://www.blogger.com/atom/ns#': + tags.append(tag.term) + + if item.get('app_draft'): + tags.append('draft') + is_draft = True + else: + is_draft = False + + self.url_map[link] = self.context['BLOG_URL'] + '/' + \ + out_folder + '/' + slug + '.html' + + if is_draft and self.exclude_drafts: + print('Draft "%s" will not be imported.' % (title, )) + elif content.strip(): + # If no content is found, no files are written. + content = self.transform_content(content) + + self.write_metadata(os.path.join(self.output_folder, out_folder, + slug + '.meta'), + title, slug, post_date, description, tags) + self.write_content( + os.path.join(self.output_folder, out_folder, slug + '.html'), + content) + else: + print('Not going to import "%s" because it seems to contain' + ' no content.' 
% (title, )) + + def process_item(self, item): + post_type = item.tags[0].term + + if post_type == 'http://schemas.google.com/blogger/2008/kind#post': + self.import_item(item, 'posts') + elif post_type == 'http://schemas.google.com/blogger/2008/kind#page': + self.import_item(item, 'stories') + elif post_type == ('http://schemas.google.com/blogger/2008/kind' + '#settings'): + # Ignore settings + pass + elif post_type == ('http://schemas.google.com/blogger/2008/kind' + '#template'): + # Ignore template + pass + elif post_type == ('http://schemas.google.com/blogger/2008/kind' + '#comment'): + # FIXME: not importing comments. Does blogger support "pages"? + pass + else: + print("Unknown post_type:", post_type) + + def import_posts(self, channel): + for item in channel.entries: + self.process_item(item) + + @staticmethod + def write_urlmap_csv(output_file, url_map): + with codecs.open(output_file, 'w+', 'utf8') as fd: + csv_writer = csv.writer(fd) + for item in url_map.items(): + csv_writer.writerow(item) + + def get_configuration_output_path(self): + if not self.import_into_existing_site: + filename = 'conf.py' + else: + filename = 'conf.py.wordpress_import-%s' % datetime.datetime.now( + ).strftime('%Y%m%d_%H%M%s') + config_output_path = os.path.join(self.output_folder, filename) + print('Configuration will be written to: %s' % config_output_path) + + return config_output_path + + @staticmethod + def write_configuration(filename, rendered_template): + with codecs.open(filename, 'w+', 'utf8') as fd: + fd.write(rendered_template) + + def run(self, *arguments): + """Import a Wordpress blog from an export file into a Nikola site.""" + # Parse the data + if feedparser is None: + print('To use the import_blogger command,' + ' you have to install the "feedparser" package.') + return + + parser = OptionParser( + usage="nikola %s [options] blogger_export_file" % self.name) + parser.add_option('-f', '--filename', dest='filename', + help='Blogger export file from which the import is ' + 'made.') + parser.add_option('-o', '--output-folder', dest='output_folder', + default='new_site', + help='The location into which the imported content ' + 'will be written') + parser.add_option('-d', '--no-drafts', dest='exclude_drafts', + default=False, action="store_true", help='Do not ' + 'import drafts.') + + (options, args) = parser.parse_args(list(arguments)) + + if not options.filename and args: + options.filename = args[0] + + if not options.filename: + parser.print_usage() + return + + self.blogger_export_file = options.filename + self.output_folder = options.output_folder + self.import_into_existing_site = False + self.exclude_drafts = options.exclude_drafts + self.url_map = {} + channel = self.get_channel_from_file(self.blogger_export_file) + self.context = self.populate_context(channel) + conf_template = self.generate_base_site() + self.context['REDIRECTIONS'] = self.configure_redirections( + self.url_map) + + self.import_posts(channel) + self.write_urlmap_csv( + os.path.join(self.output_folder, 'url_map.csv'), self.url_map) + + self.write_configuration(self.get_configuration_output_path( + ), conf_template.render(**self.context)) + + +def replacer(dst): + return links.get(dst, dst) diff --git a/nikola/plugins/command_import_wordpress.plugin b/nikola/plugins/command_import_wordpress.plugin index a2477b9..ff7cdca 100644 --- a/nikola/plugins/command_import_wordpress.plugin +++ b/nikola/plugins/command_import_wordpress.plugin @@ -4,7 +4,7 @@ Module = command_import_wordpress [Documentation] Author = Roberto 
Alsina -Version = 0.1 +Version = 0.2 Website = http://nikola.ralsina.com.ar Description = Import a wordpress site from a XML dump (requires markdown). diff --git a/nikola/plugins/command_import_wordpress.py b/nikola/plugins/command_import_wordpress.py index 1552da4..07028d8 100644 --- a/nikola/plugins/command_import_wordpress.py +++ b/nikola/plugins/command_import_wordpress.py @@ -25,20 +25,23 @@ from __future__ import unicode_literals, print_function import codecs import csv +import datetime import os import re +from optparse import OptionParser + try: from urlparse import urlparse except ImportError: - from urllib.parse import urlparse + from urllib.parse import urlparse # NOQA -from lxml import etree, html, builder +from lxml import etree, html from mako.template import Template try: import requests except ImportError: - requests = None + requests = None # NOQA from nikola.plugin_categories import Command from nikola import utils @@ -85,9 +88,14 @@ class CommandImportWordpress(Command): return redirections - @staticmethod - def generate_base_site(context): - os.system('nikola init new_site') + def generate_base_site(self): + if not os.path.exists(self.output_folder): + os.system('nikola init --empty %s' % (self.output_folder, )) + else: + self.import_into_existing_site = True + print('The folder %s already exists - assuming that this is a ' + 'already existing nikola site.' % self.output_folder) + conf_template = Template(filename=os.path.join( os.path.dirname(utils.__file__), 'conf.py.in')) @@ -128,14 +136,18 @@ class CommandImportWordpress(Command): @staticmethod def download_url_content_to_file(url, dst_path): - with open(dst_path, 'wb+') as fd: - fd.write(requests.get(url).content) + try: + with open(dst_path, 'wb+') as fd: + fd.write(requests.get(url).content) + except requests.exceptions.ConnectionError as err: + print("Downloading %s to %s failed: %s" % (url, dst_path, err)) def import_attachment(self, item, wordpress_namespace): - url = get_text_tag(item, '{%s}attachment_url' % wordpress_namespace, 'foo') + url = get_text_tag( + item, '{%s}attachment_url' % wordpress_namespace, 'foo') link = get_text_tag(item, '{%s}link' % wordpress_namespace, 'foo') path = urlparse(url).path - dst_path = os.path.join(*(['new_site', 'files'] + dst_path = os.path.join(*([self.output_folder, 'files'] + list(path.split('/')))) dst_dir = os.path.dirname(dst_path) if not os.path.isdir(dst_dir): @@ -147,23 +159,32 @@ class CommandImportWordpress(Command): links[url] = '/' + dst_url @staticmethod - def write_content(filename, content): + def transform_sourcecode(content): + new_content = re.sub('\[sourcecode language="([^"]+)"\]', + "\n~~~~~~~~~~~~{.\\1}\n", content) + new_content = new_content.replace('[/sourcecode]', + "\n~~~~~~~~~~~~\n") + return new_content + + @staticmethod + def transform_caption(content): + new_caption = re.sub(r'\[/caption\]', '', content) + new_caption = re.sub(r'\[caption.*\]', '', new_caption) + + return new_caption + + @classmethod + def transform_content(cls, content): + new_content = cls.transform_sourcecode(content) + return cls.transform_caption(new_content) + + @classmethod + def write_content(cls, filename, content): + doc = html.document_fromstring(content) + doc.rewrite_links(replacer) + with open(filename, "wb+") as fd: - if content.strip(): - # Handle sourcecode pseudo-tags - content = re.sub('\[sourcecode language="([^"]+)"\]', - "\n~~~~~~~~~~~~{.\\1}\n", content) - content = content.replace('[/sourcecode]', "\n~~~~~~~~~~~~\n") - doc = 
html.document_fromstring(content) - doc.rewrite_links(replacer) - # Replace H1 elements with H2 elements - for tag in doc.findall('.//h1'): - if not tag.text: - print("Failed to fix bad title: %r" % - html.tostring(tag)) - else: - tag.getparent().replace(tag, builder.E.h2(tag.text)) - fd.write(html.tostring(doc, encoding='utf8')) + fd.write(html.tostring(doc, encoding='utf8')) @staticmethod def write_metadata(filename, title, slug, post_date, description, tags): @@ -186,22 +207,30 @@ class CommandImportWordpress(Command): link = get_text_tag(item, 'link', None) slug = utils.slugify(urlparse(link).path) if not slug: # it happens if the post has no "nice" URL - slug = get_text_tag(item, '{%s}post_name' % wordpress_namespace, None) + slug = get_text_tag( + item, '{%s}post_name' % wordpress_namespace, None) if not slug: # it *may* happen - slug = get_text_tag(item, '{%s}post_id' % wordpress_namespace, None) + slug = get_text_tag( + item, '{%s}post_id' % wordpress_namespace, None) if not slug: # should never happen print("Error converting post:", title) return description = get_text_tag(item, 'description', '') - post_date = get_text_tag(item, '{%s}post_date' % wordpress_namespace, None) - status = get_text_tag(item, '{%s}status' % wordpress_namespace, 'publish') + post_date = get_text_tag( + item, '{%s}post_date' % wordpress_namespace, None) + status = get_text_tag( + item, '{%s}status' % wordpress_namespace, 'publish') content = get_text_tag( item, '{http://purl.org/rss/1.0/modules/content/}encoded', '') tags = [] if status != 'publish': tags.append('draft') + is_draft = True + else: + is_draft = False + for tag in item.findall('category'): text = tag.text if text == 'Uncategorized': @@ -211,17 +240,28 @@ class CommandImportWordpress(Command): self.url_map[link] = self.context['BLOG_URL'] + '/' + \ out_folder + '/' + slug + '.html' - self.write_metadata(os.path.join('new_site', out_folder, - slug + '.meta'), - title, slug, post_date, description, tags) - self.write_content( - os.path.join('new_site', out_folder, slug + '.wp'), content) + if is_draft and self.exclude_drafts: + print('Draft "%s" will not be imported.' % (title, )) + elif content.strip(): + # If no content is found, no files are written. + content = self.transform_content(content) + + self.write_metadata(os.path.join(self.output_folder, out_folder, + slug + '.meta'), + title, slug, post_date, description, tags) + self.write_content( + os.path.join(self.output_folder, out_folder, slug + '.wp'), + content) + else: + print('Not going to import "%s" because it seems to contain' + ' no content.' 
% (title, )) def process_item(self, item): # The namespace usually is something like: # http://wordpress.org/export/1.2/ wordpress_namespace = item.nsmap['wp'] - post_type = get_text_tag(item, '{%s}post_type' % wordpress_namespace, 'post') + post_type = get_text_tag( + item, '{%s}post_type' % wordpress_namespace, 'post') if post_type == 'attachment': self.import_attachment(item, wordpress_namespace) @@ -241,32 +281,68 @@ class CommandImportWordpress(Command): for item in url_map.items(): csv_writer.writerow(item) + def get_configuration_output_path(self): + if not self.import_into_existing_site: + filename = 'conf.py' + else: + filename = 'conf.py.wordpress_import-%s' % datetime.datetime.now( + ).strftime('%Y%m%d_%H%M%s') + config_output_path = os.path.join(self.output_folder, filename) + print('Configuration will be written to: %s' % config_output_path) + + return config_output_path + @staticmethod def write_configuration(filename, rendered_template): with codecs.open(filename, 'w+', 'utf8') as fd: fd.write(rendered_template) - def run(self, fname=None): + def run(self, *arguments): + """Import a Wordpress blog from an export file into a Nikola site.""" # Parse the data if requests is None: - print('To use the import_wordpress command, you have to install the "requests" package.') + print('To use the import_wordpress command,' + ' you have to install the "requests" package.') return - if fname is None: - print("Usage: nikola import_wordpress wordpress_dump.xml") + + parser = OptionParser(usage="nikola %s [options] " + "wordpress_export_file" % self.name) + parser.add_option('-f', '--filename', dest='filename', + help='WordPress export file from which the import ' + 'made.') + parser.add_option('-o', '--output-folder', dest='output_folder', + default='new_site', help='The location into which ' + 'the imported content will be written') + parser.add_option('-d', '--no-drafts', dest='exclude_drafts', + default=False, action="store_true", help='Do not ' + 'import drafts.') + + (options, args) = parser.parse_args(list(arguments)) + + if not options.filename and args: + options.filename = args[0] + + if not options.filename: + parser.print_usage() return + self.wordpress_export_file = options.filename + self.output_folder = options.output_folder + self.import_into_existing_site = False + self.exclude_drafts = options.exclude_drafts self.url_map = {} - channel = self.get_channel_from_file(fname) + channel = self.get_channel_from_file(self.wordpress_export_file) self.context = self.populate_context(channel) - conf_template = self.generate_base_site(self.context) + conf_template = self.generate_base_site() self.context['REDIRECTIONS'] = self.configure_redirections( self.url_map) self.import_posts(channel) self.write_urlmap_csv( - os.path.join('new_site', 'url_map.csv'), self.url_map) - self.write_configuration(os.path.join( - 'new_site', 'conf.py'), conf_template.render(**self.context)) + os.path.join(self.output_folder, 'url_map.csv'), self.url_map) + + self.write_configuration(self.get_configuration_output_path( + ), conf_template.render(**self.context)) def replacer(dst): diff --git a/nikola/plugins/command_init.plugin b/nikola/plugins/command_init.plugin index 3c6bd21..f4adf4a 100644 --- a/nikola/plugins/command_init.plugin +++ b/nikola/plugins/command_init.plugin @@ -4,7 +4,6 @@ Module = command_init [Documentation] Author = Roberto Alsina -Version = 0.1 +Version = 0.2 Website = http://nikola.ralsina.com.ar Description = Create a new site. 
- diff --git a/nikola/plugins/command_init.py b/nikola/plugins/command_init.py index 0b56482..e9bd001 100644 --- a/nikola/plugins/command_init.py +++ b/nikola/plugins/command_init.py @@ -23,7 +23,7 @@ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. from __future__ import print_function -from optparse import OptionParser +from optparse import OptionParser, OptionGroup import os import shutil import codecs @@ -61,14 +61,50 @@ The destination folder must not exist. 'POST_COMPILERS': """{ "rest": ('.txt', '.rst'), "markdown": ('.md', '.mdown', '.markdown'), + "textile": ('.textile',), + "txt2tags": ('.t2t',), + "bbcode": ('.bb',), + "wiki": ('.wiki',), + "ipynb": ('.ipynb',), "html": ('.html', '.htm') }""", 'REDIRECTIONS': '[]', - } + } + + @classmethod + def copy_sample_site(cls, target): + lib_path = cls.get_path_to_nikola_modules() + src = os.path.join(lib_path, 'data', 'samplesite') + shutil.copytree(src, target) + + @classmethod + def create_configuration(cls, target): + lib_path = cls.get_path_to_nikola_modules() + template_path = os.path.join(lib_path, 'conf.py.in') + conf_template = Template(filename=template_path) + conf_path = os.path.join(target, 'conf.py') + with codecs.open(conf_path, 'w+', 'utf8') as fd: + fd.write(conf_template.render(**cls.SAMPLE_CONF)) + + @classmethod + def create_empty_site(cls, target): + for folder in ('files', 'galleries', 'listings', 'posts', 'stories'): + os.makedirs(os.path.join(target, folder)) + + @staticmethod + def get_path_to_nikola_modules(): + return os.path.dirname(nikola.__file__) def run(self, *args): """Create a new site.""" parser = OptionParser(usage=self.usage) + group = OptionGroup(parser, "Site Options") + group.add_option( + "--empty", action="store_true", dest='empty', default=True, + help="Create an empty site with only a config.") + group.add_option("--demo", action="store_false", dest='empty', + help="Create a site filled with example data.") + parser.add_option_group(group) (options, args) = parser.parse_args(list(args)) if not args: @@ -78,17 +114,13 @@ The destination folder must not exist. if target is None: print(self.usage) else: - # copy sample data - lib_path = os.path.dirname(nikola.__file__) - src = os.path.join(lib_path, 'data', 'samplesite') - shutil.copytree(src, target) - # create conf.py - template_path = os.path.join(lib_path, 'conf.py.in') - conf_template = Template(filename=template_path) - conf_path = os.path.join(target, 'conf.py') - with codecs.open(conf_path, 'w+', 'utf8') as fd: - fd.write(conf_template.render(**self.SAMPLE_CONF)) - - print("A new site with some sample data has been created at %s." - % target) - print("See README.txt in that folder for more information.") + if options.empty: + self.create_empty_site(target) + print('Created empty site at %s.' % target) + else: + self.copy_sample_site(target) + print("A new site with example data has been created at %s." 
+ % target) + print("See README.txt in that folder for more information.") + + self.create_configuration(target) diff --git a/nikola/plugins/command_install_theme.py b/nikola/plugins/command_install_theme.py index b9ca634..0dc000b 100644 --- a/nikola/plugins/command_install_theme.py +++ b/nikola/plugins/command_install_theme.py @@ -26,12 +26,12 @@ from __future__ import print_function from optparse import OptionParser import os import json -from io import StringIO +from io import BytesIO try: import requests except ImportError: - requests = None + requests = None # NOQA from nikola.plugin_categories import Command from nikola import utils @@ -45,18 +45,19 @@ class CommandInstallTheme(Command): def run(self, *args): """Install theme into current site.""" if requests is None: - print('To use the install_theme command, you need to install the "requests" package.') + print('To use the install_theme command, you need to install the ' + '"requests" package.') return parser = OptionParser(usage="nikola %s [options]" % self.name) - parser.add_option("-l", "--list", dest="list", - action="store_true", - help="Show list of available themes.") - parser.add_option("-n", "--name", dest="name", - help="Theme name", default=None) - parser.add_option("-u", "--url", dest="url", - help="URL for the theme repository" - "(default: http://nikola.ralsina.com.ar/themes/index.json)", - default='http://nikola.ralsina.com.ar/themes/index.json') + parser.add_option("-l", "--list", dest="list", action="store_true", + help="Show list of available themes.") + parser.add_option("-n", "--name", dest="name", help="Theme name", + default=None) + parser.add_option("-u", "--url", dest="url", help="URL for the theme " + "repository" "(default: " + "http://nikola.ralsina.com.ar/themes/index.json)", + default='http://nikola.ralsina.com.ar/themes/' + 'index.json') (options, args) = parser.parse_args(list(args)) listing = options.list @@ -84,7 +85,7 @@ class CommandInstallTheme(Command): except: raise OSError("mkdir 'theme' error!") print('Downloading: %s' % data[name]) - zip_file = StringIO() + zip_file = BytesIO() zip_file.write(requests.get(data[name]).content) print('Extracting: %s into themes' % name) utils.extract_all(zip_file) diff --git a/nikola/plugins/command_new_post.py b/nikola/plugins/command_new_post.py index 36026be..a5715de 100644 --- a/nikola/plugins/command_new_post.py +++ b/nikola/plugins/command_new_post.py @@ -33,6 +33,30 @@ from nikola.plugin_categories import Command from nikola import utils +def filter_post_pages(compiler, is_post, post_compilers, post_pages): + """Given a compiler ("markdown", "rest"), and whether it's meant for + a post or a page, and post_compilers, return the correct entry from + post_pages.""" + + # First throw away all the post_pages with the wrong is_post + filtered = [entry for entry in post_pages if entry[3] == is_post] + + # These are the extensions supported by the required format + extensions = post_compilers[compiler] + + # Throw away the post_pages with the wrong extensions + filtered = [entry for entry in filtered if any([ext in entry[0] for ext in + extensions])] + + if not filtered: + type_name = "post" if is_post else "page" + raise Exception("Can't find a way, using your configuration, to create" + "a %s in format %s. 
You may want to tweak " + "post_compilers or post_pages in conf.py" % + (type_name, compiler)) + return filtered[0] + + class CommandNewPost(Command): """Create a new post.""" @@ -40,23 +64,30 @@ class CommandNewPost(Command): def run(self, *args): """Create a new post.""" + + compiler_names = [p.name for p in + self.site.plugin_manager.getPluginsOfCategory( + "PageCompiler")] + parser = OptionParser(usage="nikola %s [options]" % self.name) - parser.add_option('-p', '--page', dest='is_post', - action='store_false', - help='Create a page instead of a blog post.') - parser.add_option('-t', '--title', dest='title', - help='Title for the page/post.', default=None) - parser.add_option('--tags', dest='tags', - help='Comma-separated tags for the page/post.', - default='') - parser.add_option('-1', dest='onefile', - action='store_true', - help='Create post with embedded metadata (single file format).', - default=self.site.config.get('ONE_FILE_POSTS', True)) - parser.add_option('-f', '--format', - dest='post_format', - default='rest', - help='Format for post (rest or markdown)') + parser.add_option('-p', '--page', dest='is_post', action='store_false', + default=True, help='Create a page instead of a blog ' + 'post.') + parser.add_option('-t', '--title', dest='title', help='Title for the ' + 'page/post.', default=None) + parser.add_option('--tags', dest='tags', help='Comma-separated tags ' + 'for the page/post.', default='') + parser.add_option('-1', dest='onefile', action='store_true', + help='Create post with embedded metadata (single ' + 'file format).', + default=self.site.config.get('ONE_FILE_POSTS', True)) + parser.add_option('-2', dest='onefile', action='store_false', + help='Create post with separate metadata (two file ' + 'format).', + default=self.site.config.get('ONE_FILE_POSTS', True)) + parser.add_option('-f', '--format', dest='post_format', default='rest', + help='Format for post (one of %s)' % + ','.join(compiler_names)) (options, args) = parser.parse_args(list(args)) is_post = options.is_post @@ -64,62 +95,45 @@ class CommandNewPost(Command): tags = options.tags onefile = options.onefile post_format = options.post_format + if post_format not in compiler_names: + print("ERROR: Unknown post format %s" % post_format) + return + compiler_plugin = self.site.plugin_manager.getPluginByName( + post_format, "PageCompiler").plugin_object # Guess where we should put this - for path, _, _, use_in_rss in self.site.config['post_pages']: - if use_in_rss == is_post: - break - else: - path = self.site.config['post_pages'][0][0] + entry = filter_post_pages(post_format, is_post, + self.site.config['post_compilers'], + self.site.config['post_pages']) print("Creating New Post") print("-----------------\n") if title is None: - print("Enter title: ") - title = sys.stdin.readline().decode(sys.stdin.encoding).strip() + print("Enter title: ", end='') + title = sys.stdin.readline() else: print("Title: ", title) + if isinstance(title, bytes): + title = title.decode(sys.stdin.encoding) + title = title.strip() slug = utils.slugify(title) date = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S') - data = [ - title, - slug, - date, - tags - ] - output_path = os.path.dirname(path) + data = [title, slug, date, tags] + output_path = os.path.dirname(entry[0]) meta_path = os.path.join(output_path, slug + ".meta") - pattern = os.path.basename(path) - if pattern.startswith("*."): - suffix = pattern[1:] - else: - suffix = ".txt" + pattern = os.path.basename(entry[0]) + suffix = pattern[1:] txt_path = 
os.path.join(output_path, slug + suffix) if (not onefile and os.path.isfile(meta_path)) or \ - os.path.isfile(txt_path): + os.path.isfile(txt_path): print("The title already exists!") exit() + compiler_plugin.create_post(txt_path, onefile, title, slug, date, tags) - if onefile: - if post_format not in ('rest', 'markdown'): - print("ERROR: Unknown post format %s" % post_format) - return - with codecs.open(txt_path, "wb+", "utf8") as fd: - if post_format == 'markdown': - fd.write('<!-- \n') - fd.write('.. title: %s\n' % title) - fd.write('.. slug: %s\n' % slug) - fd.write('.. date: %s\n' % date) - fd.write('.. tags: %s\n' % tags) - fd.write('.. link: \n') - fd.write('.. description: \n') - if post_format == 'markdown': - fd.write('-->\n') - fd.write("\nWrite your post here.") - else: + if not onefile: # write metadata file with codecs.open(meta_path, "wb+", "utf8") as fd: - fd.write(data) + fd.write('\n'.join(data)) with codecs.open(txt_path, "wb+", "utf8") as fd: fd.write("Write your post here.") print("Your post's metadata is at: ", meta_path) diff --git a/nikola/plugins/command_serve.py b/nikola/plugins/command_serve.py index 628bba0..75e07a9 100644 --- a/nikola/plugins/command_serve.py +++ b/nikola/plugins/command_serve.py @@ -29,8 +29,8 @@ try: from BaseHTTPServer import HTTPServer from SimpleHTTPServer import SimpleHTTPRequestHandler except ImportError: - from http.server import HTTPServer - from http.server import SimpleHTTPRequestHandler + from http.server import HTTPServer # NOQA + from http.server import SimpleHTTPRequestHandler # NOQA from nikola.plugin_categories import Command @@ -44,12 +44,10 @@ class CommandBuild(Command): """Start test server.""" parser = OptionParser(usage="nikola %s [options]" % self.name) - parser.add_option("-p", "--port", dest="port", - help="Port numer (default: 8000)", default=8000, - type="int") - parser.add_option("-a", "--address", dest="address", - help="Address to bind (default: 127.0.0.1)", - default='127.0.0.1') + parser.add_option("-p", "--port", dest="port", help="Port numer " + "(default: 8000)", default=8000, type="int") + parser.add_option("-a", "--address", dest="address", help="Address to " + "bind (default: 127.0.0.1)", default='127.0.0.1') (options, args) = parser.parse_args(list(args)) out_dir = self.site.config['OUTPUT_FOLDER'] @@ -58,9 +56,9 @@ class CommandBuild(Command): else: os.chdir(out_dir) httpd = HTTPServer((options.address, options.port), - OurHTTPRequestHandler) + OurHTTPRequestHandler) sa = httpd.socket.getsockname() - print("Serving HTTP on", sa[0], "port", sa[1], "...") + print("Serving HTTP on {0[0]} port {0[1]}...".format(sa)) httpd.serve_forever() diff --git a/nikola/plugins/compile_bbcode.plugin b/nikola/plugins/compile_bbcode.plugin new file mode 100644 index 0000000..ec3ce2b --- /dev/null +++ b/nikola/plugins/compile_bbcode.plugin @@ -0,0 +1,10 @@ +[Core] +Name = bbcode +Module = compile_bbcode + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Compile BBCode into HTML + diff --git a/nikola/plugins/compile_bbcode.py b/nikola/plugins/compile_bbcode.py new file mode 100644 index 0000000..fd7fe1a --- /dev/null +++ b/nikola/plugins/compile_bbcode.py @@ -0,0 +1,78 @@ +# Copyright (c) 2012 Roberto Alsina y otros. 
+ +# Permission is hereby granted, free of charge, to any +# person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the +# Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the +# Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice +# shall be included in all copies or substantial portions of +# the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS +# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +"""Implementation of compile_html based on bbcode.""" + +import codecs +import os + +try: + import bbcode +except ImportError: + bbcode = None # NOQA + +from nikola.plugin_categories import PageCompiler + + +class CompileTextile(PageCompiler): + """Compile bbcode into HTML.""" + + name = "bbcode" + + def __init__(self): + if bbcode is None: + return + self.parser = bbcode.Parser() + self.parser.add_simple_formatter("note", "") + + def compile_html(self, source, dest): + if bbcode is None: + raise Exception('To build this site, you need to install the ' + '"bbcode" package.') + try: + os.makedirs(os.path.dirname(dest)) + except: + pass + with codecs.open(dest, "w+", "utf8") as out_file: + with codecs.open(source, "r", "utf8") as in_file: + data = in_file.read() + output = self.parser.format(data) + out_file.write(output) + + def create_post(self, path, onefile=False, title="", slug="", date="", + tags=""): + d_name = os.path.dirname(path) + if not os.path.isdir(d_name): + os.makedirs(os.path.dirname(path)) + with codecs.open(path, "wb+", "utf8") as fd: + if onefile: + fd.write('[note]<!--\n') + fd.write('.. title: %s\n' % title) + fd.write('.. slug: %s\n' % slug) + fd.write('.. date: %s\n' % date) + fd.write('.. tags: %s\n' % tags) + fd.write('.. link: \n') + fd.write('.. description: \n') + fd.write('-->[/note]\n\n') + fd.write("\nWrite your post here.") diff --git a/nikola/plugins/compile_html.py b/nikola/plugins/compile_html.py index 24e01fb..850a3e5 100644 --- a/nikola/plugins/compile_html.py +++ b/nikola/plugins/compile_html.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. 
-# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -26,7 +26,7 @@ import os import shutil - +import codecs from nikola.plugin_categories import PageCompiler @@ -39,6 +39,23 @@ class CompileHtml(PageCompiler): def compile_html(self, source, dest): try: os.makedirs(os.path.dirname(dest)) - except: + except Exception: pass shutil.copyfile(source, dest) + + def create_post(self, path, onefile=False, title="", slug="", + date="", tags=""): + d_name = os.path.dirname(path) + if not os.path.isdir(d_name): + os.makedirs(os.path.dirname(path)) + with codecs.open(path, "wb+", "utf8") as fd: + if onefile: + fd.write('<!-- \n') + fd.write('.. title: %s\n' % title) + fd.write('.. slug: %s\n' % slug) + fd.write('.. date: %s\n' % date) + fd.write('.. tags: %s\n' % tags) + fd.write('.. link: \n') + fd.write('.. description: \n') + fd.write('-->\n\n') + fd.write("\n<p>Write your post here.</p>") diff --git a/nikola/plugins/compile_markdown/__init__.py b/nikola/plugins/compile_markdown/__init__.py index 1a58a98..5eb25c8 100644 --- a/nikola/plugins/compile_markdown/__init__.py +++ b/nikola/plugins/compile_markdown/__init__.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -31,7 +31,7 @@ import re try: from markdown import markdown except ImportError: - markdown = None + markdown = None # NOQA from nikola.plugin_categories import PageCompiler @@ -43,7 +43,8 @@ class CompileMarkdown(PageCompiler): def compile_html(self, source, dest): if markdown is None: - raise Exception('To build this site, you need to install the "markdown" package.') + raise Exception('To build this site, you need to install the ' + '"markdown" package.') try: os.makedirs(os.path.dirname(dest)) except: @@ -52,11 +53,27 @@ class CompileMarkdown(PageCompiler): with codecs.open(source, "r", "utf8") as in_file: data = in_file.read() output = markdown(data, ['fenced_code', 'codehilite']) - # remove the H1 because there is "title" h1. - output = re.sub(r'<h1>.*</h1>', '', output) + # h1 is reserved for the title so increment all header levels + for n in reversed(range(1, 9)): + output = re.sub('<h%i>' % n, '<h%i>' % (n + 1), output) + output = re.sub('</h%i>' % n, '</h%i>' % (n + 1), output) # python-markdown's highlighter uses the class 'codehilite' to wrap # code, # instead of the standard 'code'. None of the standard # pygments stylesheets use this class, so swap it to be 'code' output = re.sub(r'(<div[^>]+class="[^"]*)codehilite([^>]+)', r'\1code\2', output) out_file.write(output) + + def create_post(self, path, onefile=False, title="", slug="", date="", + tags=""): + with codecs.open(path, "wb+", "utf8") as fd: + if onefile: + fd.write('<!-- \n') + fd.write('.. title: %s\n' % title) + fd.write('.. slug: %s\n' % slug) + fd.write('.. date: %s\n' % date) + fd.write('.. tags: %s\n' % tags) + fd.write('.. link: \n') + fd.write('.. 
description: \n') + fd.write('-->\n\n') + fd.write("\nWrite your post here.") diff --git a/nikola/plugins/compile_rest/__init__.py b/nikola/plugins/compile_rest/__init__.py index 0e677e1..4191add 100644 --- a/nikola/plugins/compile_rest/__init__.py +++ b/nikola/plugins/compile_rest/__init__.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -38,8 +38,12 @@ directives.register_directive('listing', listings_directive) from .youtube import youtube directives.register_directive('youtube', youtube) +from .vimeo import vimeo +directives.register_directive('vimeo', vimeo) from .slides import slides directives.register_directive('slides', slides) +from .gist_directive import GitHubGist +directives.register_directive('gist', GitHubGist) from nikola.plugin_categories import PageCompiler @@ -59,23 +63,33 @@ class CompileRest(PageCompiler): with codecs.open(dest, "w+", "utf8") as out_file: with codecs.open(source, "r", "utf8") as in_file: data = in_file.read() - output, error_level = rst2html(data, - settings_overrides={'initial_header_level': 2}) + output, error_level = rst2html( + data, settings_overrides={'initial_header_level': 2}) out_file.write(output) if error_level < 3: return True else: return False + def create_post(self, path, onefile=False, title="", slug="", date="", + tags=""): + with codecs.open(path, "wb+", "utf8") as fd: + if onefile: + fd.write('.. title: %s\n' % title) + fd.write('.. slug: %s\n' % slug) + fd.write('.. date: %s\n' % date) + fd.write('.. tags: %s\n' % tags) + fd.write('.. link: \n') + fd.write('.. description: \n\n') + fd.write("\nWrite your post here.") + def rst2html(source, source_path=None, source_class=docutils.io.StringInput, - destination_path=None, - reader=None, reader_name='standalone', - parser=None, parser_name='restructuredtext', - writer=None, writer_name='html', - settings=None, settings_spec=None, - settings_overrides=None, config_section=None, - enable_exit_status=None): + destination_path=None, reader=None, reader_name='standalone', + parser=None, parser_name='restructuredtext', writer=None, + writer_name='html', settings=None, settings_spec=None, + settings_overrides=None, config_section=None, + enable_exit_status=None): """ Set up & run a `Publisher`, and return a dictionary of document parts. Dictionary keys are the names of parts, and values are Unicode strings; diff --git a/nikola/plugins/compile_rest/gist_directive.py b/nikola/plugins/compile_rest/gist_directive.py new file mode 100644 index 0000000..3bfe818 --- /dev/null +++ b/nikola/plugins/compile_rest/gist_directive.py @@ -0,0 +1,56 @@ +# This file is public domain according to its author, Brian Hsu + +from docutils.parsers.rst import Directive, directives +from docutils import nodes + +try: + import requests +except ImportError: + requests = None # NOQA + + +class GitHubGist(Directive): + """ Embed GitHub Gist. + + Usage: + .. 
gist:: GIST_ID + + """ + + required_arguments = 1 + optional_arguments = 1 + option_spec = {'file': directives.unchanged} + final_argument_whitespace = True + has_content = False + + def get_raw_gist_with_filename(self, gistID, filename): + url = "https://raw.github.com/gist/%s/%s" % (gistID, filename) + return requests.get(url).text + + def get_raw_gist(self, gistID): + url = "https://raw.github.com/gist/%s/" % (gistID) + return requests.get(url).text + + def run(self): + if requests is None: + print('To use the gist directive, you need to install the ' + '"requests" package.') + return [] + gistID = self.arguments[0].strip() + embedHTML = "" + rawGist = "" + + if 'file' in self.options: + filename = self.options['file'] + rawGist = (self.get_raw_gist_with_filename(gistID, filename)) + embedHTML = ('<script src="https://gist.github.com/%s.js?file=%s">' + '</script>') % (gistID, filename) + else: + rawGist = (self.get_raw_gist(gistID)) + embedHTML = ('<script src="https://gist.github.com/%s.js">' + '</script>') % gistID + + return [nodes.raw('', embedHTML, format='html'), + nodes.raw('', '<noscript>', format='html'), + nodes.literal_block('', rawGist), + nodes.raw('', '</noscript>', format='html')] diff --git a/nikola/plugins/compile_rest/pygments_code_block_directive.py b/nikola/plugins/compile_rest/pygments_code_block_directive.py index a83098f..f858427 100644 --- a/nikola/plugins/compile_rest/pygments_code_block_directive.py +++ b/nikola/plugins/compile_rest/pygments_code_block_directive.py @@ -36,9 +36,9 @@ import codecs from copy import copy
import os
try:
- from urlparse import urlparse, urlunsplit
+ from urlparse import urlunsplit
except ImportError:
- from urllib.parse import urlparse, urlunsplit
+ from urllib.parse import urlunsplit # NOQA
from docutils import nodes, core
from docutils.parsers.rst import directives
@@ -96,8 +96,7 @@ class DocutilsInterface(object):
try:
if self.language and str(self.language).lower() != 'none':
lexer = get_lexer_by_name(self.language.lower(),
- **self.custom_args
- )
+ **self.custom_args)
else:
lexer = get_lexer_by_name('text', **self.custom_args)
except ValueError:
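(Note on the hunk above: it only reflows the get_lexer_by_name call, but the surrounding fallback logic is worth spelling out. A minimal standalone sketch, with a hypothetical pick_lexer helper that folds the ValueError handling into a plain-text fallback; pygments' ClassNotFound is a ValueError subclass, which is why the original except clause catches it:)

    from pygments.lexers import get_lexer_by_name
    from pygments.util import ClassNotFound  # subclass of ValueError

    def pick_lexer(language, **custom_args):
        # Ask pygments for the requested language; an unknown name or an
        # explicit 'none' degrades to the plain-text lexer.
        try:
            if language and str(language).lower() != 'none':
                return get_lexer_by_name(str(language).lower(), **custom_args)
            return get_lexer_by_name('text', **custom_args)
        except ClassNotFound:
            return get_lexer_by_name('text', **custom_args)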
@@ -136,7 +135,7 @@ class DocutilsInterface(object):
# ::
def code_block_directive(name, arguments, options, content, lineno,
- content_offset, block_text, state, state_machine):
+ content_offset, block_text, state, state_machine):
"""Parse and classify content of a code_block."""
if 'include' in options:
try:
@@ -173,8 +172,8 @@ def code_block_directive(name, arguments, options, content, lineno,
# Move the after_index to the beginning of the line with the
# match.
for char in content[after_index:0:-1]:
- # codecs always opens binary. This works with '\n',
- # '\r' and '\r\n'. We are going backwards, so
+ # codecs always opens binary. This works with '\n',
+ # '\r' and '\r\n'. We are going backwards, so
# '\n' is found first in '\r\n'.
# Going with .splitlines() seems more appropriate
# but needs a few more changes.
@@ -197,7 +196,7 @@ def code_block_directive(name, arguments, options, content, lineno,
'code-block directive:\nText not found.' %
options['start-after'])
line_offset = len(content[:after_index +
- len(after_text)].splitlines())
+ len(after_text)].splitlines())
content = content[after_index + len(after_text):]
# same changes here for the same reason
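(The comment block in the hunk above describes walking backwards from the start-after match to the line it sits on. A rough self-contained sketch of just that scan, using a made-up move_to_line_start name; the loop body is inferred from the comments and may differ in detail from the plugin's code:)

    def move_to_line_start(content, index):
        # Walk backwards until the previous newline. Scanning backwards,
        # '\n' is met first in '\r\n', so '\n', '\r\n' (and a bare '\r'
        # treated as text) all terminate the scan; codecs reads binary,
        # so line endings arrive untranslated.
        for char in content[index:0:-1]:
            if char == '\n':
                break
            index -= 1
        return index

    # e.g. content = "abc\ndef START", content.find("START") == 8;
    # the scan stops at 3, the index of the preceding '\n' (the matched
    # line itself starts one character later).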
@@ -249,7 +248,7 @@ def code_block_directive(name, arguments, options, content, lineno,
lnwidth = len(str(total_lines))
fstr = "\n%%%dd " % lnwidth
code_block += nodes.inline(fstr[1:] % lineno, fstr[1:] % lineno,
- classes=['linenumber'])
+ classes=['linenumber'])
# parse content with pygments and add to code_block element
content = content.rstrip()
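(The width calculation above sizes the line-number column from the largest line number. A tiny worked example, plain Python with nothing plugin-specific:)

    total_lines = 120
    lnwidth = len(str(total_lines))   # 3
    fstr = "\n%%%dd " % lnwidth       # the literal "\n%3d "
    assert fstr % 7 == "\n  7 "       # numbers render right-aligned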
@@ -260,6 +259,9 @@ def code_block_directive(name, arguments, options, content, lineno,
l = list(DocutilsInterface(content, language, options))
if l[-1] == ('', '\n'):
l = l[:-1]
+ # We also strip the last element's trailing newline for the same reason (a trailing \n looks bad)
+ if l:
+ l[-1] = (l[-1][0], l[-1][1].rstrip())
for cls, value in l:
if withln and "\n" in value:
# Split on the "\n"s
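(The added lines in the hunk above extend the existing cleanup: after dropping pygments' synthetic final ('', '\n') token, the last remaining token is also rstripped so the rendered block carries no trailing blank line. A hedged standalone illustration with a made-up token list:)

    tokens = [('k', 'print'), ('', ' x\n'), ('', '\n')]  # hypothetical
    if tokens[-1] == ('', '\n'):         # drop the synthetic final token
        tokens = tokens[:-1]
    if tokens:                           # new in this change: rstrip the
        cls, val = tokens[-1]            # trailing newline as well
        tokens[-1] = (cls, val.rstrip())
    assert tokens == [('k', 'print'), ('', ' x')]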
@@ -271,7 +273,7 @@ def code_block_directive(name, arguments, options, content, lineno,
for chunk, ln in zip(values, linenos)[1:]:
if ln <= total_lines:
code_block += nodes.inline(fstr % ln, fstr % ln,
- classes=['linenumber'])
+ classes=['linenumber'])
code_block += nodes.Text(chunk, chunk)
lineno += len(values) - 1
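(For context, the reindented call above sits in the loop that splits a multi-line token on '\n' and inserts a right-aligned line-number node before each continuation line. A sketch of that interleave step, with values assumed from context; note that zip(...)[1:] in the original is a Python 2 idiom, so the sketch wraps it in list() to stay runnable on Python 3:)

    value = "x = 1\ny = 2"
    lineno, total_lines, fstr = 10, 99, "\n%2d "
    values = value.split("\n")                    # ['x = 1', 'y = 2']
    linenos = range(lineno, lineno + len(values))
    for chunk, ln in list(zip(values, linenos))[1:]:
        if ln <= total_lines:
            # the first chunk stays on the current line; each later
            # chunk is prefixed by a formatted line number
            print(repr(fstr % ln), repr(chunk))   # '\n11 ' 'y = 2'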
@@ -317,8 +319,8 @@ def string_bool(argument):
elif argument.lower() == 'false':
return False
else:
- raise ValueError('"%s" unknown; choose from "True" or "False"'
- % argument)
+ raise ValueError('"%s" unknown; choose from "True" or "False"' %
+ argument)
def csharp_unicodelevel(argument):
@@ -339,9 +341,9 @@ def listings_directive(name, arguments, options, content, lineno,
options['include'] = os.path.join('listings', fname)
target = urlunsplit(("link", 'listing', fname, '', ''))
generated_nodes = [core.publish_doctree('`%s <%s>`_' % (fname, target))[0]]
- generated_nodes += code_block_directive(name, [arguments[1]],
- options, content, lineno, content_offset, block_text,
- state, state_machine)
+ generated_nodes += code_block_directive(name, [arguments[1]], options,
+ content, lineno, content_offset,
+ block_text, state, state_machine)
return generated_nodes
code_block_directive.arguments = (1, 0, 1)
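(The listings_directive reflowed in the final hunk above composes two pieces: a reST link pointing at the listing file, rendered through docutils, followed by the nodes produced by code_block_directive. A rough sketch of the link half; the "link://" target is Nikola's internal URL scheme resolved at render time, and fname here is a made-up value:)

    from docutils import core
    try:
        from urlparse import urlunsplit           # Python 2
    except ImportError:
        from urllib.parse import urlunsplit       # Python 3

    fname = 'hello.py'                            # hypothetical listing
    target = urlunsplit(("link", 'listing', fname, '', ''))
    # target == 'link://listing/hello.py'
    link_node = core.publish_doctree('`%s <%s>`_' % (fname, target))[0]
    # ...the nodes from code_block_directive(...) are then appended so
    # the highlighted listing follows the link.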
diff --git a/nikola/plugins/compile_rest/slides.py b/nikola/plugins/compile_rest/slides.py index 942a7d4..c9d55f3 100644 --- a/nikola/plugins/compile_rest/slides.py +++ b/nikola/plugins/compile_rest/slides.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -27,10 +27,9 @@ import json from docutils import nodes from docutils.parsers.rst import Directive, directives -class slides(Directive): +class slides(Directive): """ Restructured text extension for inserting slideshows.""" - has_content = True option_spec = { "preload": directives.flag, @@ -60,12 +59,13 @@ class slides(Directive): "animationStart": directives.unchanged, "animationComplete": directives.unchanged, } - + def run(self): if len(self.content) == 0: return - for opt in ("preload", "generateNextPrev", "pagination", "generatePagination", - "crossfade", "randomize", "hoverPause", "autoHeight", "bigTarget"): + for opt in ("preload", "generateNextPrev", "pagination", + "generatePagination", "crossfade", "randomize", + "hoverPause", "autoHeight", "bigTarget"): if opt in self.options: self.options[opt] = True options = { @@ -73,17 +73,19 @@ class slides(Directive): "bigTarget": True, "paginationClass": "pager", "currentClass": "slide-current" - } + } options.update(self.options) options = json.dumps(options) output = [] - output.append("""<script> $(function(){ $("#slides").slides(%s); }); </script>""" % options) - output.append("""<div id="slides" class="slides"><div class="slides_container">""") + output.append('<script> $(function(){ $("#slides").slides(%s); });' + '</script>' % options) + output.append('<div id="slides" class="slides"><div ' + 'class="slides_container">') for image in self.content: output.append("""<div><img src="%s"></div>""" % image) output.append("""</div></div>""") - + return [nodes.raw('', '\n'.join(output), format='html')] - + directives.register_directive('slides', slides) diff --git a/nikola/plugins/compile_rest/vimeo.py b/nikola/plugins/compile_rest/vimeo.py new file mode 100644 index 0000000..3eefcc4 --- /dev/null +++ b/nikola/plugins/compile_rest/vimeo.py @@ -0,0 +1,92 @@ +# Copyright (c) 2012 Roberto Alsina y otros. + +# Permission is hereby granted, free of charge, to any +# person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the +# Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the +# Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice +# shall be included in all copies or substantial portions of +# the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS +# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +from docutils import nodes +from docutils.parsers.rst import directives + +try: + import requests +except ImportError: + requests = None # NOQA +try: + import json # python 2.6 or higher +except ImportError: + try: + import simplejson as json # NOQA + except ImportError: + json = None + +CODE = """<iframe src="http://player.vimeo.com/video/%(vimeo_id)s" +width="%(width)s" height="%(height)s" +frameborder="0" webkitAllowFullScreen mozallowfullscreen allowFullScreen> +</iframe> +""" + +VIDEO_DEFAULT_HEIGHT = 500 +VIDEO_DEFAULT_WIDTH = 281 + + +def vimeo(name, args, options, content, lineno, contentOffset, blockText, + state, stateMachine): + """ Restructured text extension for inserting vimeo embedded videos """ + if requests is None: + raise Exception("To use the Vimeo directive you need to install the " + "requests module.") + if json is None: + raise Exception("To use the Vimeo directive you need python 2.6 or to " + "install the simplejson module.") + if len(content) == 0: + return + + string_vars = {'vimeo_id': content[0]} + extra_args = content[1:] # Because content[0] is ID + extra_args = [ea.strip().split("=") for ea in extra_args] # key=value + extra_args = [ea for ea in extra_args if len(ea) == 2] # drop bad lines + extra_args = dict(extra_args) + if 'width' in extra_args: + string_vars['width'] = extra_args.pop('width') + if 'height' in extra_args: + string_vars['height'] = extra_args.pop('height') + + # Only need to make a connection if width and height aren't provided + if 'height' not in string_vars or 'width' not in string_vars: + string_vars['height'] = VIDEO_DEFAULT_HEIGHT + string_vars['width'] = VIDEO_DEFAULT_WIDTH + + if json: # we can attempt to retrieve video attributes from vimeo + try: + url = ('http://vimeo.com/api/v2/video/%(vimeo_id)s.json' % + string_vars) + data = requests.get(url).text + video_attributes = json.loads(data) + string_vars['height'] = video_attributes['height'] + string_vars['width'] = video_attributes['width'] + except Exception: + # fall back to the defaults + pass + + return [nodes.raw('', CODE % string_vars, format='html')] + +vimeo.content = True +directives.register_directive('vimeo', vimeo) diff --git a/nikola/plugins/compile_rest/youtube.py b/nikola/plugins/compile_rest/youtube.py index 0765158..fe3b28b 100644 --- a/nikola/plugins/compile_rest/youtube.py +++ b/nikola/plugins/compile_rest/youtube.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. 
-# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -28,9 +28,8 @@ from docutils.parsers.rst import directives CODE = """\ <iframe width="%(width)s" height="%(height)s" -src="http://www.youtube.com/embed/%(yid)s?rel=0&hd=1&wmode=transparent"> -</iframe> -""" +src="http://www.youtube.com/embed/%(yid)s?rel=0&hd=1&wmode=transparent" +></iframe>""" def youtube(name, args, options, content, lineno, @@ -43,7 +42,7 @@ def youtube(name, args, options, content, lineno, 'width': 425, 'height': 344, 'extra': '' - } + } extra_args = content[1:] # Because content[0] is ID extra_args = [ea.strip().split("=") for ea in extra_args] # key=value extra_args = [ea for ea in extra_args if len(ea) == 2] # drop bad lines diff --git a/nikola/plugins/compile_textile.plugin b/nikola/plugins/compile_textile.plugin new file mode 100644 index 0000000..c13b3b1 --- /dev/null +++ b/nikola/plugins/compile_textile.plugin @@ -0,0 +1,10 @@ +[Core] +Name = textile +Module = compile_textile + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Compile Textile into HTML + diff --git a/nikola/plugins/compile_textile.py b/nikola/plugins/compile_textile.py new file mode 100644 index 0000000..7fa4e3f --- /dev/null +++ b/nikola/plugins/compile_textile.py @@ -0,0 +1,72 @@ +# Copyright (c) 2012 Roberto Alsina y otros. + +# Permission is hereby granted, free of charge, to any +# person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the +# Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the +# Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice +# shall be included in all copies or substantial portions of +# the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS +# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +"""Implementation of compile_html based on textile.""" + +import codecs +import os + +try: + from textile import textile +except ImportError: + textile = None # NOQA + +from nikola.plugin_categories import PageCompiler + + +class CompileTextile(PageCompiler): + """Compile textile into HTML.""" + + name = "textile" + + def compile_html(self, source, dest): + if textile is None: + raise Exception('To build this site, you need to install the ' + '"textile" package.') + try: + os.makedirs(os.path.dirname(dest)) + except: + pass + with codecs.open(dest, "w+", "utf8") as out_file: + with codecs.open(source, "r", "utf8") as in_file: + data = in_file.read() + output = textile(data, head_offset=1) + out_file.write(output) + + def create_post(self, path, onefile=False, title="", slug="", date="", + tags=""): + d_name = os.path.dirname(path) + if not os.path.isdir(d_name): + os.makedirs(os.path.dirname(path)) + with codecs.open(path, "wb+", "utf8") as fd: + if onefile: + fd.write('<notextile> <!--\n') + fd.write('.. title: %s\n' % title) + fd.write('.. slug: %s\n' % slug) + fd.write('.. date: %s\n' % date) + fd.write('.. tags: %s\n' % tags) + fd.write('.. link: \n') + fd.write('.. description: \n') + fd.write('--></notextile>\n\n') + fd.write("\nWrite your post here.") diff --git a/nikola/plugins/compile_txt2tags.plugin b/nikola/plugins/compile_txt2tags.plugin new file mode 100644 index 0000000..2c65da1 --- /dev/null +++ b/nikola/plugins/compile_txt2tags.plugin @@ -0,0 +1,10 @@ +[Core] +Name = txt2tags +Module = compile_txt2tags + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Compile Txt2tags into HTML + diff --git a/nikola/plugins/compile_txt2tags.py b/nikola/plugins/compile_txt2tags.py new file mode 100644 index 0000000..2446dfd --- /dev/null +++ b/nikola/plugins/compile_txt2tags.py @@ -0,0 +1,75 @@ +# Copyright (c) 2012 Roberto Alsina y otros. + +# Permission is hereby granted, free of charge, to any +# person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the +# Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the +# Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice +# shall be included in all copies or substantial portions of +# the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS +# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +"""Implementation of compile_html based on txt2tags. 
+ +Txt2tags is not in PyPI, you can install it with + +easy_install -f "http://txt2tags.org/txt2tags.py#egg=txt2tags-2.6" txt2tags + +""" + +import codecs +import os + +try: + from txt2tags import exec_command_line as txt2tags +except ImportError: + txt2tags = None # NOQA + +from nikola.plugin_categories import PageCompiler + + +class CompileTextile(PageCompiler): + """Compile txt2tags into HTML.""" + + name = "txt2tags" + + def compile_html(self, source, dest): + if txt2tags is None: + raise Exception('To build this site, you need to install the ' + '"txt2tags" package.') + try: + os.makedirs(os.path.dirname(dest)) + except: + pass + cmd = ["-t", "html", "--no-headers", "--outfile", dest, source] + txt2tags(cmd) + + def create_post(self, path, onefile=False, title="", slug="", date="", + tags=""): + d_name = os.path.dirname(path) + if not os.path.isdir(d_name): + os.makedirs(os.path.dirname(path)) + with codecs.open(path, "wb+", "utf8") as fd: + if onefile: + fd.write("\n'''\n<!--\n") + fd.write('.. title: %s\n' % title) + fd.write('.. slug: %s\n' % slug) + fd.write('.. date: %s\n' % date) + fd.write('.. tags: %s\n' % tags) + fd.write('.. link: \n') + fd.write('.. description: \n') + fd.write("-->\n'''\n") + fd.write("\nWrite your post here.") diff --git a/nikola/plugins/compile_wiki.plugin b/nikola/plugins/compile_wiki.plugin new file mode 100644 index 0000000..65cd942 --- /dev/null +++ b/nikola/plugins/compile_wiki.plugin @@ -0,0 +1,10 @@ +[Core] +Name = wiki +Module = compile_wiki + +[Documentation] +Author = Roberto Alsina +Version = 0.1 +Website = http://nikola.ralsina.com.ar +Description = Compile WikiMarkup into HTML + diff --git a/nikola/plugins/compile_wiki.py b/nikola/plugins/compile_wiki.py new file mode 100644 index 0000000..1215506 --- /dev/null +++ b/nikola/plugins/compile_wiki.py @@ -0,0 +1,70 @@ +# Copyright (c) 2012 Roberto Alsina y otros. + +# Permission is hereby granted, free of charge, to any +# person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the +# Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the +# Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice +# shall be included in all copies or substantial portions of +# the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS +# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +"""Implementation of compile_html based on textile.""" + +import codecs +import os + +try: + from creole import Parser + from creole.html_emitter import HtmlEmitter + creole = True +except ImportError: + creole = None + +from nikola.plugin_categories import PageCompiler + + +class CompileTextile(PageCompiler): + """Compile textile into HTML.""" + + name = "wiki" + + def compile_html(self, source, dest): + if creole is None: + raise Exception('To build this site, you need to install the ' + '"creole" package.') + try: + os.makedirs(os.path.dirname(dest)) + except: + pass + with codecs.open(dest, "w+", "utf8") as out_file: + with codecs.open(source, "r", "utf8") as in_file: + data = in_file.read() + document = Parser(data).parse() + output = HtmlEmitter(document).emit() + out_file.write(output) + + def create_post(self, path, onefile=False, title="", slug="", date="", + tags=""): + if onefile: + raise Exception('There are no comments in CreoleWiki markup, so ' + 'one-file format is not possible, use the -2 ' + 'option.') + d_name = os.path.dirname(path) + if not os.path.isdir(d_name): + os.makedirs(os.path.dirname(path)) + with codecs.open(path, "wb+", "utf8") as fd: + fd.write("Write your post here.") diff --git a/nikola/plugins/task_archive.py b/nikola/plugins/task_archive.py index cafb7e3..f91a10e 100644 --- a/nikola/plugins/task_archive.py +++ b/nikola/plugins/task_archive.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -47,7 +47,8 @@ class Archive(Task): for year, posts in list(self.site.posts_per_year.items()): for lang in kw["translations"]: output_name = os.path.join( - kw['output_folder'], self.site.path("archive", year, lang)) + kw['output_folder'], self.site.path("archive", year, + lang)).encode('utf8') post_list = [self.site.global_data[post] for post in posts] post_list.sort(key=lambda a: a.date) post_list.reverse() @@ -78,10 +79,11 @@ class Archive(Task): for lang in kw["translations"]: context = {} output_name = os.path.join( - kw['output_folder'], self.site.path("archive", None, lang)) + kw['output_folder'], self.site.path("archive", None, + lang)).encode('utf8') context["title"] = kw["messages"][lang]["Archive"] context["items"] = [(year, self.site.link("archive", year, lang)) - for year in years] + for year in years] context["permalink"] = self.site.link("archive", None, lang) task = self.site.generic_post_list_renderer( lang, diff --git a/nikola/plugins/task_copy_assets.py b/nikola/plugins/task_copy_assets.py index 6b9c6a5..39fef5a 100644 --- a/nikola/plugins/task_copy_assets.py +++ b/nikola/plugins/task_copy_assets.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. 
-# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR diff --git a/nikola/plugins/task_copy_files.py b/nikola/plugins/task_copy_files.py index f8d761d..feaf147 100644 --- a/nikola/plugins/task_copy_files.py +++ b/nikola/plugins/task_copy_files.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR diff --git a/nikola/plugins/task_create_bundles.py b/nikola/plugins/task_create_bundles.py index 4903699..d024636 100644 --- a/nikola/plugins/task_create_bundles.py +++ b/nikola/plugins/task_create_bundles.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -54,35 +54,36 @@ class BuildBundles(LateTask): } def build_bundle(output, inputs): - out_dir = os.path.join(kw['output_folder'], os.path.dirname(output)) + out_dir = os.path.join(kw['output_folder'], + os.path.dirname(output)) inputs = [i for i in inputs if os.path.isfile( os.path.join(out_dir, i))] cache_dir = os.path.join(kw['cache_folder'], 'webassets') if not os.path.isdir(cache_dir): os.makedirs(cache_dir) env = webassets.Environment(out_dir, os.path.dirname(output), - cache=cache_dir) - bundle = webassets.Bundle(*inputs, - output=os.path.basename(output)) + cache=cache_dir) + bundle = webassets.Bundle(*inputs, output=os.path.basename(output)) env.register(output, bundle) # This generates the file env[output].urls() flag = False - if webassets is not None and self.site.config['USE_BUNDLES'] is not False: + if (webassets is not None and self.site.config['USE_BUNDLES'] is not + False): for name, files in kw['theme_bundles'].items(): output_path = os.path.join(kw['output_folder'], name) dname = os.path.dirname(name) - file_dep = [os.path.join('output', dname, fname) - for fname in files] + file_dep = [os.path.join('output', dname, fname) for fname in + files] task = { 'file_dep': file_dep, - 'basename': self.name, - 'name': output_path, + 'basename': str(self.name), + 'name': str(output_path), 'actions': [(build_bundle, (name, files))], 'targets': [output_path], 'uptodate': [utils.config_changed(kw)] - } + } flag = True yield utils.apply_filters(task, kw['filters']) if flag is False: # No page rendered, yield a dummy task diff --git a/nikola/plugins/task_indexes.py b/nikola/plugins/task_indexes.py index 6f54145..757998e 100644 --- a/nikola/plugins/task_indexes.py +++ b/nikola/plugins/task_indexes.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above 
copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -22,6 +22,8 @@ # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +from __future__ import unicode_literals +import glob import os from nikola.plugin_categories import Task @@ -39,7 +41,7 @@ class Indexes(Task): kw = { "translations": self.site.config['TRANSLATIONS'], "index_display_post_count": - self.site.config['INDEX_DISPLAY_POST_COUNT'], + self.site.config['INDEX_DISPLAY_POST_COUNT'], "messages": self.site.MESSAGES, "index_teasers": self.site.config['INDEX_TEASERS'], "output_folder": self.site.config['OUTPUT_FOLDER'], @@ -56,10 +58,7 @@ class Indexes(Task): posts = posts[kw["index_display_post_count"]:] num_pages = len(lists) if not lists: - yield { - 'basename': 'render_indexes', - 'actions': [], - } + yield {'basename': 'render_indexes', 'actions': []} for lang in kw["translations"]: for i, post_list in enumerate(lists): context = {} @@ -68,10 +67,8 @@ class Indexes(Task): else: indexes_title = self.site.config["BLOG_TITLE"] if not i: - output_name = "index.html" context["title"] = indexes_title else: - output_name = "index-%s.html" % i if self.site.config.get("INDEXES_PAGES", ""): indexes_pages = self.site.config["INDEXES_PAGES"] % i else: @@ -89,7 +86,8 @@ class Indexes(Task): context["nextlink"] = "index-%s.html" % (i + 1) context["permalink"] = self.site.link("index", i, lang) output_name = os.path.join( - kw['output_folder'], self.site.path("index", i, lang)) + kw['output_folder'], self.site.path("index", i, + lang)).encode('utf8') task = self.site.generic_post_list_renderer( lang, post_list, @@ -102,3 +100,39 @@ class Indexes(Task): task['uptodate'] = [config_changed(task_cfg)] task['basename'] = 'render_indexes' yield task + + if not self.site.config["STORY_INDEX"]: + return + # TODO: do story indexes as described in #232 + kw = { + "translations": self.site.config['TRANSLATIONS'], + "post_pages": self.site.config["post_pages"], + "output_folder": self.site.config['OUTPUT_FOLDER'], + "filters": self.site.config['FILTERS'], + } + template_name = "list.tmpl" + for lang in kw["translations"]: + for wildcard, dest, _, is_post in kw["post_pages"]: + if is_post: + continue + context = {} + # vim/pyflakes thinks it's unused + # src_dir = os.path.dirname(wildcard) + files = glob.glob(wildcard) + post_list = [self.site.global_data[os.path.splitext(p)[0]] for + p in files] + output_name = os.path.join(kw["output_folder"], + self.site.path("post_path", + wildcard, + lang)).encode('utf8') + context["items"] = [(post.title(lang), post.permalink(lang)) + for post in post_list] + task = self.site.generic_post_list_renderer(lang, post_list, + output_name, + template_name, + kw['filters'], + context) + task_cfg = {1: task['uptodate'][0].config, 2: kw} + task['uptodate'] = [config_changed(task_cfg)] + task['basename'] = self.name + yield task diff --git a/nikola/plugins/task_redirect.py b/nikola/plugins/task_redirect.py index d7117ec..b133948 100644 --- a/nikola/plugins/task_redirect.py +++ b/nikola/plugins/task_redirect.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above 
copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -62,7 +62,7 @@ class Redirect(Task): 'actions': [(create_redirect, (src_path, dst))], 'clean': True, 'uptodate': [utils.config_changed(kw)], - } + } def create_redirect(src, dst): @@ -71,6 +71,5 @@ def create_redirect(src, dst): except: pass with codecs.open(src, "wb+", "utf8") as fd: - fd.write(('<head>' + - '<meta HTTP-EQUIV="REFRESH" content="0; url=%s">' + - '</head>') % dst) + fd.write('<head><meta http-equiv="refresh" content="0; ' + 'url=%s"></head>' % dst) diff --git a/nikola/plugins/task_render_galleries.py b/nikola/plugins/task_render_galleries.py index 72d0581..7fe1501 100644 --- a/nikola/plugins/task_render_galleries.py +++ b/nikola/plugins/task_render_galleries.py @@ -26,8 +26,8 @@ from __future__ import unicode_literals import codecs import datetime import glob +import hashlib import os -import uuid Image = None try: @@ -76,7 +76,7 @@ class Galleries(Task): yield { 'basename': str('render_galleries'), 'actions': [], - } + } return # gallery_path is "gallery/name" @@ -88,8 +88,9 @@ class Galleries(Task): else: gallery_name = os.path.join(*splitted) # output_gallery is "output/GALLERY_PATH/name" - output_gallery = os.path.dirname(os.path.join(kw["output_folder"], - self.site.path("gallery", gallery_name, None))) + output_gallery = os.path.dirname(os.path.join( + kw["output_folder"], self.site.path("gallery", gallery_name, + None))) if not os.path.isdir(output_gallery): yield { 'basename': str('render_galleries'), @@ -98,7 +99,7 @@ class Galleries(Task): 'targets': [output_gallery], 'clean': True, 'uptodate': [utils.config_changed(kw)], - } + } # image_list contains "gallery/name/image_name.jpg" image_list = glob.glob(gallery_path + "/*jpg") +\ glob.glob(gallery_path + "/*JPG") +\ @@ -118,21 +119,21 @@ class Galleries(Task): excluded_image_name_list = [] excluded_image_list = list(map(add_gallery_path, - excluded_image_name_list)) + excluded_image_name_list)) image_set = set(image_list) - set(excluded_image_list) image_list = list(image_set) except IOError: pass # List of sub-galleries - folder_list = [x.split(os.sep)[-2] for x in - glob.glob(os.path.join(gallery_path, '*') + os.sep)] + folder_list = [x.split(os.sep)[-2] + os.sep for x in + glob.glob(os.path.join(gallery_path, '*') + os.sep)] crumbs = gallery_path.split(os.sep)[:-1] crumbs.append(os.path.basename(gallery_name)) # TODO: write this in human paths = ['/'.join(['..'] * (len(crumbs) - 1 - i)) for i in - range(len(crumbs[:-1]))] + ['#'] + range(len(crumbs[:-1]))] + ['#'] crumbs = list(zip(paths, crumbs)) image_list = [x for x in image_list if "thumbnail" not in x] @@ -150,7 +151,7 @@ class Galleries(Task): # thumb_path is # "output/GALLERY_PATH/name/image_name.thumbnail.jpg" thumb_path = os.path.join(output_gallery, - fname + ".thumbnail" + ext) + ".thumbnail".join([fname, ext])) # thumb_path is "output/GALLERY_PATH/name/image_name.jpg" orig_dest_path = os.path.join(output_gallery, img_name) thumbs.append(os.path.basename(thumb_path)) @@ -182,12 +183,12 @@ class Galleries(Task): # Remove excluded images if excluded_image_name_list: for img, img_name in zip(excluded_image_list, - excluded_image_name_list): + excluded_image_name_list): # img_name is "image_name.jpg" # fname, ext are "image_name", ".jpg" fname, ext = 
os.path.splitext(img_name) - excluded_thumb_dest_path = os.path.join(output_gallery, - fname + ".thumbnail" + ext) + excluded_thumb_dest_path = os.path.join( + output_gallery, ".thumbnail".join([fname, ext])) excluded_dest_path = os.path.join(output_gallery, img_name) yield { 'basename': str('render_galleries'), @@ -218,7 +219,8 @@ class Galleries(Task): context["title"] = os.path.basename(gallery_path) context["description"] = kw["blog_description"] if kw['use_filename_as_title']: - img_titles = ['id="%s" alt="%s" title="%s"' % (fn[:-4], fn[:-4], utils.unslugify(fn[:-4])) + img_titles = ['id="%s" alt="%s" title="%s"' % + (fn[:-4], fn[:-4], utils.unslugify(fn[:-4])) for fn in image_name_list] else: img_titles = [''] * len(image_name_list) @@ -227,14 +229,19 @@ class Galleries(Task): context["crumbs"] = crumbs context["permalink"] = self.site.link( "gallery", gallery_name, None) + context["enable_comments"] = ( + self.site.config["COMMENTS_IN_GALLERIES"]) # Use galleries/name/index.txt to generate a blurb for # the gallery, if it exists index_path = os.path.join(gallery_path, "index.txt") cache_dir = os.path.join(kw["cache_folder"], 'galleries') if not os.path.isdir(cache_dir): - os.makedirs(cache_dir) - index_dst_path = os.path.join(cache_dir, str(uuid.uuid1())+'.html') + os.makedirs(cache_dir) + index_dst_path = os.path.join( + cache_dir, + str(hashlib.sha224(index_path.encode('utf-8')).hexdigest() + + '.html')) if os.path.exists(index_path): compile_html = self.site.get_compiler(index_path) yield { @@ -242,8 +249,7 @@ class Galleries(Task): 'name': index_dst_path.encode('utf-8'), 'file_dep': [index_path], 'targets': [index_dst_path], - 'actions': [(compile_html, - [index_path, index_dst_path])], + 'actions': [(compile_html, [index_path, index_dst_path])], 'clean': True, 'uptodate': [utils.config_changed(kw)], } @@ -258,19 +264,22 @@ class Galleries(Task): file_dep.append(index_dst_path) else: context['text'] = '' - self.site.render_template(template_name, output_name, context) + self.site.render_template(template_name, output_name.encode( + 'utf8'), context) yield { 'basename': str('render_galleries'), 'name': output_name.encode('utf8'), 'file_dep': file_dep, 'targets': [output_name], - 'actions': [(render_gallery, - (output_name, context, index_dst_path))], + 'actions': [(render_gallery, (output_name, context, + index_dst_path))], 'clean': True, 'uptodate': [utils.config_changed({ 1: kw, - 2: self.site.config['GLOBAL_CONTEXT']})], + 2: self.site.config['GLOBAL_CONTEXT'], + 3: self.site.config["COMMENTS_IN_GALLERIES"], + })], } def resize_image(self, src, dst, max_size): diff --git a/nikola/plugins/task_render_listings.py b/nikola/plugins/task_render_listings.py index e3334c2..6d1d853 100644 --- a/nikola/plugins/task_render_listings.py +++ b/nikola/plugins/task_render_listings.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. 
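A note on the galleries hunk above: replacing uuid.uuid1() with sha224(index_path) makes the cache file name deterministic, so rebuilds map each gallery to the same cached HTML file instead of minting a fresh random name on every run. A minimal sketch of that naming scheme (the cache directory below is invented):

import hashlib
import os

def cache_name(index_path, cache_dir='cache/galleries'):
    # Hash the path, not the file contents: the goal is a stable,
    # filesystem-safe name per source file, not change detection.
    digest = hashlib.sha224(index_path.encode('utf-8')).hexdigest()
    return os.path.join(cache_dir, digest + '.html')

# Deterministic, unlike uuid.uuid1():
assert cache_name('galleries/demo/index.txt') == \
    cache_name('galleries/demo/index.txt')
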
-# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -55,24 +55,24 @@ class Listings(Task): except: lexer = TextLexer() code = highlight(fd.read(), lexer, - HtmlFormatter(cssclass='code', - linenos="table", - nowrap=False, - lineanchors=utils.slugify(f), - anchorlinenos=True)) + HtmlFormatter(cssclass='code', + linenos="table", nowrap=False, + lineanchors=utils.slugify(f), + anchorlinenos=True)) title = os.path.basename(in_name) crumbs = out_name.split(os.sep)[1:-1] + [title] # TODO: write this in human paths = ['/'.join(['..'] * (len(crumbs) - 2 - i)) for i in - range(len(crumbs[:-2]))] + ['.', '#'] + range(len(crumbs[:-2]))] + ['.', '#'] context = { 'code': code, 'title': title, 'crumbs': zip(paths, crumbs), 'lang': kw['default_lang'], 'description': title, - } - self.site.render_template('listing.tmpl', out_name, context) + } + self.site.render_template('listing.tmpl', out_name.encode('utf8'), + context) flag = True template_deps = self.site.template_system.template_deps('listing.tmpl') for root, dirs, files in os.walk(kw['listings_folder']): diff --git a/nikola/plugins/task_render_pages.py b/nikola/plugins/task_render_pages.py index 1892c13..0145579 100644 --- a/nikola/plugins/task_render_pages.py +++ b/nikola/plugins/task_render_pages.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -41,9 +41,9 @@ class RenderPages(Task): self.site.scan_posts() flag = False for lang in kw["translations"]: - for wildcard, destination, template_name, _ in kw["post_pages"]: - for task in self.site.generic_page_renderer(lang, - wildcard, template_name, destination, kw["filters"]): + for post in self.site.timeline: + for task in self.site.generic_page_renderer(lang, post, + kw["filters"]): task['uptodate'] = [config_changed({ 1: task['uptodate'][0].config, 2: kw})] diff --git a/nikola/plugins/task_render_posts.py b/nikola/plugins/task_render_posts.py index 48a0384..a4d5578 100644 --- a/nikola/plugins/task_render_posts.py +++ b/nikola/plugins/task_render_posts.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. 
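The listings hunk above only reflows a Pygments highlight() call; for reference, the same call works standalone as below. The file name and source text are placeholders, and the plain-text fallback mirrors the try/except TextLexer visible in the hunk:

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_for_filename, TextLexer

def render_listing_html(path, source):
    # Guess the lexer from the file name; fall back to plain text.
    try:
        lexer = get_lexer_for_filename(path)
    except Exception:
        lexer = TextLexer()
    # linenos="table" puts the numbers in a separate column; lineanchors
    # plus anchorlinenos turn each number into a #<slug>-<n> permalink.
    formatter = HtmlFormatter(cssclass='code', linenos='table',
                              nowrap=False, lineanchors='demo',
                              anchorlinenos=True)
    return highlight(source, lexer, formatter)

print(render_listing_html('hello.py', 'print("hi")'))
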
-# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -63,7 +63,7 @@ class RenderPosts(Task): 'file_dep': post.fragment_deps(lang), 'targets': [dest], 'actions': [(self.site.get_compiler(post.source_path), - [source, dest])], + [source, dest])], 'clean': True, 'uptodate': [utils.config_changed(deps_dict)], } diff --git a/nikola/plugins/task_render_rss.py b/nikola/plugins/task_render_rss.py index 54b66bf..fb35843 100644 --- a/nikola/plugins/task_render_rss.py +++ b/nikola/plugins/task_render_rss.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -42,12 +42,13 @@ class RenderRSS(Task): "blog_url": self.site.config["BLOG_URL"], "blog_description": self.site.config["BLOG_DESCRIPTION"], "output_folder": self.site.config["OUTPUT_FOLDER"], + "rss_teasers": self.site.config["RSS_TEASERS"], } self.site.scan_posts() # TODO: timeline is global, kill it for lang in kw["translations"]: output_name = os.path.join(kw['output_folder'], - self.site.path("rss", None, lang)) + self.site.path("rss", None, lang)) deps = [] posts = [x for x in self.site.timeline if x.use_in_feeds][:10] for post in posts: @@ -58,8 +59,9 @@ 'file_dep': deps, 'targets': [output_name], 'actions': [(utils.generic_rss_renderer, - (lang, kw["blog_title"], kw["blog_url"], - kw["blog_description"], posts, output_name))], + (lang, kw["blog_title"], kw["blog_url"], + kw["blog_description"], posts, output_name, + kw["rss_teasers"]))], 'clean': True, 'uptodate': [utils.config_changed(kw)], } diff --git a/nikola/plugins/task_render_sources.py b/nikola/plugins/task_render_sources.py index 3a05b96..bce8d69 100644 --- a/nikola/plugins/task_render_sources.py +++ b/nikola/plugins/task_render_sources.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -53,9 +53,13 @@ class Sources(Task): flag = False for lang in kw["translations"]: for post in self.site.timeline: - output_name = os.path.join(kw['output_folder'], - post.destination_path(lang, post.source_ext())) + output_name = os.path.join( + kw['output_folder'], post.destination_path( + lang, post.source_ext())) source = post.source_path + if source.endswith('.html'): + print("Avoiding rendering source of .html page") + continue if lang != kw["default_lang"]: source_lang = source + '.'
+ lang if os.path.exists(source_lang): @@ -68,7 +72,7 @@ class Sources(Task): 'actions': [(utils.copy_file, (source, output_name))], 'clean': True, 'uptodate': [utils.config_changed(kw)], - } + } if flag is False: # No page rendered, yield a dummy task yield { 'basename': 'render_sources', diff --git a/nikola/plugins/task_render_tags.py b/nikola/plugins/task_render_tags.py index 026ba75..a561a81 100644 --- a/nikola/plugins/task_render_tags.py +++ b/nikola/plugins/task_render_tags.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -23,6 +23,7 @@ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #from __future__ import unicode_literals +import codecs import json import os @@ -48,17 +49,15 @@ class RenderTags(Task): "filters": self.site.config['FILTERS'], "tag_pages_are_indexes": self.site.config['TAG_PAGES_ARE_INDEXES'], "index_display_post_count": - self.site.config['INDEX_DISPLAY_POST_COUNT'], + self.site.config['INDEX_DISPLAY_POST_COUNT'], "index_teasers": self.site.config['INDEX_TEASERS'], + "rss_teasers": self.site.config["RSS_TEASERS"], } self.site.scan_posts() if not self.site.posts_per_tag: - yield { - 'basename': str(self.name), - 'actions': [], - } + yield {'basename': str(self.name), 'actions': []} return for tag, posts in list(self.site.posts_per_tag.items()): @@ -82,29 +81,30 @@ class RenderTags(Task): tag_cloud_data[tag] = [len(posts), self.site.link( 'tag', tag, self.site.config['DEFAULT_LANG'])] output_name = os.path.join(kw['output_folder'], - 'assets','js','tag_cloud_data.json') - + 'assets', 'js', 'tag_cloud_data.json') + def write_tag_data(data): try: os.makedirs(os.path.dirname(output_name)) except: pass - with open(output_name, 'wb+') as fd: + with codecs.open(output_name, 'wb+', 'utf8') as fd: fd.write(json.dumps(data)) - + task = { 'basename': str(self.name), 'name': str(output_name) } task['uptodate'] = [utils.config_changed(tag_cloud_data)] task['targets'] = [output_name] - task['actions'] = [(write_tag_data,[tag_cloud_data])] + task['actions'] = [(write_tag_data, [tag_cloud_data])] yield task def list_tags_page(self, kw): """a global "all your tags" page for each language""" tags = list(self.site.posts_per_tag.keys()) - tags.sort() + # We want our tags to be sorted case insensitive + tags.sort(key=lambda a: a.lower()) template_name = "tags.tmpl" kw['tags'] = tags for lang in kw["translations"]: @@ -113,8 +113,8 @@ class RenderTags(Task): output_name = output_name.encode('utf8') context = {} context["title"] = kw["messages"][lang]["Tags"] - context["items"] = [(tag, self.site.link("tag", tag, lang)) - for tag in tags] + context["items"] = [(tag, self.site.link("tag", tag, lang)) for tag + in tags] context["permalink"] = self.site.link("tag_index", None, lang) task = self.site.generic_post_list_renderer( lang, @@ -128,9 +128,9 @@ class RenderTags(Task): task['uptodate'] = [utils.config_changed(task_cfg)] yield task - def tag_page_as_index(self, tag, lang, post_list, kw): - """render a sort of index page collection using only this tag's posts.""" + """render a sort of index page collection using only 
this + tag's posts.""" def page_name(tagname, i, lang): """Given tag, n, returns a page name.""" @@ -150,14 +150,13 @@ class RenderTags(Task): for i, post_list in enumerate(lists): context = {} # On a tag page, the feeds include the tag's feeds - rss_link = \ - """<link rel="alternate" type="application/rss+xml" """\ - """type="application/rss+xml" title="RSS for tag """\ - """%s (%s)" href="%s">""" % \ - (tag, lang, self.site.link("tag_rss", tag, lang)) + rss_link = ("""<link rel="alternate" type="application/rss+xml" """ + """type="application/rss+xml" title="RSS for tag """ + """%s (%s)" href="%s">""" % + (tag, lang, self.site.link("tag_rss", tag, lang))) context['rss_link'] = rss_link - output_name = os.path.join(kw['output_folder'], - page_name(tag, i, lang)) + output_name = os.path.join(kw['output_folder'], page_name(tag, i, + lang)) output_name = output_name.encode('utf8') context["title"] = kw["messages"][lang][ "Posts about %s"] % tag @@ -188,12 +187,11 @@ class RenderTags(Task): task['basename'] = str(self.name) yield task - def tag_page_as_list(self, tag, lang, post_list, kw): """We render a single flat link list with this tag's posts""" template_name = "tag.tmpl" - output_name = os.path.join(kw['output_folder'], - self.site.path("tag", tag, lang)) + output_name = os.path.join(kw['output_folder'], self.site.path( + "tag", tag, lang)) output_name = output_name.encode('utf8') context = {} context["lang"] = lang @@ -214,16 +212,15 @@ class RenderTags(Task): task['basename'] = str(self.name) yield task - def tag_rss(self, tag, lang, posts, kw): """RSS for a single tag / language""" #Render RSS output_name = os.path.join(kw['output_folder'], - self.site.path("tag_rss", tag, lang)) + self.site.path("tag_rss", tag, lang)) output_name = output_name.encode('utf8') deps = [] - post_list = [self.site.global_data[post] for post in posts - if self.site.global_data[post].use_in_feeds] + post_list = [self.site.global_data[post] for post in posts if + self.site.global_data[post].use_in_feeds] post_list.sort(key=lambda a: a.date) post_list.reverse() for post in post_list: @@ -234,9 +231,9 @@ class RenderTags(Task): 'file_dep': deps, 'targets': [output_name], 'actions': [(utils.generic_rss_renderer, - (lang, "%s (%s)" % (kw["blog_title"], tag), - kw["blog_url"], kw["blog_description"], - post_list, output_name))], + (lang, "%s (%s)" % (kw["blog_title"], tag), + kw["blog_url"], kw["blog_description"], post_list, + output_name, kw["rss_teasers"]))], 'clean': True, 'uptodate': [utils.config_changed(kw)], } diff --git a/nikola/plugins/task_sitemap/__init__.py b/nikola/plugins/task_sitemap/__init__.py index 1ed6c21..96b9dbd 100644 --- a/nikola/plugins/task_sitemap/__init__.py +++ b/nikola/plugins/task_sitemap/__init__.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -22,13 +22,15 @@ # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
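The tags hunks above switch the tag-cloud dump to codecs.open(..., 'utf8') so the JSON write is unicode-safe on both Python 2 and 3, and sort tags case-insensitively before rendering the index. A standalone sketch of both patterns, with invented paths and data:

import codecs
import json
import os

def write_tag_data(data, output_name):
    # The task swallows makedirs errors, so an existing directory is fine.
    try:
        os.makedirs(os.path.dirname(output_name))
    except OSError:
        pass
    with codecs.open(output_name, 'wb+', 'utf8') as fd:
        fd.write(json.dumps(data))

tag_cloud_data = {'Python': [10, 'tags/python.html'],
                  'nikola': [3, 'tags/nikola.html']}
write_tag_data(tag_cloud_data,
               os.path.join('output', 'assets', 'js', 'tag_cloud_data.json'))

# Case-insensitive ordering, as in list_tags_page():
print(sorted(tag_cloud_data, key=lambda a: a.lower()))
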
+from __future__ import print_function, absolute_import import os +import sys import tempfile from nikola.plugin_categories import LateTask from nikola.utils import config_changed -import sitemap_gen +from nikola.plugins.task_sitemap import sitemap_gen class Sitemap(LateTask): @@ -37,6 +39,14 @@ name = "sitemap" def gen_tasks(self): """Generate Google sitemap.""" + if sys.version_info[0] == 3: + print("sitemap generation is not available for python 3") + yield { + 'basename': 'sitemap', + 'name': 'sitemap', + 'actions': [], + } + return kw = { "blog_url": self.site.config["BLOG_URL"], @@ -62,19 +72,20 @@ kw["blog_url"], ) config_file = tempfile.NamedTemporaryFile(delete=False) - config_file.write(config_data) + config_file.write(config_data.encode('utf8')) config_file.close() # Generate sitemap sitemap = sitemap_gen.CreateSitemapFromFile(config_file.name, True) if not sitemap: - sitemap_gen.output.Log('Configuration file errors -- exiting.', 0) + sitemap_gen.output.Log('Configuration file errors -- exiting.', + 0) else: sitemap.Generate() sitemap_gen.output.Log('Number of errors: %d' % - sitemap_gen.output.num_errors, 1) + sitemap_gen.output.num_errors, 1) sitemap_gen.output.Log('Number of warnings: %d' % - sitemap_gen.output.num_warns, 1) + sitemap_gen.output.num_warns, 1) os.unlink(config_file.name) yield { diff --git a/nikola/plugins/task_sitemap/sitemap_gen.py b/nikola/plugins/task_sitemap/sitemap_gen.py index eef2b0b..a877c24 100755..100644 --- a/nikola/plugins/task_sitemap/sitemap_gen.py +++ b/nikola/plugins/task_sitemap/sitemap_gen.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# flake8: noqa # # Copyright (c) 2004, 2005 Google Inc. # All rights reserved. @@ -43,7 +42,7 @@ from __future__ import print_function __usage__ = \ -"""A simple script to automatically produce sitemaps for a webserver, + """A simple script to automatically produce sitemaps for a webserver, in the Google Sitemap Protocol (GSP). Usage: python sitemap_gen.py --config=config.xml [--help] [--testing] @@ -52,41 +51,68 @@ Usage: python sitemap_gen.py --config=config.xml [--help] [--testing] --testing, specified when user is experimenting """ -# Please be careful that all syntax used in this file can be parsed on -# Python 1.5 -- this version check is not evaluated until after the -# entire file has been parsed.
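In the sitemap task above, gen_tasks bails out on Python 3 with a dummy task, and on Python 2 writes its generated config through NamedTemporaryFile(delete=False), encoding explicitly because the handle is binary, then unlinks the file itself. The temp-file pattern in isolation (the XML payload is only a stand-in, not sitemap_gen's exact config schema):

import os
import tempfile

config_data = '<site base_url="http://example.com/" store_into="sitemap.xml"/>'

# delete=False keeps the file on disk after close() so another component
# can re-open it by name; cleaning it up afterwards is then our job.
config_file = tempfile.NamedTemporaryFile(delete=False)
config_file.write(config_data.encode('utf8'))  # binary handle: encode first
config_file.close()

try:
    with open(config_file.name, 'rb') as fd:
        print(fd.read().decode('utf8'))
finally:
    os.unlink(config_file.name)
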
-import sys -if sys.hexversion < 0x02020000: - print('This script requires Python 2.2 or later.') - print('Currently run with version: %s' % sys.version) - sys.exit(1) - import fnmatch import glob import gzip -import hashlib import os import re import stat +import sys import time -import types import urllib import xml.sax try: - from urlparse import urlparse, urlsplit, urlunsplit + import md5 except ImportError: - from urllib.parse import urlparse, urlsplit, urlunsplit + md5 = None # NOQA + import hashlib + +try: + from urlparse import urlsplit, urlunsplit, urljoin +except ImportError: + from urllib.parse import urlsplit, urlunsplit, urljoin # NOQA + +try: + from urllib import quote as urllib_quote + from urllib import FancyURLopener + from urllib import urlopen +except ImportError: + from urllib.parse import quote as urllib_quote # NOQA + from urllib.request import FancyURLopener # NOQA + from urllib.request import urlopen # NOQA + + +if sys.version_info[0] == 3: + # Python 3 + bytes_str = bytes + unicode_str = str + unichr = chr +else: + bytes_str = str + unicode_str = unicode # Text encodings ENC_ASCII = 'ASCII' -ENC_UTF8 = 'UTF-8' -ENC_IDNA = 'IDNA' +ENC_UTF8 = 'UTF-8' +ENC_IDNA = 'IDNA' ENC_ASCII_LIST = ['ASCII', 'US-ASCII', 'US', 'IBM367', 'CP367', 'ISO646-US' 'ISO_646.IRV:1991', 'ISO-IR-6', 'ANSI_X3.4-1968', - 'ANSI_X3.4-1986', 'CPASCII' ] + 'ANSI_X3.4-1986', 'CPASCII'] ENC_DEFAULT_LIST = ['ISO-8859-1', 'ISO-8859-2', 'ISO-8859-5'] +# Available Sitemap types +SITEMAP_TYPES = ['web', 'mobile', 'news'] + +# General Sitemap tags +GENERAL_SITEMAP_TAGS = ['loc', 'changefreq', 'priority', 'lastmod'] + +# News specific tags +NEWS_SPECIFIC_TAGS = ['keywords', 'publication_date', 'stock_tickers'] + +# News Sitemap tags +NEWS_SITEMAP_TAGS = GENERAL_SITEMAP_TAGS + NEWS_SPECIFIC_TAGS + # Maximum number of urls in each sitemap, before next Sitemap is created MAXURLS_PER_SITEMAP = 50000 @@ -95,53 +121,77 @@ SITEINDEX_SUFFIX = '_index.xml' # Regular expressions tried for extracting URLs from access logs. 
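The rewritten import block above is the standard dual-runtime shim: try the Python 2 module path, fall back to the Python 3 location, alias the str/bytes types once, and hide the md5-vs-hashlib split behind one helper. Its core, runnable on either interpreter:

import sys

try:
    from urlparse import urlsplit, urlunsplit, urljoin      # Python 2
except ImportError:
    from urllib.parse import urlsplit, urlunsplit, urljoin  # Python 3  NOQA

try:
    import md5                                               # Python 2 only
except ImportError:
    md5 = None  # NOQA
    import hashlib

def get_hash(text):
    # md5.new() on Python 2, hashlib.md5() elsewhere -- same 16-byte digest.
    if md5 is not None:
        return md5.new(text).digest()
    m = hashlib.md5()
    m.update(text.encode('utf8'))
    return m.digest()

if sys.version_info[0] == 3:
    bytes_str, unicode_str = bytes, str
else:
    bytes_str, unicode_str = str, unicode  # NOQA: only defined on Python 2

print(len(get_hash('http://example.com/')), urlsplit('http://example.com/a').path)
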
ACCESSLOG_CLF_PATTERN = re.compile( - r'.+\s+"([^\s]+)\s+([^\s]+)\s+HTTP/\d+\.\d+"\s+200\s+.*' - ) + r'.+\s+"([^\s]+)\s+([^\s]+)\s+HTTP/\d+\.\d+"\s+200\s+.*' +) # Match patterns for lastmod attributes -LASTMOD_PATTERNS = map(re.compile, [ - r'^\d\d\d\d$', - r'^\d\d\d\d-\d\d$', - r'^\d\d\d\d-\d\d-\d\d$', - r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\dZ$', - r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d[+-]\d\d:\d\d$', - r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?Z$', - r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?[+-]\d\d:\d\d$', - ]) +DATE_PATTERNS = list(map(re.compile, [ + r'^\d\d\d\d$', + r'^\d\d\d\d-\d\d$', + r'^\d\d\d\d-\d\d-\d\d$', + r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\dZ$', + r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d[+-]\d\d:\d\d$', + r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?Z$', + r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?[+-]\d\d:\d\d$', +])) # Match patterns for changefreq attributes CHANGEFREQ_PATTERNS = [ - 'always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never' - ] + 'always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never' +] # XML formats -SITEINDEX_HEADER = \ - '<?xml version="1.0" encoding="UTF-8"?>\n' \ - '<?xml-stylesheet type="text/xsl" href="gss.xsl"?>\n' \ - '<sitemapindex\n' \ - ' xmlns="http://www.google.com/schemas/sitemap/0.84"\n' \ - ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' \ - ' xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84\n' \ - ' http://www.google.com/schemas/sitemap/0.84/' \ - 'siteindex.xsd">\n' -SITEINDEX_FOOTER = '</sitemapindex>\n' -SITEINDEX_ENTRY = \ - ' <sitemap>\n' \ - ' <loc>%(loc)s</loc>\n' \ - ' <lastmod>%(lastmod)s</lastmod>\n' \ - ' </sitemap>\n' -SITEMAP_HEADER = \ - '<?xml version="1.0" encoding="UTF-8"?>\n' \ - '<urlset\n' \ - ' xmlns="http://www.google.com/schemas/sitemap/0.84"\n' \ - ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' \ - ' xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84\n' \ - ' http://www.google.com/schemas/sitemap/0.84/' \ - 'sitemap.xsd">\n' -SITEMAP_FOOTER = '</urlset>\n' +GENERAL_SITEINDEX_HEADER = \ + '<?xml version="1.0" encoding="UTF-8"?>\n' \ + '<sitemapindex\n' \ + ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n' \ + ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' \ + ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9\n' \ + ' http://www.sitemaps.org/schemas/sitemap/0.9/' \ + 'siteindex.xsd">\n' + +NEWS_SITEINDEX_HEADER = \ + '<?xml version="1.0" encoding="UTF-8"?>\n' \ + '<sitemapindex\n' \ + ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n' \ + ' xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"\n' \ + ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' \ + ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9\n' \ + ' http://www.sitemaps.org/schemas/sitemap/0.9/' \ + 'siteindex.xsd">\n' + +SITEINDEX_FOOTER = '</sitemapindex>\n' +SITEINDEX_ENTRY = \ + ' <sitemap>\n' \ + ' <loc>%(loc)s</loc>\n' \ + ' <lastmod>%(lastmod)s</lastmod>\n' \ + ' </sitemap>\n' +GENERAL_SITEMAP_HEADER = \ + '<?xml version="1.0" encoding="UTF-8"?>\n' \ + '<urlset\n' \ + ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n' \ + ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' \ + ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9\n' \ + ' http://www.sitemaps.org/schemas/sitemap/0.9/' \ + 'sitemap.xsd">\n' + +NEWS_SITEMAP_HEADER = \ + '<?xml version="1.0" encoding="UTF-8"?>\n' \ + '<urlset\n' \ + ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n' \ + ' 
xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"\n' \ + ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' \ + ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9\n' \ + ' http://www.sitemaps.org/schemas/sitemap/0.9/' \ + 'sitemap.xsd">\n' + +SITEMAP_FOOTER = '</urlset>\n' SITEURL_XML_PREFIX = ' <url>\n' SITEURL_XML_SUFFIX = ' </url>\n' +NEWS_TAG_XML_PREFIX = ' <news:news>\n' +NEWS_TAG_XML_SUFFIX = ' </news:news>\n' + # Search engines to notify with the updated sitemaps # # This list is very non-obvious in what's going on. Here's the gist: @@ -156,2067 +206,1916 @@ SITEURL_XML_SUFFIX = ' </url>\n' # 5 - query attribute that should be set to the new Sitemap URL # Clear as mud, I know. NOTIFICATION_SITES = [ - ('http', 'www.google.com', 'webmasters/sitemaps/ping', {}, '', 'sitemap') - ] + ('http', 'www.google.com', 'webmasters/sitemaps/ping', {}, '', 'sitemap'), +] + + +def get_hash(text): + if md5 is not None: + return md5.new(text).digest() + else: + m = hashlib.md5() + m.update(text.encode('utf8')) + return m.digest() class Error(Exception): - """ - Base exception class. In this module we tend not to use our own exception - types for very much, but they come in very handy on XML parsing with SAX. - """ - pass -#end class Error + """ + Base exception class. In this module we tend not to use our own exception + types for very much, but they come in very handy on XML parsing with SAX. + """ + pass +# end class Error class SchemaError(Error): - """Failure to process an XML file according to the schema we know.""" - pass -#end class SchemeError + """Failure to process an XML file according to the schema we know.""" + pass +# end class SchemeError class Encoder: - """ - Manages wide-character/narrow-character conversions for just about all - text that flows into or out of the script. - - You should always use this class for string coercion, as opposed to - letting Python handle coercions automatically. Reason: Python - usually assumes ASCII (7-bit) as a default narrow character encoding, - which is not the kind of data we generally deal with. - - General high-level methodologies used in sitemap_gen: - - [PATHS] - File system paths may be wide or narrow, depending on platform. - This works fine, just be aware of it and be very careful to not - mix them. That is, if you have to pass several file path arguments - into a library call, make sure they are all narrow or all wide. - This class has MaybeNarrowPath() which should be called on every - file system path you deal with. - - [URLS] - URL locations are stored in Narrow form, already escaped. This has the - benefit of keeping escaping and encoding as close as possible to the format - we read them in. The downside is we may end up with URLs that have - intermingled encodings -- the root path may be encoded in one way - while the filename is encoded in another. This is obviously wrong, but - it should hopefully be an issue hit by very few users. The workaround - from the user level (assuming they notice) is to specify a default_encoding - parameter in their config file. - - [OTHER] - Other text, such as attributes of the URL class, configuration options, - etc, are generally stored in Unicode for simplicity. - """ - - def __init__(self): - self._user = None # User-specified default encoding - self._learned = [] # Learned default encodings - self._widefiles = False # File system can be wide - - # Can the file system be Unicode? 
- try: - self._widefiles = os.path.supports_unicode_filenames - except AttributeError: - try: - self._widefiles = sys.getwindowsversion() == os.VER_PLATFORM_WIN32_NT - except AttributeError: - pass - - # Try to guess a working default - try: - encoding = sys.getfilesystemencoding() - if encoding and not (encoding.upper() in ENC_ASCII_LIST): - self._learned = [ encoding ] - except AttributeError: - pass - - if not self._learned: - encoding = sys.getdefaultencoding() - if encoding and not (encoding.upper() in ENC_ASCII_LIST): - self._learned = [ encoding ] - - # If we had no guesses, start with some European defaults - if not self._learned: - self._learned = ENC_DEFAULT_LIST - #end def __init__ - - def SetUserEncoding(self, encoding): - self._user = encoding - #end def SetUserEncoding - - def NarrowText(self, text, encoding): - """ Narrow a piece of arbitrary text """ - if type(text) != types.UnicodeType: - return text - - # Try the passed in preference - if encoding: - try: - result = text.encode(encoding) - if not encoding in self._learned: - self._learned.append(encoding) - return result - except UnicodeError: - pass - except LookupError: - output.Warn('Unknown encoding: %s' % encoding) - - # Try the user preference - if self._user: - try: - return text.encode(self._user) - except UnicodeError: - pass - except LookupError: - temp = self._user - self._user = None - output.Warn('Unknown default_encoding: %s' % temp) - - # Look through learned defaults, knock any failing ones out of the list - while self._learned: - try: - return text.encode(self._learned[0]) - except: - del self._learned[0] - - # When all other defaults are exhausted, use UTF-8 - try: - return text.encode(ENC_UTF8) - except UnicodeError: - pass - - # Something is seriously wrong if we get to here - return text.encode(ENC_ASCII, 'ignore') - #end def NarrowText - - def MaybeNarrowPath(self, text): - """ Paths may be allowed to stay wide """ - if self._widefiles: - return text - return self.NarrowText(text, None) - #end def MaybeNarrowPath - - def WidenText(self, text, encoding): - """ Widen a piece of arbitrary text """ - if type(text) != types.StringType: - return text - - # Try the passed in preference - if encoding: - try: - result = unicode(text, encoding) - if not encoding in self._learned: - self._learned.append(encoding) - return result - except UnicodeError: - pass - except LookupError: - output.Warn('Unknown encoding: %s' % encoding) - - # Try the user preference - if self._user: - try: - return unicode(text, self._user) - except UnicodeError: - pass - except LookupError: - temp = self._user - self._user = None - output.Warn('Unknown default_encoding: %s' % temp) - - # Look through learned defaults, knock any failing ones out of the list - while self._learned: - try: - return unicode(text, self._learned[0]) - except: - del self._learned[0] - - # When all other defaults are exhausted, use UTF-8 - try: - return unicode(text, ENC_UTF8) - except UnicodeError: - pass - - # Getting here means it wasn't UTF-8 and we had no working default. - # We really don't have anything "right" we can do anymore. - output.Warn('Unrecognized encoding in text: %s' % text) - if not self._user: - output.Warn('You may need to set a default_encoding in your ' - 'configuration file.') - return text.decode(ENC_ASCII, 'ignore') - #end def WidenText -#end class Encoder + """ + Manages wide-character/narrow-character conversions for just about all + text that flows into or out of the script. 
+ + You should always use this class for string coercion, as opposed to + letting Python handle coercions automatically. Reason: Python + usually assumes ASCII (7-bit) as a default narrow character encoding, + which is not the kind of data we generally deal with. + + General high-level methodologies used in sitemap_gen: + + [PATHS] + File system paths may be wide or narrow, depending on platform. + This works fine, just be aware of it and be very careful to not + mix them. That is, if you have to pass several file path arguments + into a library call, make sure they are all narrow or all wide. + This class has MaybeNarrowPath() which should be called on every + file system path you deal with. + + [URLS] + URL locations are stored in Narrow form, already escaped. This has the + benefit of keeping escaping and encoding as close as possible to the format + we read them in. The downside is we may end up with URLs that have + intermingled encodings -- the root path may be encoded in one way + while the filename is encoded in another. This is obviously wrong, but + it should hopefully be an issue hit by very few users. The workaround + from the user level (assuming they notice) is to specify a default_encoding + parameter in their config file. + + [OTHER] + Other text, such as attributes of the URL class, configuration options, + etc, are generally stored in Unicode for simplicity. + """ + + def __init__(self): + self._user = None # User-specified default encoding + self._learned = [] # Learned default encodings + self._widefiles = False # File system can be wide + + # Can the file system be Unicode? + try: + self._widefiles = os.path.supports_unicode_filenames + except AttributeError: + try: + self._widefiles = sys.getwindowsversion( + ) == os.VER_PLATFORM_WIN32_NT + except AttributeError: + pass + + # Try to guess a working default + try: + encoding = sys.getfilesystemencoding() + if encoding and not (encoding.upper() in ENC_ASCII_LIST): + self._learned = [encoding] + except AttributeError: + pass + + if not self._learned: + encoding = sys.getdefaultencoding() + if encoding and not (encoding.upper() in ENC_ASCII_LIST): + self._learned = [encoding] + + # If we had no guesses, start with some European defaults + if not self._learned: + self._learned = ENC_DEFAULT_LIST + # end def __init__ + + def SetUserEncoding(self, encoding): + self._user = encoding + # end def SetUserEncoding + + def NarrowText(self, text, encoding): + """ Narrow a piece of arbitrary text """ + if isinstance(text, bytes_str): + return text + + # Try the passed in preference + if encoding: + try: + result = text.encode(encoding) + if not encoding in self._learned: + self._learned.append(encoding) + return result + except UnicodeError: + pass + except LookupError: + output.Warn('Unknown encoding: %s' % encoding) + + # Try the user preference + if self._user: + try: + return text.encode(self._user) + except UnicodeError: + pass + except LookupError: + temp = self._user + self._user = None + output.Warn('Unknown default_encoding: %s' % temp) + + # Look through learned defaults, knock any failing ones out of the list + while self._learned: + try: + return text.encode(self._learned[0]) + except: + del self._learned[0] + + # When all other defaults are exhausted, use UTF-8 + try: + return text.encode(ENC_UTF8) + except UnicodeError: + pass + + # Something is seriously wrong if we get to here + return text.encode(ENC_ASCII, 'ignore') + # end def NarrowText + + def MaybeNarrowPath(self, text): + """ Paths may be allowed to stay wide 
""" + if self._widefiles: + return text + return self.NarrowText(text, None) + # end def MaybeNarrowPath + + def WidenText(self, text, encoding): + """ Widen a piece of arbitrary text """ + if not isinstance(text, bytes_str): + return text + + # Try the passed in preference + if encoding: + try: + result = unicode_str(text, encoding) + if not encoding in self._learned: + self._learned.append(encoding) + return result + except UnicodeError: + pass + except LookupError: + output.Warn('Unknown encoding: %s' % encoding) + + # Try the user preference + if self._user: + try: + return unicode_str(text, self._user) + except UnicodeError: + pass + except LookupError: + temp = self._user + self._user = None + output.Warn('Unknown default_encoding: %s' % temp) + + # Look through learned defaults, knock any failing ones out of the list + while self._learned: + try: + return unicode_str(text, self._learned[0]) + except: + del self._learned[0] + + # When all other defaults are exhausted, use UTF-8 + try: + return unicode_str(text, ENC_UTF8) + except UnicodeError: + pass + + # Getting here means it wasn't UTF-8 and we had no working default. + # We really don't have anything "right" we can do anymore. + output.Warn('Unrecognized encoding in text: %s' % text) + if not self._user: + output.Warn('You may need to set a default_encoding in your ' + 'configuration file.') + return text.decode(ENC_ASCII, 'ignore') + # end def WidenText +# end class Encoder encoder = Encoder() class Output: - """ - Exposes logging functionality, and tracks how many errors - we have thus output. - - Logging levels should be used as thus: - Fatal -- extremely sparingly - Error -- config errors, entire blocks of user 'intention' lost - Warn -- individual URLs lost - Log(,0) -- Un-suppressable text that's not an error - Log(,1) -- touched files, major actions - Log(,2) -- parsing notes, filtered or duplicated URLs - Log(,3) -- each accepted URL - """ - - def __init__(self): - self.num_errors = 0 # Count of errors - self.num_warns = 0 # Count of warnings - - self._errors_shown = {} # Shown errors - self._warns_shown = {} # Shown warnings - self._verbose = 0 # Level of verbosity - #end def __init__ - - def Log(self, text, level): - """ Output a blurb of diagnostic text, if the verbose level allows it """ - if text: - text = encoder.NarrowText(text, None) - if self._verbose >= level: - print(text) - #end def Log - - def Warn(self, text): - """ Output and count a warning. Suppress duplicate warnings. """ - if text: - text = encoder.NarrowText(text, None) - hash = hashlib.md5(text).digest() - if not self._warns_shown.has_key(hash): - self._warns_shown[hash] = 1 - print('[WARNING] ' + text) - else: - self.Log('(suppressed) [WARNING] ' + text, 3) - self.num_warns = self.num_warns + 1 - #end def Warn - - def Error(self, text): - """ Output and count an error. Suppress duplicate errors. """ - if text: - text = encoder.NarrowText(text, None) - hash = hashlib.md5(text).digest() - if not self._errors_shown.has_key(hash): - self._errors_shown[hash] = 1 - print('[ERROR] ' + text) - else: - self.Log('(suppressed) [ERROR] ' + text, 3) - self.num_errors = self.num_errors + 1 - #end def Error - - def Fatal(self, text): - """ Output an error and terminate the program. """ - if text: - text = encoder.NarrowText(text, None) - print('[FATAL] ' + text) - else: - print('Fatal error.') - sys.exit(1) - #end def Fatal + """ + Exposes logging functionality, and tracks how many errors + we have thus output. 
+ + Logging levels should be used as thus: + Fatal -- extremely sparingly + Error -- config errors, entire blocks of user 'intention' lost + Warn -- individual URLs lost + Log(,0) -- Un-suppressable text that's not an error + Log(,1) -- touched files, major actions + Log(,2) -- parsing notes, filtered or duplicated URLs + Log(,3) -- each accepted URL + """ - def SetVerbose(self, level): - """ Sets the verbose level. """ - try: - if type(level) != types.IntType: - level = int(level) - if (level >= 0) and (level <= 3): - self._verbose = level - return - except ValueError: - pass - self.Error('Verbose level (%s) must be between 0 and 3 inclusive.' % level) - #end def SetVerbose -#end class Output + def __init__(self): + self.num_errors = 0 # Count of errors + self.num_warns = 0 # Count of warnings + + self._errors_shown = {} # Shown errors + self._warns_shown = {} # Shown warnings + self._verbose = 0 # Level of verbosity + # end def __init__ + + def Log(self, text, level): + """ Output a blurb of diagnostic text, if the verbose level allows it """ + if text: + text = encoder.NarrowText(text, None) + if self._verbose >= level: + print(text) + # end def Log + + def Warn(self, text): + """ Output and count a warning. Suppress duplicate warnings. """ + if text: + text = encoder.NarrowText(text, None) + hash = get_hash(text) + if not hash in self._warns_shown: + self._warns_shown[hash] = 1 + print('[WARNING] ' + text) + else: + self.Log('(suppressed) [WARNING] ' + text, 3) + self.num_warns = self.num_warns + 1 + # end def Warn + + def Error(self, text): + """ Output and count an error. Suppress duplicate errors. """ + if text: + text = encoder.NarrowText(text, None) + hash = get_hash(text) + if not hash in self._errors_shown: + self._errors_shown[hash] = 1 + print('[ERROR] ' + text) + else: + self.Log('(suppressed) [ERROR] ' + text, 3) + self.num_errors = self.num_errors + 1 + # end def Error + + def Fatal(self, text): + """ Output an error and terminate the program. """ + if text: + text = encoder.NarrowText(text, None) + print('[FATAL] ' + text) + else: + print('Fatal error.') + sys.exit(1) + # end def Fatal + + def SetVerbose(self, level): + """ Sets the verbose level. """ + try: + if not isinstance(level, int): + level = int(level) + if (level >= 0) and (level <= 3): + self._verbose = level + return + except ValueError: + pass + self.Error( + 'Verbose level (%s) must be between 0 and 3 inclusive.' % level) + # end def SetVerbose +# end class Output output = Output() class URL(object): - """ URL is a smart structure grouping together the properties we - care about for a single web reference. """ - __slots__ = 'loc', 'lastmod', 'changefreq', 'priority' - - def __init__(self): - self.loc = None # URL -- in Narrow characters - self.lastmod = None # ISO8601 timestamp of last modify - self.changefreq = None # Text term for update frequency - self.priority = None # Float between 0 and 1 (inc) - #end def __init__ - - def __cmp__(self, other): - if self.loc < other.loc: - return -1 - if self.loc > other.loc: - return 1 - return 0 - #end def __cmp__ - - def TrySetAttribute(self, attribute, value): - """ Attempt to set the attribute to the value, with a pretty try - block around it. 
""" - if attribute == 'loc': - self.loc = self.Canonicalize(value) - else: - try: - setattr(self, attribute, value) - except AttributeError: - output.Warn('Unknown URL attribute: %s' % attribute) - #end def TrySetAttribute - - def IsAbsolute(loc): - """ Decide if the URL is absolute or not """ - if not loc: - return False - narrow = encoder.NarrowText(loc, None) - (scheme, netloc, path, query, frag) = urlsplit(narrow) - if (not scheme) or (not netloc): - return False - return True - #end def IsAbsolute - IsAbsolute = staticmethod(IsAbsolute) - - def Canonicalize(loc): - """ Do encoding and canonicalization on a URL string """ - if not loc: - return loc - - # Let the encoder try to narrow it - narrow = encoder.NarrowText(loc, None) - - # Escape components individually - (scheme, netloc, path, query, frag) = urlsplit(narrow) - unr = '-._~' - sub = '!$&\'()*+,;=' - netloc = urllib.quote(netloc, unr + sub + '%:@/[]') - path = urllib.quote(path, unr + sub + '%:@/') - query = urllib.quote(query, unr + sub + '%:@/?') - frag = urllib.quote(frag, unr + sub + '%:@/?') - - # Try built-in IDNA encoding on the netloc - try: - (ignore, widenetloc, ignore, ignore, ignore) = urlsplit(loc) - for c in widenetloc: - if c >= unichr(128): - netloc = widenetloc.encode(ENC_IDNA) - netloc = urllib.quote(netloc, unr + sub + '%:@/[]') - break - except UnicodeError: - # urlsplit must have failed, based on implementation differences in the - # library. There is not much we can do here, except ignore it. - pass - except LookupError: - output.Warn('An International Domain Name (IDN) is being used, but this ' - 'version of Python does not have support for IDNA encoding. ' - ' (IDNA support was introduced in Python 2.3) The encoding ' - 'we have used instead is wrong and will probably not yield ' - 'valid URLs.') - bad_netloc = False - if '%' in netloc: - bad_netloc = True - - # Put it all back together - narrow = urlunsplit((scheme, netloc, path, query, frag)) - - # I let '%' through. Fix any that aren't pre-existing escapes. - HEXDIG = '0123456789abcdefABCDEF' - list = narrow.split('%') - narrow = list[0] - del list[0] - for item in list: - if (len(item) >= 2) and (item[0] in HEXDIG) and (item[1] in HEXDIG): - narrow = narrow + '%' + item - else: - narrow = narrow + '%25' + item - - # Issue a warning if this is a bad URL - if bad_netloc: - output.Warn('Invalid characters in the host or domain portion of a URL: ' - + narrow) - - return narrow - #end def Canonicalize - Canonicalize = staticmethod(Canonicalize) - - def Validate(self, base_url, allow_fragment): - """ Verify the data in this URL is well-formed, and override if not. 
""" - assert type(base_url) == types.StringType - - # Test (and normalize) the ref - if not self.loc: - output.Warn('Empty URL') - return False - if allow_fragment: - self.loc = urlparse.urljoin(base_url, self.loc) - if not self.loc.startswith(base_url): - output.Warn('Discarded URL for not starting with the base_url: %s' % - self.loc) - self.loc = None - return False - - # Test the lastmod - if self.lastmod: - match = False - self.lastmod = self.lastmod.upper() - for pattern in LASTMOD_PATTERNS: - match = pattern.match(self.lastmod) - if match: - break - if not match: - output.Warn('Lastmod "%s" does not appear to be in ISO8601 format on ' - 'URL: %s' % (self.lastmod, self.loc)) - self.lastmod = None - - # Test the changefreq - if self.changefreq: - match = False - self.changefreq = self.changefreq.lower() - for pattern in CHANGEFREQ_PATTERNS: - if self.changefreq == pattern: - match = True - break - if not match: - output.Warn('Changefreq "%s" is not a valid change frequency on URL ' - ': %s' % (self.changefreq, self.loc)) - self.changefreq = None - - # Test the priority - if self.priority: - priority = -1.0 - try: - priority = float(self.priority) - except ValueError: - pass - if (priority < 0.0) or (priority > 1.0): - output.Warn('Priority "%s" is not a number between 0 and 1 inclusive ' - 'on URL: %s' % (self.priority, self.loc)) - self.priority = None - - return True - #end def Validate - - def MakeHash(self): - """ Provides a uniform way of hashing URLs """ - if not self.loc: - return None - if self.loc.endswith('/'): - return hashlib.md5(self.loc[:-1]).digest() - return hashlib.md5(self.loc).digest() - #end def MakeHash - - def Log(self, prefix='URL', level=3): - """ Dump the contents, empty or not, to the log. """ - out = prefix + ':' - - for attribute in self.__slots__: - value = getattr(self, attribute) - if not value: - value = '' - out = out + (' %s=[%s]' % (attribute, value)) - - output.Log('%s' % encoder.NarrowText(out, None), level) - #end def Log - - def WriteXML(self, file): - """ Dump non-empty contents to the output file, in XML format. """ - if not self.loc: - return - out = SITEURL_XML_PREFIX - - for attribute in self.__slots__: - value = getattr(self, attribute) - if value: - if type(value) == types.UnicodeType: - value = encoder.NarrowText(value, None) - elif type(value) != types.StringType: - value = str(value) - value = xml.sax.saxutils.escape(value) - out = out + (' <%s>%s</%s>\n' % (attribute, value, attribute)) - - out = out + SITEURL_XML_SUFFIX - file.write(out) - #end def WriteXML -#end class URL + """ URL is a smart structure grouping together the properties we + care about for a single web reference. """ + __slots__ = 'loc', 'lastmod', 'changefreq', 'priority' + def __init__(self): + self.loc = None # URL -- in Narrow characters + self.lastmod = None # ISO8601 timestamp of last modify + self.changefreq = None # Text term for update frequency + self.priority = None # Float between 0 and 1 (inc) + # end def __init__ + + def __cmp__(self, other): + if self.loc < other.loc: + return -1 + if self.loc > other.loc: + return 1 + return 0 + # end def __cmp__ + + def TrySetAttribute(self, attribute, value): + """ Attempt to set the attribute to the value, with a pretty try + block around it. 
""" + if attribute == 'loc': + self.loc = self.Canonicalize(value) + else: + try: + setattr(self, attribute, value) + except AttributeError: + output.Warn('Unknown URL attribute: %s' % attribute) + # end def TrySetAttribute + + def IsAbsolute(loc): + """ Decide if the URL is absolute or not """ + if not loc: + return False + narrow = encoder.NarrowText(loc, None) + (scheme, netloc, path, query, frag) = urlsplit(narrow) + if (not scheme) or (not netloc): + return False + return True + # end def IsAbsolute + IsAbsolute = staticmethod(IsAbsolute) + + def Canonicalize(loc): + """ Do encoding and canonicalization on a URL string """ + if not loc: + return loc + + # Let the encoder try to narrow it + narrow = encoder.NarrowText(loc, None) + + # Escape components individually + (scheme, netloc, path, query, frag) = urlsplit(narrow) + unr = '-._~' + sub = '!$&\'()*+,;=' + netloc = urllib_quote(netloc, unr + sub + '%:@/[]') + path = urllib_quote(path, unr + sub + '%:@/') + query = urllib_quote(query, unr + sub + '%:@/?') + frag = urllib_quote(frag, unr + sub + '%:@/?') + + # Try built-in IDNA encoding on the netloc + try: + (ignore, widenetloc, ignore, ignore, ignore) = urlsplit(loc) + for c in widenetloc: + if c >= unichr(128): + netloc = widenetloc.encode(ENC_IDNA) + netloc = urllib_quote(netloc, unr + sub + '%:@/[]') + break + except UnicodeError: + # urlsplit must have failed, based on implementation differences in the + # library. There is not much we can do here, except ignore it. + pass + except LookupError: + output.Warn('An International Domain Name (IDN) is being used, but this ' + 'version of Python does not have support for IDNA encoding. ' + ' (IDNA support was introduced in Python 2.3) The encoding ' + 'we have used instead is wrong and will probably not yield ' + 'valid URLs.') + bad_netloc = False + if '%' in netloc: + bad_netloc = True + + # Put it all back together + narrow = urlunsplit((scheme, netloc, path, query, frag)) + + # I let '%' through. Fix any that aren't pre-existing escapes. + HEXDIG = '0123456789abcdefABCDEF' + list = narrow.split('%') + narrow = list[0] + del list[0] + for item in list: + if (len(item) >= 2) and (item[0] in HEXDIG) and (item[1] in HEXDIG): + narrow = narrow + '%' + item + else: + narrow = narrow + '%25' + item + + # Issue a warning if this is a bad URL + if bad_netloc: + output.Warn('Invalid characters in the host or domain portion of a URL: ' + + narrow) + + return narrow + # end def Canonicalize + Canonicalize = staticmethod(Canonicalize) + + def VerifyDate(self, date, metatag): + """Verify the date format is valid""" + match = False + if date: + date = date.upper() + for pattern in DATE_PATTERNS: + match = pattern.match(date) + if match: + return True + if not match: + output.Warn('The value for %s does not appear to be in ISO8601 ' + 'format on URL: %s' % (metatag, self.loc)) + return False + # end of VerifyDate + + def Validate(self, base_url, allow_fragment): + """ Verify the data in this URL is well-formed, and override if not. 
""" + assert isinstance(base_url, bytes_str) + + # Test (and normalize) the ref + if not self.loc: + output.Warn('Empty URL') + return False + if allow_fragment: + self.loc = urljoin(base_url, self.loc) + if not self.loc.startswith(base_url): + output.Warn('Discarded URL for not starting with the base_url: %s' % + self.loc) + self.loc = None + return False + + # Test the lastmod + if self.lastmod: + if not self.VerifyDate(self.lastmod, "lastmod"): + self.lastmod = None + + # Test the changefreq + if self.changefreq: + match = False + self.changefreq = self.changefreq.lower() + for pattern in CHANGEFREQ_PATTERNS: + if self.changefreq == pattern: + match = True + break + if not match: + output.Warn('Changefreq "%s" is not a valid change frequency on URL ' + ': %s' % (self.changefreq, self.loc)) + self.changefreq = None + + # Test the priority + if self.priority: + priority = -1.0 + try: + priority = float(self.priority) + except ValueError: + pass + if (priority < 0.0) or (priority > 1.0): + output.Warn('Priority "%s" is not a number between 0 and 1 inclusive ' + 'on URL: %s' % (self.priority, self.loc)) + self.priority = None -class Filter: - """ - A filter on the stream of URLs we find. A filter is, in essence, - a wildcard applied to the stream. You can think of this as an - operator that returns a tri-state when given a URL: - - True -- this URL is to be included in the sitemap - None -- this URL is undecided - False -- this URL is to be dropped from the sitemap - """ - - def __init__(self, attributes): - self._wildcard = None # Pattern for wildcard match - self._regexp = None # Pattern for regexp match - self._pass = False # "Drop" filter vs. "Pass" filter - - if not ValidateAttributes('FILTER', attributes, - ('pattern', 'type', 'action')): - return - - # Check error count on the way in - num_errors = output.num_errors + return True + # end def Validate + + def MakeHash(self): + """ Provides a uniform way of hashing URLs """ + if not self.loc: + return None + if self.loc.endswith('/'): + return get_hash(self.loc[:-1]) + return get_hash(self.loc) + # end def MakeHash + + def Log(self, prefix='URL', level=3): + """ Dump the contents, empty or not, to the log. """ + out = prefix + ':' + + for attribute in self.__slots__: + value = getattr(self, attribute) + if not value: + value = '' + out = out + (' %s=[%s]' % (attribute, value)) + + output.Log('%s' % encoder.NarrowText(out, None), level) + # end def Log + + def WriteXML(self, file): + """ Dump non-empty contents to the output file, in XML format. """ + if not self.loc: + return + out = SITEURL_XML_PREFIX + + for attribute in self.__slots__: + value = getattr(self, attribute) + if value: + if isinstance(value, unicode_str): + value = encoder.NarrowText(value, None) + elif not isinstance(value, bytes_str): + value = str(value) + value = xml.sax.saxutils.escape(value) + out = out + (' <%s>%s</%s>\n' % (attribute, value, attribute)) + + out = out + SITEURL_XML_SUFFIX + file.write(out) + # end def WriteXML +# end class URL + + +class NewsURL(URL): + """ NewsURL is a subclass of URL with News-Sitemap specific properties. 
""" + __slots__ = 'loc', 'lastmod', 'changefreq', 'priority', 'publication_date', \ + 'keywords', 'stock_tickers' - # Fetch the attributes - pattern = attributes.get('pattern') - type = attributes.get('type', 'wildcard') - action = attributes.get('action', 'drop') - if type: - type = type.lower() - if action: - action = action.lower() - - # Verify the attributes - if not pattern: - output.Error('On a filter you must specify a "pattern" to match') - elif (not type) or ((type != 'wildcard') and (type != 'regexp')): - output.Error('On a filter you must specify either \'type="wildcard"\' ' - 'or \'type="regexp"\'') - elif (action != 'pass') and (action != 'drop'): - output.Error('If you specify a filter action, it must be either ' - '\'action="pass"\' or \'action="drop"\'') - - # Set the rule - if action == 'drop': - self._pass = False - elif action == 'pass': - self._pass = True - - if type == 'wildcard': - self._wildcard = pattern - elif type == 'regexp': - try: - self._regexp = re.compile(pattern) - except re.error: - output.Error('Bad regular expression: %s' % pattern) - - # Log the final results iff we didn't add any errors - if num_errors == output.num_errors: - output.Log('Filter: %s any URL that matches %s "%s"' % - (action, type, pattern), 2) - #end def __init__ + def __init__(self): + URL.__init__(self) + self.publication_date = None # ISO8601 timestamp of publication date + self.keywords = None # Text keywords + self.stock_tickers = None # Text stock + # end def __init__ - def Apply(self, url): - """ Process the URL, as above. """ - if (not url) or (not url.loc): - return None + def Validate(self, base_url, allow_fragment): + """ Verify the data in this News URL is well-formed, and override if not. """ + assert isinstance(base_url, bytes_str) - if self._wildcard: - if fnmatch.fnmatchcase(url.loc, self._wildcard): - return self._pass - return None + if not URL.Validate(self, base_url, allow_fragment): + return False - if self._regexp: - if self._regexp.search(url.loc): - return self._pass - return None + if not URL.VerifyDate(self, self.publication_date, "publication_date"): + self.publication_date = None - assert False # unreachable - #end def Apply -#end class Filter + return True + # end def Validate + + def WriteXML(self, file): + """ Dump non-empty contents to the output file, in XML format. """ + if not self.loc: + return + out = SITEURL_XML_PREFIX + + # printed_news_tag indicates if news-specific metatags are present + printed_news_tag = False + for attribute in self.__slots__: + value = getattr(self, attribute) + if value: + if isinstance(value, unicode_str): + value = encoder.NarrowText(value, None) + elif not isinstance(value, bytes_str): + value = str(value) + value = xml.sax.saxutils.escape(value) + if attribute in NEWS_SPECIFIC_TAGS: + if not printed_news_tag: + printed_news_tag = True + out = out + NEWS_TAG_XML_PREFIX + out = out + (' <news:%s>%s</news:%s>\n' % + (attribute, value, attribute)) + else: + out = out + (' <%s>%s</%s>\n' % ( + attribute, value, attribute)) + + if printed_news_tag: + out = out + NEWS_TAG_XML_SUFFIX + out = out + SITEURL_XML_SUFFIX + file.write(out) + # end def WriteXML +# end class NewsURL -class InputURL: - """ - Each Input class knows how to yield a set of URLs from a data source. +class Filter: + """ + A filter on the stream of URLs we find. A filter is, in essence, + a wildcard applied to the stream. 
You can think of this as an + operator that returns a tri-state when given a URL: - This one handles a single URL, manually specified in the config file. - """ + True -- this URL is to be included in the sitemap + None -- this URL is undecided + False -- this URL is to be dropped from the sitemap + """ - def __init__(self, attributes): - self._url = None # The lonely URL + def __init__(self, attributes): + self._wildcard = None # Pattern for wildcard match + self._regexp = None # Pattern for regexp match + self._pass = False # "Drop" filter vs. "Pass" filter + + if not ValidateAttributes('FILTER', attributes, + ('pattern', 'type', 'action')): + return + + # Check error count on the way in + num_errors = output.num_errors + + # Fetch the attributes + pattern = attributes.get('pattern') + type = attributes.get('type', 'wildcard') + action = attributes.get('action', 'drop') + if type: + type = type.lower() + if action: + action = action.lower() + + # Verify the attributes + if not pattern: + output.Error('On a filter you must specify a "pattern" to match') + elif (not type) or ((type != 'wildcard') and (type != 'regexp')): + output.Error('On a filter you must specify either \'type="wildcard"\' ' + 'or \'type="regexp"\'') + elif (action != 'pass') and (action != 'drop'): + output.Error('If you specify a filter action, it must be either ' + '\'action="pass"\' or \'action="drop"\'') + + # Set the rule + if action == 'drop': + self._pass = False + elif action == 'pass': + self._pass = True + + if type == 'wildcard': + self._wildcard = pattern + elif type == 'regexp': + try: + self._regexp = re.compile(pattern) + except re.error: + output.Error('Bad regular expression: %s' % pattern) + + # Log the final results iff we didn't add any errors + if num_errors == output.num_errors: + output.Log('Filter: %s any URL that matches %s "%s"' % + (action, type, pattern), 2) + # end def __init__ + + def Apply(self, url): + """ Process the URL, as above. """ + if (not url) or (not url.loc): + return None + + if self._wildcard: + if fnmatch.fnmatchcase(url.loc, self._wildcard): + return self._pass + return None + + if self._regexp: + if self._regexp.search(url.loc): + return self._pass + return None + + assert False # unreachable + # end def Apply +# end class Filter - if not ValidateAttributes('URL', attributes, - ('href', 'lastmod', 'changefreq', 'priority')): - return - url = URL() - for attr in attributes.keys(): - if attr == 'href': - url.TrySetAttribute('loc', attributes[attr]) - else: - url.TrySetAttribute(attr, attributes[attr]) +class InputURL: + """ + Each Input class knows how to yield a set of URLs from a data source. - if not url.loc: - output.Error('Url entries must have an href attribute.') - return + This one handles a single URL, manually specified in the config file. + """ - self._url = url - output.Log('Input: From URL "%s"' % self._url.loc, 2) - #end def __init__ + def __init__(self, attributes): + self._url = None # The lonely URL - def ProduceURLs(self, consumer): - """ Produces URLs from our data source, hands them in to the consumer. 
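The tri-state contract described in the Filter docstring is easiest to see in isolation: the first filter with an opinion (True or False) wins, and None defers to the next filter, or to the caller's default of keeping the URL. A standalone sketch with illustrative wildcard rules:

    import fnmatch

    def first_opinion(loc, rules):
        # rules: (wildcard_pattern, is_pass) pairs, checked in order.
        for pattern, is_pass in rules:
            if fnmatch.fnmatchcase(loc, pattern):
                return is_pass   # True = pass, False = drop
        return None              # undecided

    rules = [('*/private/*', False), ('http://example.com/*', True)]
    assert first_opinion('http://example.com/private/x', rules) is False
    assert first_opinion('http://example.com/docs/', rules) is True
    assert first_opinion('http://other.org/', rules) is None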
""" - if self._url: - consumer(self._url, True) - #end def ProduceURLs -#end class InputURL + if not ValidateAttributes('URL', attributes, + ('href', 'lastmod', 'changefreq', 'priority')): + return + url = URL() + for attr in attributes.keys(): + if attr == 'href': + url.TrySetAttribute('loc', attributes[attr]) + else: + url.TrySetAttribute(attr, attributes[attr]) -class InputURLList: - """ - Each Input class knows how to yield a set of URLs from a data source. - - This one handles a text file with a list of URLs - """ - - def __init__(self, attributes): - self._path = None # The file path - self._encoding = None # Encoding of that file - - if not ValidateAttributes('URLLIST', attributes, ('path', 'encoding')): - return - - self._path = attributes.get('path') - self._encoding = attributes.get('encoding', ENC_UTF8) - if self._path: - self._path = encoder.MaybeNarrowPath(self._path) - if os.path.isfile(self._path): - output.Log('Input: From URLLIST "%s"' % self._path, 2) - else: - output.Error('Can not locate file: %s' % self._path) - self._path = None - else: - output.Error('Urllist entries must have a "path" attribute.') - #end def __init__ - - def ProduceURLs(self, consumer): - """ Produces URLs from our data source, hands them in to the consumer. """ - - # Open the file - (frame, file) = OpenFileForRead(self._path, 'URLLIST') - if not file: - return - - # Iterate lines - linenum = 0 - for line in file.readlines(): - linenum = linenum + 1 - - # Strip comments and empty lines - if self._encoding: - line = encoder.WidenText(line, self._encoding) - line = line.strip() - if (not line) or line[0] == '#': - continue - - # Split the line on space - url = URL() - cols = line.split(' ') - for i in range(0,len(cols)): - cols[i] = cols[i].strip() - url.TrySetAttribute('loc', cols[0]) - - # Extract attributes from the other columns - for i in range(1,len(cols)): - if cols[i]: - try: - (attr_name, attr_val) = cols[i].split('=', 1) - url.TrySetAttribute(attr_name, attr_val) - except ValueError: - output.Warn('Line %d: Unable to parse attribute: %s' % - (linenum, cols[i])) - - # Pass it on - consumer(url, False) - - file.close() - if frame: - frame.close() - #end def ProduceURLs -#end class InputURLList + if not url.loc: + output.Error('Url entries must have an href attribute.') + return + self._url = url + output.Log('Input: From URL "%s"' % self._url.loc, 2) + # end def __init__ -class InputDirectory: - """ - Each Input class knows how to yield a set of URLs from a data source. + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. """ + if self._url: + consumer(self._url, True) + # end def ProduceURLs +# end class InputURL - This one handles a directory that acts as base for walking the filesystem. - """ - def __init__(self, attributes, base_url): - self._path = None # The directory - self._url = None # The URL equivelant - self._default_file = None +class InputURLList: + """ + Each Input class knows how to yield a set of URLs from a data source. 
- if not ValidateAttributes('DIRECTORY', attributes, ('path', 'url', - 'default_file')): - return + This one handles a text file with a list of URLs + """ - # Prep the path -- it MUST end in a sep - path = attributes.get('path') - if not path: - output.Error('Directory entries must have both "path" and "url" ' - 'attributes') - return - path = encoder.MaybeNarrowPath(path) - if not path.endswith(os.sep): - path = path + os.sep - if not os.path.isdir(path): - output.Error('Can not locate directory: %s' % path) - return - - # Prep the URL -- it MUST end in a sep - url = attributes.get('url') - if not url: - output.Error('Directory entries must have both "path" and "url" ' - 'attributes') - return - url = URL.Canonicalize(url) - if not url.endswith('/'): - url = url + '/' - if not url.startswith(base_url): - url = urlparse.urljoin(base_url, url) - if not url.startswith(base_url): - output.Error('The directory URL "%s" is not relative to the ' - 'base_url: %s' % (url, base_url)) - return - - # Prep the default file -- it MUST be just a filename - file = attributes.get('default_file') - if file: - file = encoder.MaybeNarrowPath(file) - if os.sep in file: - output.Error('The default_file "%s" can not include path information.' - % file) - file = None + def __init__(self, attributes): + self._path = None # The file path + self._encoding = None # Encoding of that file + + if not ValidateAttributes('URLLIST', attributes, ('path', 'encoding')): + return + + self._path = attributes.get('path') + self._encoding = attributes.get('encoding', ENC_UTF8) + if self._path: + self._path = encoder.MaybeNarrowPath(self._path) + if os.path.isfile(self._path): + output.Log('Input: From URLLIST "%s"' % self._path, 2) + else: + output.Error('Can not locate file: %s' % self._path) + self._path = None + else: + output.Error('Urllist entries must have a "path" attribute.') + # end def __init__ + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. """ + + # Open the file + (frame, file) = OpenFileForRead(self._path, 'URLLIST') + if not file: + return + + # Iterate lines + linenum = 0 + for line in file.readlines(): + linenum = linenum + 1 + + # Strip comments and empty lines + if self._encoding: + line = encoder.WidenText(line, self._encoding) + line = line.strip() + if (not line) or line[0] == '#': + continue + + # Split the line on space + url = URL() + cols = line.split(' ') + for i in range(0, len(cols)): + cols[i] = cols[i].strip() + url.TrySetAttribute('loc', cols[0]) + + # Extract attributes from the other columns + for i in range(1, len(cols)): + if cols[i]: + try: + (attr_name, attr_val) = cols[i].split('=', 1) + url.TrySetAttribute(attr_name, attr_val) + except ValueError: + output.Warn('Line %d: Unable to parse attribute: %s' % + (linenum, cols[i])) + + # Pass it on + consumer(url, False) + + file.close() + if frame: + frame.close() + # end def ProduceURLs +# end class InputURLList + + +class InputNewsURLList: + """ + Each Input class knows how to yield a set of URLs from a data source. 
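The URLLIST format consumed above is one URL per line, optionally followed by space-separated key=value attribute pairs. A sketch of the per-line parse (the URL and date values are illustrative):

    def parse_urllist_line(line):
        cols = [c.strip() for c in line.split(' ') if c.strip()]
        attrs = {'loc': cols[0]}
        for col in cols[1:]:
            name, sep, value = col.partition('=')
            if sep:
                attrs[name] = value
        return attrs

    parse_urllist_line(
        'http://example.com/docs/ lastmod=2004-11-14 priority=0.8')
    # -> {'loc': 'http://example.com/docs/',
    #     'lastmod': '2004-11-14', 'priority': '0.8'}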
- self._path = path - self._url = url - self._default_file = file - if file: - output.Log('Input: From DIRECTORY "%s" (%s) with default file "%s"' - % (path, url, file), 2) - else: - output.Log('Input: From DIRECTORY "%s" (%s) with no default file' - % (path, url), 2) - #end def __init__ + This one handles a text file with a list of News URLs and their metadata + """ - def ProduceURLs(self, consumer): - """ Produces URLs from our data source, hands them in to the consumer. """ - if not self._path: - return + def __init__(self, attributes): + self._path = None # The file path + self._encoding = None # Encoding of that file + self._tag_order = [] # Order of URL metadata + + if not ValidateAttributes('URLLIST', attributes, ('path', 'encoding', 'tag_order')): + return + + self._path = attributes.get('path') + self._encoding = attributes.get('encoding', ENC_UTF8) + self._tag_order = attributes.get('tag_order') + + if self._path: + self._path = encoder.MaybeNarrowPath(self._path) + if os.path.isfile(self._path): + output.Log('Input: From URLLIST "%s"' % self._path, 2) + else: + output.Error('Can not locate file: %s' % self._path) + self._path = None + else: + output.Error('Urllist entries must have a "path" attribute.') + + # parse tag_order into an array + # tag_order_ascii created for more readable logging + tag_order_ascii = [] + if self._tag_order: + self._tag_order = self._tag_order.split(",") + for i in range(0, len(self._tag_order)): + element = self._tag_order[i].strip().lower() + self._tag_order[i] = element + tag_order_ascii.append(element.encode('ascii')) + output.Log( + 'Input: From URLLIST tag order is "%s"' % tag_order_ascii, 0) + else: + output.Error('News Urllist configuration file must contain tag_order ' + 'to define Sitemap metatags.') + + # verify all tag_order inputs are valid + tag_order_dict = {} + for tag in self._tag_order: + tag_order_dict[tag] = "" + if not ValidateAttributes('URLLIST', tag_order_dict, + NEWS_SITEMAP_TAGS): + return + + # loc tag must be present + loc_tag = False + for tag in self._tag_order: + if tag == 'loc': + loc_tag = True + break + if not loc_tag: + output.Error('News Urllist tag_order in configuration file ' + 'does not contain "loc" value: %s' % tag_order_ascii) + # end def __init__ + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. """ + + # Open the file + (frame, file) = OpenFileForRead(self._path, 'URLLIST') + if not file: + return + + # Iterate lines + linenum = 0 + for line in file.readlines(): + linenum = linenum + 1 + + # Strip comments and empty lines + if self._encoding: + line = encoder.WidenText(line, self._encoding) + line = line.strip() + if (not line) or line[0] == '#': + continue + + # Split the line on tabs + url = NewsURL() + cols = line.split('\t') + for i in range(0, len(cols)): + cols[i] = cols[i].strip() + + for i in range(0, len(cols)): + if cols[i]: + attr_value = cols[i] + if i < len(self._tag_order): + attr_name = self._tag_order[i] + try: + url.TrySetAttribute(attr_name, attr_value) + except ValueError: + output.Warn('Line %d: Unable to parse attribute: %s' % + (linenum, cols[i])) + + # Pass it on + consumer(url, False) + + file.close() + if frame: + frame.close() + # end def ProduceURLs +# end class InputNewsURLList - root_path = self._path - root_URL = self._url - root_file = "index.html" - def DecideFilename(name): - assert "/" not in name +class InputDirectory: + """ + Each Input class knows how to yield a set of URLs from a data source. 
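The news variant above reads its column layout from the configuration's tag_order attribute, then splits each data line on tabs. A sketch of that pairing, with illustrative values:

    tag_order = [t.strip().lower()
                 for t in 'loc, publication_date, keywords'.split(',')]

    line = 'http://example.com/story\t2004-11-14T01:00:00Z\telections'
    record = dict(zip(tag_order, (c.strip() for c in line.split('\t'))))
    # record -> {'loc': 'http://example.com/story',
    #            'publication_date': '2004-11-14T01:00:00Z',
    #            'keywords': 'elections'}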
- if name in ( "robots.txt, " ): - return False + This one handles a directory that acts as base for walking the filesystem. + """ - if ".thumbnail." in name: - return False + def __init__(self, attributes, base_url): + self._path = None # The directory + self._url = None # The URL equivalent + self._default_file = None + self._remove_empty_directories = False + + if not ValidateAttributes('DIRECTORY', attributes, ('path', 'url', + 'default_file', 'remove_empty_directories')): + return + + # Prep the path -- it MUST end in a sep + path = attributes.get('path') + if not path: + output.Error('Directory entries must have both "path" and "url" ' + 'attributes') + return + path = encoder.MaybeNarrowPath(path) + if not path.endswith(os.sep): + path = path + os.sep + if not os.path.isdir(path): + output.Error('Can not locate directory: %s' % path) + return + + # Prep the URL -- it MUST end in a sep + url = attributes.get('url') + if not url: + output.Error('Directory entries must have both "path" and "url" ' + 'attributes') + return + url = URL.Canonicalize(url) + if not url.endswith('/'): + url = url + '/' + if not url.startswith(base_url): + url = urljoin(base_url, url) + if not url.startswith(base_url): + output.Error('The directory URL "%s" is not relative to the ' + 'base_url: %s' % (url, base_url)) + return + + # Prep the default file -- it MUST be just a filename + file = attributes.get('default_file') + if file: + file = encoder.MaybeNarrowPath(file) + if os.sep in file: + output.Error('The default_file "%s" can not include path information.' + % file) + file = None + + # Prep the remove_empty_directories -- default is false + remove_empty_directories = attributes.get('remove_empty_directories') + if remove_empty_directories: + if (remove_empty_directories == '1') or \ + (remove_empty_directories.lower() == 'true'): + remove_empty_directories = True + elif (remove_empty_directories == '0') or \ + (remove_empty_directories.lower() == 'false'): + remove_empty_directories = False + # otherwise the user set a non-default value + else: + output.Error('Configuration file remove_empty_directories ' + 'value is not recognized. Value must be true or false.') + return + else: + remove_empty_directories = False - if re.match( r"google[a-f0-9]+.html", name ): - return False + self._path = path + self._url = url + self._default_file = file + self._remove_empty_directories = remove_empty_directories - return not re.match( r"^index(\-\d+)?.html$", name ) + if file: + output.Log('Input: From DIRECTORY "%s" (%s) with default file "%s"' + % (path, url, file), 2) + else: + output.Log('Input: From DIRECTORY "%s" (%s) with no default file' + % (path, url), 2) + # end def __init__ + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. """ + if not self._path: + return + + root_path = self._path + root_URL = self._url + root_file = self._default_file + remove_empty_directories = self._remove_empty_directories + + def HasReadPermissions(path): + """ Verifies a given path has read permissions. """ + stat_info = os.stat(path) + mode = stat_info[stat.ST_MODE] + if mode & stat.S_IREAD: + return True + else: + return None + + def PerFile(dirpath, name): + """ + Called once per file. 
+ Note that 'name' will occasionally be None -- for a directory itself + """ + # Pull a timestamp + url = URL() + isdir = False + try: + if name: + path = os.path.join(dirpath, name) + else: + path = dirpath + isdir = os.path.isdir(path) + time = None + if isdir and root_file: + file = os.path.join(path, root_file) + try: + time = os.stat(file)[stat.ST_MTIME] + except OSError: + pass + if not time: + time = os.stat(path)[stat.ST_MTIME] + url.lastmod = TimestampISO8601(time) + except OSError: + pass + except ValueError: + pass + + # Build a URL + middle = dirpath[len(root_path):] + if os.sep != '/': + middle = middle.replace(os.sep, '/') + if middle: + middle = middle + '/' + if name: + middle = middle + name + if isdir: + middle = middle + '/' + url.TrySetAttribute( + 'loc', root_URL + encoder.WidenText(middle, None)) + + # Suppress default files. (All the way down here so we can log + # it.) + if name and (root_file == name): + url.Log(prefix='IGNORED (default file)', level=2) + return + + # Suppress directories when remove_empty_directories="true" + try: + if isdir: + if HasReadPermissions(path): + if remove_empty_directories == 'true' and \ + len(os.listdir(path)) == 0: + output.Log( + 'IGNORED empty directory %s' % str(path), level=1) + return + elif path == self._path: + output.Error('IGNORED configuration file directory input %s due ' + 'to file permissions' % self._path) + else: + output.Log('IGNORED files within directory %s due to file ' + 'permissions' % str(path), level=0) + except OSError: + pass + except ValueError: + pass + + consumer(url, False) + # end def PerFile + + def PerDirectory(ignore, dirpath, namelist): + """ + Called once per directory with a list of all the contained files/dirs. + """ + ignore = ignore # Avoid warnings of an unused parameter + + if not dirpath.startswith(root_path): + output.Warn('Unable to decide what the root path is for directory: ' + '%s' % dirpath) + return + + for name in namelist: + PerFile(dirpath, name) + # end def PerDirectory + + output.Log('Walking DIRECTORY "%s"' % self._path, 1) + PerFile(self._path, None) + os.path.walk(self._path, PerDirectory, None) + # end def ProduceURLs +# end class InputDirectory - def DecideDirectory(dirpath): - subpath = dirpath[len(root_path):] - assert not subpath.startswith( "/" ), subpath +class InputAccessLog: + """ + Each Input class knows how to yield a set of URLs from a data source. - for remove in ( "assets", ): - if subpath == remove or subpath.startswith( remove + os.path.sep ): - return False - else: - return True + This one handles access logs. It's non-trivial in that we want to + auto-detect log files in the Common Logfile Format (as used by Apache, + for instance) and the Extended Log File Format (as used by IIS, for + instance). + """ - def PerFile(dirpath, name): - """ - Called once per file. - Note that 'name' will occasionally be None -- for a directory itself - """ - if not DecideDirectory(dirpath): - return - - if name is not None and not DecideFilename(name): - return - - # Pull a timestamp - url = URL() - isdir = False - try: - if name: - path = os.path.join(dirpath, name) + def __init__(self, attributes): + self._path = None # The file path + self._encoding = None # Encoding of that file + self._is_elf = False # Extended Log File Format? + self._is_clf = False # Common Logfile Format? 
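InputDirectory.ProduceURLs above drives everything through os.path.walk, which exists only in Python 2. A modern sketch of the same path-to-URL mapping using os.walk instead (root values are illustrative; the default-file and permission handling are omitted):

    import os
    import time

    def walk_sketch(root_path, root_url):
        for dirpath, dirnames, filenames in os.walk(root_path):
            for name in filenames:
                path = os.path.join(dirpath, name)
                # The path relative to the root becomes the URL tail.
                middle = os.path.relpath(path, root_path).replace(os.sep, '/')
                lastmod = time.strftime('%Y-%m-%dT%H:%M:%SZ',
                                        time.gmtime(os.stat(path).st_mtime))
                yield root_url + middle, lastmod

    # for loc, lastmod in walk_sketch('/var/www/', 'http://example.com/'):
    #     print(loc, lastmod)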
+ self._elf_status = -1 # ELF field: '200' + self._elf_method = -1 # ELF field: 'HEAD' + self._elf_uri = -1 # ELF field: '/foo?bar=1' + self._elf_urifrag1 = -1 # ELF field: '/foo' + self._elf_urifrag2 = -1 # ELF field: 'bar=1' + + if not ValidateAttributes('ACCESSLOG', attributes, ('path', 'encoding')): + return + + self._path = attributes.get('path') + self._encoding = attributes.get('encoding', ENC_UTF8) + if self._path: + self._path = encoder.MaybeNarrowPath(self._path) + if os.path.isfile(self._path): + output.Log('Input: From ACCESSLOG "%s"' % self._path, 2) + else: + output.Error('Can not locate file: %s' % self._path) + self._path = None else: - path = dirpath - isdir = os.path.isdir(path) - time = None - if isdir and root_file: - file = os.path.join(path, root_file) - try: - time = os.stat(file)[stat.ST_MTIME]; - except OSError: - pass - if not time: - time = os.stat(path)[stat.ST_MTIME]; - url.lastmod = TimestampISO8601(time) - except OSError: - pass - except ValueError: - pass - - # Build a URL - middle = dirpath[len(root_path):] - if os.sep != '/': - middle = middle.replace(os.sep, '/') - if middle: - middle = middle + '/' - if name: - middle = middle + name - if isdir: - middle = middle + '/' - url.TrySetAttribute('loc', root_URL + encoder.WidenText(middle, None)) - - # Suppress default files. (All the way down here so we can log it.) - if name and (root_file == name): - url.Log(prefix='IGNORED (default file)', level=2) - return - - consumer(url, False) - #end def PerFile - - def PerDirectory(ignore, dirpath, namelist): - """ - Called once per directory with a list of all the contained files/dirs. - """ - ignore = ignore # Avoid warnings of an unused parameter - - if not dirpath.startswith(root_path): - output.Warn('Unable to decide what the root path is for directory: ' - '%s' % dirpath) - return - - if not DecideDirectory(dirpath): - return - - for name in namelist: - PerFile(dirpath, name) - #end def PerDirectory - - output.Log('Walking DIRECTORY "%s"' % self._path, 1) - PerFile(self._path, None) - os.path.walk(self._path, PerDirectory, None) - #end def ProduceURLs -#end class InputDirectory - + output.Error('Accesslog entries must have a "path" attribute.') + # end def __init__ + + def RecognizeELFLine(self, line): + """ Recognize the Fields directive that heads an ELF file """ + if not line.startswith('#Fields:'): + return False + fields = line.split(' ') + del fields[0] + for i in range(0, len(fields)): + field = fields[i].strip() + if field == 'sc-status': + self._elf_status = i + elif field == 'cs-method': + self._elf_method = i + elif field == 'cs-uri': + self._elf_uri = i + elif field == 'cs-uri-stem': + self._elf_urifrag1 = i + elif field == 'cs-uri-query': + self._elf_urifrag2 = i + output.Log('Recognized an Extended Log File Format file.', 2) + return True + # end def RecognizeELFLine + + def GetELFLine(self, line): + """ Fetch the requested URL from an ELF line """ + fields = line.split(' ') + count = len(fields) + + # Verify status was Ok + if self._elf_status >= 0: + if self._elf_status >= count: + return None + if not fields[self._elf_status].strip() == '200': + return None + + # Verify method was HEAD or GET + if self._elf_method >= 0: + if self._elf_method >= count: + return None + if not fields[self._elf_method].strip() in ('HEAD', 'GET'): + return None + + # Pull the full URL if we can + if self._elf_uri >= 0: + if self._elf_uri >= count: + return None + url = fields[self._elf_uri].strip() + if url != '-': + return url + + # Put together a fragmentary URL + 
if self._elf_urifrag1 >= 0: + if self._elf_urifrag1 >= count or self._elf_urifrag2 >= count: + return None + urlfrag1 = fields[self._elf_urifrag1].strip() + urlfrag2 = None + if self._elf_urifrag2 >= 0: + urlfrag2 = fields[self._elf_urifrag2] + if urlfrag1 and (urlfrag1 != '-'): + if urlfrag2 and (urlfrag2 != '-'): + urlfrag1 = urlfrag1 + '?' + urlfrag2 + return urlfrag1 -class InputAccessLog: - """ - Each Input class knows how to yield a set of URLs from a data source. - - This one handles access logs. It's non-trivial in that we want to - auto-detect log files in the Common Logfile Format (as used by Apache, - for instance) and the Extended Log File Format (as used by IIS, for - instance). - """ - - def __init__(self, attributes): - self._path = None # The file path - self._encoding = None # Encoding of that file - self._is_elf = False # Extended Log File Format? - self._is_clf = False # Common Logfile Format? - self._elf_status = -1 # ELF field: '200' - self._elf_method = -1 # ELF field: 'HEAD' - self._elf_uri = -1 # ELF field: '/foo?bar=1' - self._elf_urifrag1 = -1 # ELF field: '/foo' - self._elf_urifrag2 = -1 # ELF field: 'bar=1' - - if not ValidateAttributes('ACCESSLOG', attributes, ('path', 'encoding')): - return - - self._path = attributes.get('path') - self._encoding = attributes.get('encoding', ENC_UTF8) - if self._path: - self._path = encoder.MaybeNarrowPath(self._path) - if os.path.isfile(self._path): - output.Log('Input: From ACCESSLOG "%s"' % self._path, 2) - else: - output.Error('Can not locate file: %s' % self._path) - self._path = None - else: - output.Error('Accesslog entries must have a "path" attribute.') - #end def __init__ - - def RecognizeELFLine(self, line): - """ Recognize the Fields directive that heads an ELF file """ - if not line.startswith('#Fields:'): - return False - fields = line.split(' ') - del fields[0] - for i in range(0, len(fields)): - field = fields[i].strip() - if field == 'sc-status': - self._elf_status = i - elif field == 'cs-method': - self._elf_method = i - elif field == 'cs-uri': - self._elf_uri = i - elif field == 'cs-uri-stem': - self._elf_urifrag1 = i - elif field == 'cs-uri-query': - self._elf_urifrag2 = i - output.Log('Recognized an Extended Log File Format file.', 2) - return True - #end def RecognizeELFLine - - def GetELFLine(self, line): - """ Fetch the requested URL from an ELF line """ - fields = line.split(' ') - count = len(fields) - - # Verify status was Ok - if self._elf_status >= 0: - if self._elf_status >= count: return None - if not fields[self._elf_status].strip() == '200': + # end def GetELFLine + + def RecognizeCLFLine(self, line): + """ Try to tokenize a logfile line according to CLF pattern and see if + it works. """ + match = ACCESSLOG_CLF_PATTERN.match(line) + recognize = match and (match.group(1) in ('HEAD', 'GET')) + if recognize: + output.Log('Recognized a Common Logfile Format file.', 2) + return recognize + # end def RecognizeCLFLine + + def GetCLFLine(self, line): + """ Fetch the requested URL from a CLF line """ + match = ACCESSLOG_CLF_PATTERN.match(line) + if match: + request = match.group(1) + if request in ('HEAD', 'GET'): + return match.group(2) return None + # end def GetCLFLine + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. 
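The Extended Log File Format handling above boils down to: read the column names from the '#Fields:' directive, then index each hit line by those names and keep 200-status GET/HEAD requests. A compressed sketch with an illustrative header and hit line:

    header = '#Fields: date time cs-method cs-uri-stem cs-uri-query sc-status'
    index = {name: i for i, name in enumerate(header.split(' ')[1:])}

    hit = '2004-11-14 01:00:00 GET /index.html - 200'
    cols = hit.split(' ')
    if (cols[index['sc-status']] == '200'
            and cols[index['cs-method']] in ('HEAD', 'GET')):
        print(cols[index['cs-uri-stem']])   # -> /index.html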
""" + + # Open the file + (frame, file) = OpenFileForRead(self._path, 'ACCESSLOG') + if not file: + return + + # Iterate lines + for line in file.readlines(): + if self._encoding: + line = encoder.WidenText(line, self._encoding) + line = line.strip() + + # If we don't know the format yet, try them both + if (not self._is_clf) and (not self._is_elf): + self._is_elf = self.RecognizeELFLine(line) + self._is_clf = self.RecognizeCLFLine(line) + + # Digest the line + match = None + if self._is_elf: + match = self.GetELFLine(line) + elif self._is_clf: + match = self.GetCLFLine(line) + if not match: + continue + + # Pass it on + url = URL() + url.TrySetAttribute('loc', match) + consumer(url, True) + + file.close() + if frame: + frame.close() + # end def ProduceURLs +# end class InputAccessLog - # Verify method was HEAD or GET - if self._elf_method >= 0: - if self._elf_method >= count: - return None - if not fields[self._elf_method].strip() in ('HEAD', 'GET'): - return None - # Pull the full URL if we can - if self._elf_uri >= 0: - if self._elf_uri >= count: - return None - url = fields[self._elf_uri].strip() - if url != '-': - return url +class FilePathGenerator: + """ + This class generates filenames in a series, upon request. + You can request any iteration number at any time, you don't + have to go in order. + + Example of iterations for '/path/foo.xml.gz': + 0 --> /path/foo.xml.gz + 1 --> /path/foo1.xml.gz + 2 --> /path/foo2.xml.gz + _index.xml --> /path/foo_index.xml + """ - # Put together a fragmentary URL - if self._elf_urifrag1 >= 0: - if self._elf_urifrag1 >= count or self._elf_urifrag2 >= count: - return None - urlfrag1 = fields[self._elf_urifrag1].strip() - urlfrag2 = None - if self._elf_urifrag2 >= 0: - urlfrag2 = fields[self._elf_urifrag2] - if urlfrag1 and (urlfrag1 != '-'): - if urlfrag2 and (urlfrag2 != '-'): - urlfrag1 = urlfrag1 + '?' + urlfrag2 - return urlfrag1 + def __init__(self): + self.is_gzip = False # Is this a GZIP file? + + self._path = None # '/path/' + self._prefix = None # 'foo' + self._suffix = None # '.xml.gz' + # end def __init__ + + def Preload(self, path): + """ Splits up a path into forms ready for recombination. """ + path = encoder.MaybeNarrowPath(path) + + # Get down to a base name + path = os.path.normpath(path) + base = os.path.basename(path).lower() + if not base: + output.Error('Couldn\'t parse the file path: %s' % path) + return False + lenbase = len(base) + + # Recognize extension + lensuffix = 0 + compare_suffix = ['.xml', '.xml.gz', '.gz'] + for suffix in compare_suffix: + if base.endswith(suffix): + lensuffix = len(suffix) + break + if not lensuffix: + output.Error('The path "%s" doesn\'t end in a supported file ' + 'extension.' % path) + return False + self.is_gzip = suffix.endswith('.gz') + + # Split the original path + lenpath = len(path) + self._path = path[:lenpath - lenbase] + self._prefix = path[lenpath - lenbase:lenpath - lensuffix] + self._suffix = path[lenpath - lensuffix:] - return None - #end def GetELFLine - - def RecognizeCLFLine(self, line): - """ Try to tokenize a logfile line according to CLF pattern and see if - it works. 
""" - match = ACCESSLOG_CLF_PATTERN.match(line) - recognize = match and (match.group(1) in ('HEAD', 'GET')) - if recognize: - output.Log('Recognized a Common Logfile Format file.', 2) - return recognize - #end def RecognizeCLFLine - - def GetCLFLine(self, line): - """ Fetch the requested URL from a CLF line """ - match = ACCESSLOG_CLF_PATTERN.match(line) - if match: - request = match.group(1) - if request in ('HEAD', 'GET'): - return match.group(2) - return None - #end def GetCLFLine - - def ProduceURLs(self, consumer): - """ Produces URLs from our data source, hands them in to the consumer. """ - - # Open the file - (frame, file) = OpenFileForRead(self._path, 'ACCESSLOG') - if not file: - return - - # Iterate lines - for line in file.readlines(): - if self._encoding: - line = encoder.WidenText(line, self._encoding) - line = line.strip() - - # If we don't know the format yet, try them both - if (not self._is_clf) and (not self._is_elf): - self._is_elf = self.RecognizeELFLine(line) - self._is_clf = self.RecognizeCLFLine(line) - - # Digest the line - match = None - if self._is_elf: - match = self.GetELFLine(line) - elif self._is_clf: - match = self.GetCLFLine(line) - if not match: - continue - - # Pass it on - url = URL() - url.TrySetAttribute('loc', match) - consumer(url, True) - - file.close() - if frame: - frame.close() - #end def ProduceURLs -#end class InputAccessLog - - -class InputSitemap(xml.sax.handler.ContentHandler): - - """ - Each Input class knows how to yield a set of URLs from a data source. - - This one handles Sitemap files and Sitemap index files. For the sake - of simplicity in design (and simplicity in interfacing with the SAX - package), we do not handle these at the same time, recursively. Instead - we read an index file completely and make a list of Sitemap files, then - go back and process each Sitemap. - """ - - class _ContextBase(object): - - """Base class for context handlers in our SAX processing. A context - handler is a class that is responsible for understanding one level of - depth in the XML schema. The class knows what sub-tags are allowed, - and doing any processing specific for the tag we're in. - - This base class is the API filled in by specific context handlers, - all defined below. - """ + return True + # end def Preload + + def GeneratePath(self, instance): + """ Generates the iterations, as described above. """ + prefix = self._path + self._prefix + if isinstance(instance, int): + if instance: + return '%s%d%s' % (prefix, instance, self._suffix) + return prefix + self._suffix + return prefix + instance + # end def GeneratePath + + def GenerateURL(self, instance, root_url): + """ Generates iterations, but as a URL instead of a path. 
""" + prefix = root_url + self._prefix + retval = None + if isinstance(instance, int): + if instance: + retval = '%s%d%s' % (prefix, instance, self._suffix) + else: + retval = prefix + self._suffix + else: + retval = prefix + instance + return URL.Canonicalize(retval) + # end def GenerateURL - def __init__(self, subtags): - """Initialize with a sequence of the sub-tags that would be valid in - this context.""" - self._allowed_tags = subtags # Sequence of sub-tags we can have - self._last_tag = None # Most recent seen sub-tag - #end def __init__ - - def AcceptTag(self, tag): - """Returns True iff opening a sub-tag is valid in this context.""" - valid = tag in self._allowed_tags - if valid: - self._last_tag = tag - else: - self._last_tag = None - return valid - #end def AcceptTag - - def AcceptText(self, text): - """Returns True iff a blurb of text is valid in this context.""" - return False - #end def AcceptText - - def Open(self): - """The context is opening. Do initialization.""" - pass - #end def Open - - def Close(self): - """The context is closing. Return our result, if any.""" - pass - #end def Close - - def Return(self, result): - """We're returning to this context after handling a sub-tag. This - method is called with the result data from the sub-tag that just - closed. Here in _ContextBase, if we ever see a result it means - the derived child class forgot to override this method.""" - if result: - raise NotImplementedError - #end def Return - #end class _ContextBase - - class _ContextUrlSet(_ContextBase): - - """Context handler for the document node in a Sitemap.""" + def GenerateWildURL(self, root_url): + """ Generates a wildcard that should match all our iterations """ + prefix = URL.Canonicalize(root_url + self._prefix) + temp = URL.Canonicalize(prefix + self._suffix) + suffix = temp[len(prefix):] + return prefix + '*' + suffix + # end def GenerateURL +# end class FilePathGenerator - def __init__(self): - InputSitemap._ContextBase.__init__(self, ('url',)) - #end def __init__ - #end class _ContextUrlSet - - class _ContextUrl(_ContextBase): - - """Context handler for a URL node in a Sitemap.""" - - def __init__(self, consumer): - """Initialize this context handler with the callable consumer that - wants our URLs.""" - InputSitemap._ContextBase.__init__(self, URL.__slots__) - self._url = None # The URL object we're building - self._consumer = consumer # Who wants to consume it - #end def __init__ - - def Open(self): - """Initialize the URL.""" - assert not self._url - self._url = URL() - #end def Open - - def Close(self): - """Pass the URL to the consumer and reset it to None.""" - assert self._url - self._consumer(self._url, False) - self._url = None - #end def Close - - def Return(self, result): - """A value context has closed, absorb the data it gave us.""" - assert self._url - if result: - self._url.TrySetAttribute(self._last_tag, result) - #end def Return - #end class _ContextUrl - - class _ContextSitemapIndex(_ContextBase): - - """Context handler for the document node in an index file.""" - def __init__(self): - InputSitemap._ContextBase.__init__(self, ('sitemap',)) - self._loclist = [] # List of accumulated Sitemap URLs - #end def __init__ - - def Open(self): - """Just a quick verify of state.""" - assert not self._loclist - #end def Open - - def Close(self): - """Return our list of accumulated URLs.""" - if self._loclist: - temp = self._loclist - self._loclist = [] - return temp - #end def Close - - def Return(self, result): - """Getting a new loc URL, add it to the 
collection.""" - if result: - self._loclist.append(result) - #end def Return - #end class _ContextSitemapIndex - - class _ContextSitemap(_ContextBase): - - """Context handler for a Sitemap entry in an index file.""" +class PerURLStatistics: + """ Keep track of some simple per-URL statistics, like file extension. """ def __init__(self): - InputSitemap._ContextBase.__init__(self, ('loc', 'lastmod')) - self._loc = None # The URL to the Sitemap - #end def __init__ - - def Open(self): - """Just a quick verify of state.""" - assert not self._loc - #end def Open - - def Close(self): - """Return our URL to our parent.""" - if self._loc: - temp = self._loc - self._loc = None - return temp - output.Warn('In the Sitemap index file, a "sitemap" entry had no "loc".') - #end def Close - - def Return(self, result): - """A value has closed. If it was a 'loc', absorb it.""" - if result and (self._last_tag == 'loc'): - self._loc = result - #end def Return - #end class _ContextSitemap - - class _ContextValue(_ContextBase): - - """Context handler for a single value. We return just the value. The - higher level context has to remember what tag led into us.""" + self._extensions = {} # Count of extension instances + # end def __init__ + + def Consume(self, url): + """ Log some stats for the URL. At the moment, that means extension. """ + if url and url.loc: + (scheme, netloc, path, query, frag) = urlsplit(url.loc) + if not path: + return + + # Recognize directories + if path.endswith('/'): + if '/' in self._extensions: + self._extensions['/'] = self._extensions['/'] + 1 + else: + self._extensions['/'] = 1 + return + + # Strip to a filename + i = path.rfind('/') + if i >= 0: + assert i < len(path) + path = path[i:] + + # Find extension + i = path.rfind('.') + if i > 0: + assert i < len(path) + ext = path[i:].lower() + if ext in self._extensions: + self._extensions[ext] = self._extensions[ext] + 1 + else: + self._extensions[ext] = 1 + else: + if '(no extension)' in self._extensions: + self._extensions['(no extension)'] = self._extensions[ + '(no extension)'] + 1 + else: + self._extensions['(no extension)'] = 1 + # end def Consume + + def Log(self): + """ Dump out stats to the output. 
""" + if len(self._extensions): + output.Log('Count of file extensions on URLs:', 1) + set = sorted(self._extensions.keys()) + for ext in set: + output.Log(' %7d %s' % (self._extensions[ext], ext), 1) + # end def Log - def __init__(self): - InputSitemap._ContextBase.__init__(self, ()) - self._text = None - #end def __init__ - - def AcceptText(self, text): - """Allow all text, adding it to our buffer.""" - if self._text: - self._text = self._text + text - else: - self._text = text - return True - #end def AcceptText - - def Open(self): - """Initialize our buffer.""" - self._text = None - #end def Open - - def Close(self): - """Return what's in our buffer.""" - text = self._text - self._text = None - if text: - text = text.strip() - return text - #end def Close - #end class _ContextValue - - def __init__(self, attributes): - """Initialize with a dictionary of attributes from our entry in the - config file.""" - xml.sax.handler.ContentHandler.__init__(self) - self._pathlist = None # A list of files - self._current = -1 # Current context in _contexts - self._contexts = None # The stack of contexts we allow - self._contexts_idx = None # ...contexts for index files - self._contexts_stm = None # ...contexts for Sitemap files - - if not ValidateAttributes('SITEMAP', attributes, ['path']): - return - - # Init the first file path - path = attributes.get('path') - if path: - path = encoder.MaybeNarrowPath(path) - if os.path.isfile(path): - output.Log('Input: From SITEMAP "%s"' % path, 2) - self._pathlist = [path] - else: - output.Error('Can not locate file "%s"' % path) - else: - output.Error('Sitemap entries must have a "path" attribute.') - #end def __init__ - - def ProduceURLs(self, consumer): - """In general: Produces URLs from our data source, hand them to the - callable consumer. - - In specific: Iterate over our list of paths and delegate the actual - processing to helper methods. This is a complexity no other data source - needs to suffer. We are unique in that we can have files that tell us - to bring in other files. - - Note the decision to allow an index file or not is made in this method. - If we call our parser with (self._contexts == None) the parser will - grab whichever context stack can handle the file. IE: index is allowed. - If instead we set (self._contexts = ...) before parsing, the parser - will only use the stack we specify. IE: index not allowed. - """ - # Set up two stacks of contexts - self._contexts_idx = [InputSitemap._ContextSitemapIndex(), - InputSitemap._ContextSitemap(), - InputSitemap._ContextValue()] - - self._contexts_stm = [InputSitemap._ContextUrlSet(), - InputSitemap._ContextUrl(consumer), - InputSitemap._ContextValue()] - - # Process the first file - assert self._pathlist - path = self._pathlist[0] - self._contexts = None # We allow an index file here - self._ProcessFile(path) - - # Iterate over remaining files - self._contexts = self._contexts_stm # No index files allowed - for path in self._pathlist[1:]: - self._ProcessFile(path) - #end def ProduceURLs - - def _ProcessFile(self, path): - """Do per-file reading/parsing/consuming for the file path passed in.""" - assert path - - # Open our file - (frame, file) = OpenFileForRead(path, 'SITEMAP') - if not file: - return - - # Rev up the SAX engine - try: - self._current = -1 - xml.sax.parse(file, self) - except SchemaError: - output.Error('An error in file "%s" made us abort reading the Sitemap.' 
- % path) - except IOError: - output.Error('Cannot read from file "%s"' % path) - except xml.sax._exceptions.SAXParseException as e: - output.Error('XML error in the file "%s" (line %d, column %d): %s' % - (path, e._linenum, e._colnum, e.getMessage())) - - # Clean up - file.close() - if frame: - frame.close() - #end def _ProcessFile - - def _MungeLocationListIntoFiles(self, urllist): - """Given a list of URLs, munge them into our self._pathlist property. - We do this by assuming all the files live in the same directory as - the first file in the existing pathlist. That is, we assume a - Sitemap index points to Sitemaps only in the same directory. This - is not true in general, but will be true for any output produced - by this script. - """ - assert self._pathlist - path = self._pathlist[0] - path = os.path.normpath(path) - dir = os.path.dirname(path) - wide = False - if type(path) == types.UnicodeType: - wide = True - - for url in urllist: - url = URL.Canonicalize(url) - output.Log('Index points to Sitemap file at: %s' % url, 2) - (scheme, netloc, path, query, frag) = urlsplit(url) - file = os.path.basename(path) - file = urllib.unquote(file) - if wide: - file = encoder.WidenText(file) - if dir: - file = dir + os.sep + file - if file: - self._pathlist.append(file) - output.Log('Will attempt to read Sitemap file: %s' % file, 1) - #end def _MungeLocationListIntoFiles - - def startElement(self, tag, attributes): - """SAX processing, called per node in the config stream. - As long as the new tag is legal in our current context, this - becomes an Open call on one context deeper. - """ - # If this is the document node, we may have to look for a context stack - if (self._current < 0) and not self._contexts: - assert self._contexts_idx and self._contexts_stm - if tag == 'urlset': - self._contexts = self._contexts_stm - elif tag == 'sitemapindex': - self._contexts = self._contexts_idx - output.Log('File is a Sitemap index.', 2) - else: - output.Error('The document appears to be neither a Sitemap nor a ' - 'Sitemap index.') - raise SchemaError - - # Display a kinder error on a common mistake - if (self._current < 0) and (self._contexts == self._contexts_stm) and ( - tag == 'sitemapindex'): - output.Error('A Sitemap index can not refer to another Sitemap index.') - raise SchemaError - - # Verify no unexpected attributes - if attributes: - text = '' - for attr in attributes.keys(): - # The document node will probably have namespaces - if self._current < 0: - if attr.find('xmlns') >= 0: - continue - if attr.find('xsi') >= 0: - continue - if text: - text = text + ', ' - text = text + attr - if text: - output.Warn('Did not expect any attributes on any tag, instead tag ' - '"%s" had attributes: %s' % (tag, text)) - - # Switch contexts - if (self._current < 0) or (self._contexts[self._current].AcceptTag(tag)): - self._current = self._current + 1 - assert self._current < len(self._contexts) - self._contexts[self._current].Open() - else: - output.Error('Can not accept tag "%s" where it appears.' % tag) - raise SchemaError - #end def startElement - - def endElement(self, tag): - """SAX processing, called per node in the config stream. - This becomes a call to Close on one context followed by a call - to Return on the previous. 
+ +class Sitemap(xml.sax.handler.ContentHandler): """ - tag = tag # Avoid warning on unused argument - assert self._current >= 0 - retval = self._contexts[self._current].Close() - self._current = self._current - 1 - if self._current >= 0: - self._contexts[self._current].Return(retval) - elif retval and (self._contexts == self._contexts_idx): - self._MungeLocationListIntoFiles(retval) - #end def endElement - - def characters(self, text): - """SAX processing, called when text values are read. Important to - note that one single text value may be split across multiple calls - of this method. + This is the big workhorse class that processes your inputs and spits + out sitemap files. It is built as a SAX handler for set up purposes. + That is, it processes an XML stream to bring itself up. """ - if (self._current < 0) or ( - not self._contexts[self._current].AcceptText(text)): - if text.strip(): - output.Error('Can not accept text "%s" where it appears.' % text) - raise SchemaError - #end def characters -#end class InputSitemap - - -class FilePathGenerator: - """ - This class generates filenames in a series, upon request. - You can request any iteration number at any time, you don't - have to go in order. - - Example of iterations for '/path/foo.xml.gz': - 0 --> /path/foo.xml.gz - 1 --> /path/foo1.xml.gz - 2 --> /path/foo2.xml.gz - _index.xml --> /path/foo_index.xml - """ - - def __init__(self): - self.is_gzip = False # Is this a GZIP file? - - self._path = None # '/path/' - self._prefix = None # 'foo' - self._suffix = None # '.xml.gz' - #end def __init__ - - def Preload(self, path): - """ Splits up a path into forms ready for recombination. """ - path = encoder.MaybeNarrowPath(path) - - # Get down to a base name - path = os.path.normpath(path) - base = os.path.basename(path).lower() - if not base: - output.Error('Couldn\'t parse the file path: %s' % path) - return False - lenbase = len(base) - - # Recognize extension - lensuffix = 0 - compare_suffix = ['.xml', '.xml.gz', '.gz'] - for suffix in compare_suffix: - if base.endswith(suffix): - lensuffix = len(suffix) - break - if not lensuffix: - output.Error('The path "%s" doesn\'t end in a supported file ' - 'extension.' % path) - return False - self.is_gzip = suffix.endswith('.gz') - - # Split the original path - lenpath = len(path) - self._path = path[:lenpath-lenbase] - self._prefix = path[lenpath-lenbase:lenpath-lensuffix] - self._suffix = path[lenpath-lensuffix:] - - return True - #end def Preload - - def GeneratePath(self, instance): - """ Generates the iterations, as described above. """ - prefix = self._path + self._prefix - if type(instance) == types.IntType: - if instance: - return '%s%d%s' % (prefix, instance, self._suffix) - return prefix + self._suffix - return prefix + instance - #end def GeneratePath - - def GenerateURL(self, instance, root_url): - """ Generates iterations, but as a URL instead of a path. 
""" - prefix = root_url + self._prefix - retval = None - if type(instance) == types.IntType: - if instance: - retval = '%s%d%s' % (prefix, instance, self._suffix) - else: - retval = prefix + self._suffix - else: - retval = prefix + instance - return URL.Canonicalize(retval) - #end def GenerateURL - def GenerateWildURL(self, root_url): - """ Generates a wildcard that should match all our iterations """ - prefix = URL.Canonicalize(root_url + self._prefix) - temp = URL.Canonicalize(prefix + self._suffix) - suffix = temp[len(prefix):] - return prefix + '*' + suffix - #end def GenerateURL -#end class FilePathGenerator + def __init__(self, suppress_notify): + xml.sax.handler.ContentHandler.__init__(self) + self._filters = [] # Filter objects + self._inputs = [] # Input objects + self._urls = {} # Maps URLs to count of dups + self._set = [] # Current set of URLs + self._filegen = None # Path generator for output files + self._wildurl1 = None # Sitemap URLs to filter out + self._wildurl2 = None # Sitemap URLs to filter out + self._sitemaps = 0 # Number of output files + # We init _dup_max to 2 so the default priority is 0.5 instead of 1.0 + self._dup_max = 2 # Max number of duplicate URLs + self._stat = PerURLStatistics() # Some simple stats + self._in_site = False # SAX: are we in a Site node? + self._in_Site_ever = False # SAX: were we ever in a Site? + + self._default_enc = None # Best encoding to try on URLs + self._base_url = None # Prefix to all valid URLs + self._store_into = None # Output filepath + self._sitemap_type = None # Sitemap type (web, mobile or news) + self._suppress = suppress_notify # Suppress notify of servers + # end def __init__ + + def ValidateBasicConfig(self): + """ Verifies (and cleans up) the basic user-configurable options. """ + all_good = True + + if self._default_enc: + encoder.SetUserEncoding(self._default_enc) + + # Canonicalize the base_url + if all_good and not self._base_url: + output.Error('A site needs a "base_url" attribute.') + all_good = False + if all_good and not URL.IsAbsolute(self._base_url): + output.Error('The "base_url" must be absolute, not relative: %s' % + self._base_url) + all_good = False + if all_good: + self._base_url = URL.Canonicalize(self._base_url) + if not self._base_url.endswith('/'): + self._base_url = self._base_url + '/' + output.Log('BaseURL is set to: %s' % self._base_url, 2) + + # Load store_into into a generator + if all_good: + if self._store_into: + self._filegen = FilePathGenerator() + if not self._filegen.Preload(self._store_into): + all_good = False + else: + output.Error('A site needs a "store_into" attribute.') + all_good = False + + # Ask the generator for patterns on what its output will look like + if all_good: + self._wildurl1 = self._filegen.GenerateWildURL(self._base_url) + self._wildurl2 = self._filegen.GenerateURL(SITEINDEX_SUFFIX, + self._base_url) + + # Unify various forms of False + if all_good: + if self._suppress: + if (isinstance(self._suppress, bytes_str)) or (isinstance(self._suppress, unicode_str)): + if (self._suppress == '0') or (self._suppress.lower() == 'false'): + self._suppress = False + + # Clean up the sitemap_type + if all_good: + match = False + # If sitemap_type is not specified, default to web sitemap + if not self._sitemap_type: + self._sitemap_type = 'web' + else: + self._sitemap_type = self._sitemap_type.lower() + for pattern in SITEMAP_TYPES: + if self._sitemap_type == pattern: + match = True + break + if not match: + output.Error('The "sitemap_type" value must be "web", "mobile" ' + 'or 
"news": %s' % self._sitemap_type) + all_good = False + output.Log('The Sitemap type is %s Sitemap.' % + self._sitemap_type.upper(), 0) + + # Done + if not all_good: + output.Log('See "example_config.xml" for more information.', 0) + return all_good + # end def ValidateBasicConfig + + def Generate(self): + """ Run over all the Inputs and ask them to Produce """ + # Run the inputs + for input in self._inputs: + input.ProduceURLs(self.ConsumeURL) + + # Do last flushes + if len(self._set): + self.FlushSet() + if not self._sitemaps: + output.Warn('No URLs were recorded, writing an empty sitemap.') + self.FlushSet() + + # Write an index as needed + if self._sitemaps > 1: + self.WriteIndex() + + # Notify + self.NotifySearch() + + # Dump stats + self._stat.Log() + # end def Generate + + def ConsumeURL(self, url, allow_fragment): + """ + All per-URL processing comes together here, regardless of Input. + Here we run filters, remove duplicates, spill to disk as needed, etc. + + """ + if not url: + return + + # Validate + if not url.Validate(self._base_url, allow_fragment): + return + + # Run filters + accept = None + for filter in self._filters: + accept = filter.Apply(url) + if accept is not None: + break + if not (accept or (accept is None)): + url.Log(prefix='FILTERED', level=2) + return + + # Ignore our out output URLs + if fnmatch.fnmatchcase(url.loc, self._wildurl1) or fnmatch.fnmatchcase( + url.loc, self._wildurl2): + url.Log(prefix='IGNORED (output file)', level=2) + return + + # Note the sighting + hash = url.MakeHash() + if hash in self._urls: + dup = self._urls[hash] + if dup > 0: + dup = dup + 1 + self._urls[hash] = dup + if self._dup_max < dup: + self._dup_max = dup + url.Log(prefix='DUPLICATE') + return + + # Acceptance -- add to set + self._urls[hash] = 1 + self._set.append(url) + self._stat.Consume(url) + url.Log() + + # Flush the set if needed + if len(self._set) >= MAXURLS_PER_SITEMAP: + self.FlushSet() + # end def ConsumeURL + + def FlushSet(self): + """ + Flush the current set of URLs to the output. This is a little + slow because we like to sort them all and normalize the priorities + before dumping. 
+ """ + + # Determine what Sitemap header to use (News or General) + if self._sitemap_type == 'news': + sitemap_header = NEWS_SITEMAP_HEADER + else: + sitemap_header = GENERAL_SITEMAP_HEADER + + # Sort and normalize + output.Log('Sorting and normalizing collected URLs.', 1) + self._set.sort() + for url in self._set: + hash = url.MakeHash() + dup = self._urls[hash] + if dup > 0: + self._urls[hash] = -1 + if not url.priority: + url.priority = '%.4f' % (float(dup) / float(self._dup_max)) + + # Get the filename we're going to write to + filename = self._filegen.GeneratePath(self._sitemaps) + if not filename: + output.Fatal('Unexpected: Couldn\'t generate output filename.') + self._sitemaps = self._sitemaps + 1 + output.Log('Writing Sitemap file "%s" with %d URLs' % + (filename, len(self._set)), 1) + + # Write to it + frame = None + file = None + try: + if self._filegen.is_gzip: + basename = os.path.basename(filename) + frame = open(filename, 'wb') + file = gzip.GzipFile( + fileobj=frame, filename=basename, mode='wt') + else: + file = open(filename, 'wt') + + file.write(sitemap_header) + for url in self._set: + url.WriteXML(file) + file.write(SITEMAP_FOOTER) + + file.close() + if frame: + frame.close() + + frame = None + file = None + except IOError: + output.Fatal('Couldn\'t write out to file: %s' % filename) + os.chmod(filename, 0o0644) + + # Flush + self._set = [] + # end def FlushSet + + def WriteIndex(self): + """ Write the master index of all Sitemap files """ + # Make a filename + filename = self._filegen.GeneratePath(SITEINDEX_SUFFIX) + if not filename: + output.Fatal( + 'Unexpected: Couldn\'t generate output index filename.') + output.Log('Writing index file "%s" with %d Sitemaps' % + (filename, self._sitemaps), 1) + + # Determine what Sitemap index header to use (News or General) + if self._sitemap_type == 'news': + sitemap_index_header = NEWS_SITEMAP_HEADER + else: + sitemap_index_header = GENERAL_SITEMAP_HEADER + + # Make a lastmod time + lastmod = TimestampISO8601(time.time()) + + # Write to it + try: + fd = open(filename, 'wt') + fd.write(sitemap_index_header) + + for mapnumber in range(0, self._sitemaps): + # Write the entry + mapurl = self._filegen.GenerateURL(mapnumber, self._base_url) + mapattributes = {'loc': mapurl, 'lastmod': lastmod} + fd.write(SITEINDEX_ENTRY % mapattributes) + + fd.write(SITEINDEX_FOOTER) + + fd.close() + fd = None + except IOError: + output.Fatal('Couldn\'t write out to file: %s' % filename) + os.chmod(filename, 0o0644) + # end def WriteIndex + + def NotifySearch(self): + """ Send notification of the new Sitemap(s) to the search engines. """ + if self._suppress: + output.Log('Search engine notification is suppressed.', 1) + return + + output.Log('Notifying search engines.', 1) + + # Override the urllib's opener class with one that doesn't ignore 404s + class ExceptionURLopener(FancyURLopener): + def http_error_default(self, url, fp, errcode, errmsg, headers): + output.Log('HTTP error %d: %s' % (errcode, errmsg), 2) + raise IOError + # end def http_error_default + # end class ExceptionURLOpener + if sys.version_info[0] == 3: + old_opener = urllib.request._urlopener + urllib.request._urlopener = ExceptionURLopener() + else: + old_opener = urllib._urlopener + urllib._urlopener = ExceptionURLopener() -class PerURLStatistics: - """ Keep track of some simple per-URL statistics, like file extension. 
""" - - def __init__(self): - self._extensions = {} # Count of extension instances - #end def __init__ - - def Consume(self, url): - """ Log some stats for the URL. At the moment, that means extension. """ - if url and url.loc: - (scheme, netloc, path, query, frag) = urlsplit(url.loc) - if not path: - return - - # Recognize directories - if path.endswith('/'): - if self._extensions.has_key('/'): - self._extensions['/'] = self._extensions['/'] + 1 + # Build the URL we want to send in + if self._sitemaps > 1: + url = self._filegen.GenerateURL(SITEINDEX_SUFFIX, self._base_url) else: - self._extensions['/'] = 1 - return - - # Strip to a filename - i = path.rfind('/') - if i >= 0: - assert i < len(path) - path = path[i:] - - # Find extension - i = path.rfind('.') - if i > 0: - assert i < len(path) - ext = path[i:].lower() - if self._extensions.has_key(ext): - self._extensions[ext] = self._extensions[ext] + 1 + url = self._filegen.GenerateURL(0, self._base_url) + + # Test if we can hit it ourselves + try: + u = urlopen(url) + u.close() + except IOError: + output.Error('When attempting to access our generated Sitemap at the ' + 'following URL:\n %s\n we failed to read it. Please ' + 'verify the store_into path you specified in\n' + ' your configuration file is web-accessable. Consult ' + 'the FAQ for more\n information.' % url) + output.Warn('Proceeding to notify with an unverifyable URL.') + + # Cycle through notifications + # To understand this, see the comment near the NOTIFICATION_SITES + # comment + for ping in NOTIFICATION_SITES: + query_map = ping[3] + query_attr = ping[5] + query_map[query_attr] = url + query = urllib.urlencode(query_map) + notify = urlunsplit((ping[0], ping[1], ping[2], query, ping[4])) + + # Send the notification + output.Log('Notifying: %s' % ping[1], 0) + output.Log('Notification URL: %s' % notify, 2) + try: + u = urlopen(notify) + u.read() + u.close() + except IOError: + output.Warn('Cannot contact: %s' % ping[1]) + + if old_opener: + if sys.version_info[0] == 3: + urllib.request._urlopener = old_opener + else: + urllib._urlopener = old_opener + # end def NotifySearch + + def startElement(self, tag, attributes): + """ SAX processing, called per node in the config stream. 
""" + if tag == 'site': + if self._in_site: + output.Error('Can not nest Site entries in the configuration.') + else: + self._in_site = True + + if not ValidateAttributes('SITE', attributes, + ('verbose', 'default_encoding', 'base_url', 'store_into', + 'suppress_search_engine_notify', 'sitemap_type')): + return + + verbose = attributes.get('verbose', 0) + if verbose: + output.SetVerbose(verbose) + + self._default_enc = attributes.get('default_encoding') + self._base_url = attributes.get('base_url') + self._store_into = attributes.get('store_into') + self._sitemap_type = attributes.get('sitemap_type') + if not self._suppress: + self._suppress = attributes.get( + 'suppress_search_engine_notify', + False) + self.ValidateBasicConfig() + elif tag == 'filter': + self._filters.append(Filter(attributes)) + + elif tag == 'url': + print(type(attributes)) + self._inputs.append(InputURL(attributes)) + + elif tag == 'urllist': + for attributeset in ExpandPathAttribute(attributes, 'path'): + if self._sitemap_type == 'news': + self._inputs.append(InputNewsURLList(attributeset)) + else: + self._inputs.append(InputURLList(attributeset)) + + elif tag == 'directory': + self._inputs.append(InputDirectory(attributes, self._base_url)) + + elif tag == 'accesslog': + for attributeset in ExpandPathAttribute(attributes, 'path'): + self._inputs.append(InputAccessLog(attributeset)) else: - self._extensions[ext] = 1 - else: - if self._extensions.has_key('(no extension)'): - self._extensions['(no extension)'] = self._extensions[ - '(no extension)'] + 1 + output.Error('Unrecognized tag in the configuration: %s' % tag) + # end def startElement + + def endElement(self, tag): + """ SAX processing, called per node in the config stream. """ + if tag == 'site': + assert self._in_site + self._in_site = False + self._in_site_ever = True + # end def endElement + + def endDocument(self): + """ End of SAX, verify we can proceed. """ + if not self._in_site_ever: + output.Error('The configuration must specify a "site" element.') else: - self._extensions['(no extension)'] = 1 - #end def Consume - - def Log(self): - """ Dump out stats to the output. """ - if len(self._extensions): - output.Log('Count of file extensions on URLs:', 1) - set = self._extensions.keys() - set.sort() - for ext in set: - output.Log(' %7d %s' % (self._extensions[ext], ext), 1) - #end def Log + if not self._inputs: + output.Warn('There were no inputs to generate a sitemap from.') + # end def endDocument +# end class Sitemap -class Sitemap(xml.sax.handler.ContentHandler): - """ - This is the big workhorse class that processes your inputs and spits - out sitemap files. It is built as a SAX handler for set up purposes. - That is, it processes an XML stream to bring itself up. - """ - - def __init__(self, suppress_notify): - xml.sax.handler.ContentHandler.__init__(self) - self._filters = [] # Filter objects - self._inputs = [] # Input objects - self._urls = {} # Maps URLs to count of dups - self._set = [] # Current set of URLs - self._filegen = None # Path generator for output files - self._wildurl1 = None # Sitemap URLs to filter out - self._wildurl2 = None # Sitemap URLs to filter out - self._sitemaps = 0 # Number of output files - # We init _dup_max to 2 so the default priority is 0.5 instead of 1.0 - self._dup_max = 2 # Max number of duplicate URLs - self._stat = PerURLStatistics() # Some simple stats - self._in_site = False # SAX: are we in a Site node? - self._in_Site_ever = False # SAX: were we ever in a Site? 
- - self._default_enc = None # Best encoding to try on URLs - self._base_url = None # Prefix to all valid URLs - self._store_into = None # Output filepath - self._suppress = suppress_notify # Suppress notify of servers - #end def __init__ - - def ValidateBasicConfig(self): - """ Verifies (and cleans up) the basic user-configurable options. """ - all_good = True - if self._default_enc: - encoder.SetUserEncoding(self._default_enc) - - # Canonicalize the base_url - if all_good and not self._base_url: - output.Error('A site needs a "base_url" attribute.') - all_good = False - if all_good and not URL.IsAbsolute(self._base_url): - output.Error('The "base_url" must be absolute, not relative: %s' % - self._base_url) - all_good = False - if all_good: - self._base_url = URL.Canonicalize(self._base_url) - if not self._base_url.endswith('/'): - self._base_url = self._base_url + '/' - output.Log('BaseURL is set to: %s' % self._base_url, 2) - - # Load store_into into a generator - if all_good: - if self._store_into: - self._filegen = FilePathGenerator() - if not self._filegen.Preload(self._store_into): - all_good = False - else: - output.Error('A site needs a "store_into" attribute.') - all_good = False - - # Ask the generator for patterns on what its output will look like - if all_good: - self._wildurl1 = self._filegen.GenerateWildURL(self._base_url) - self._wildurl2 = self._filegen.GenerateURL(SITEINDEX_SUFFIX, - self._base_url) - - # Unify various forms of False - if all_good: - if self._suppress: - if (type(self._suppress) == types.StringType) or (type(self._suppress) - == types.UnicodeType): - if (self._suppress == '0') or (self._suppress.lower() == 'false'): - self._suppress = False - - # Done - if not all_good: - output.Log('See "example_config.xml" for more information.', 0) +def ValidateAttributes(tag, attributes, goodattributes): + """ Makes sure 'attributes' does not contain any attribute not + listed in 'goodattributes' """ + all_good = True + for attr in attributes.keys(): + if not attr in goodattributes: + output.Error('Unknown %s attribute: %s' % (tag, attr)) + all_good = False return all_good - #end def ValidateBasicConfig - - def Generate(self): - """ Run over all the Inputs and ask them to Produce """ - # Run the inputs - for input in self._inputs: - input.ProduceURLs(self.ConsumeURL) - - # Do last flushes - if len(self._set): - self.FlushSet() - if not self._sitemaps: - output.Warn('No URLs were recorded, writing an empty sitemap.') - self.FlushSet() +# end def ValidateAttributes - # Write an index as needed - if self._sitemaps > 1: - self.WriteIndex() - # Notify - self.NotifySearch() +def ExpandPathAttribute(src, attrib): + """ Given a dictionary of attributes, return a list of dictionaries + with all the same attributes except for the one named attrib. + That one, we treat as a file path and expand into all its possible + variations. """ + # Do the path expansion. On any error, just return the source dictionary. 
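+ # For example, given src = {'path': 'logs/*.log'} with logs/a.log and
+ # logs/b.log on disk, this produces one copy of src per match:
+ # [{'path': 'logs/a.log'}, {'path': 'logs/b.log'}]. If the glob
+ # matches nothing, [src] is returned unchanged.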
+ path = src.get(attrib) + if not path: + return [src] + path = encoder.MaybeNarrowPath(path) + pathlist = glob.glob(path) + if not pathlist: + return [src] + + # If this isn't actually a dictionary, make it one + if not isinstance(src, dict): + tmp = {} + for key in src.keys(): + tmp[key] = src[key] + src = tmp + # Create N new dictionaries + retval = [] + for path in pathlist: + dst = src.copy() + dst[attrib] = path + retval.append(dst) + + return retval +# end def ExpandPathAttribute - # Dump stats - self._stat.Log() - #end def Generate - def ConsumeURL(self, url, allow_fragment): - """ - All per-URL processing comes together here, regardless of Input. - Here we run filters, remove duplicates, spill to disk as needed, etc. - """ - if not url: - return - - # Validate - if not url.Validate(self._base_url, allow_fragment): - return - - # Run filters - accept = None - for filter in self._filters: - accept = filter.Apply(url) - if accept != None: - break - if not (accept or (accept == None)): - url.Log(prefix='FILTERED', level=2) - return - - # Ignore our out output URLs - if fnmatch.fnmatchcase(url.loc, self._wildurl1) or fnmatch.fnmatchcase( - url.loc, self._wildurl2): - url.Log(prefix='IGNORED (output file)', level=2) - return - - # Note the sighting - hash = url.MakeHash() - if self._urls.has_key(hash): - dup = self._urls[hash] - if dup > 0: - dup = dup + 1 - self._urls[hash] = dup - if self._dup_max < dup: - self._dup_max = dup - url.Log(prefix='DUPLICATE') - return - - # Acceptance -- add to set - self._urls[hash] = 1 - self._set.append(url) - self._stat.Consume(url) - url.Log() - - # Flush the set if needed - if len(self._set) >= MAXURLS_PER_SITEMAP: - self.FlushSet() - #end def ConsumeURL - - def FlushSet(self): - """ - Flush the current set of URLs to the output. This is a little - slow because we like to sort them all and normalize the priorities - before dumping. 
- """ +def OpenFileForRead(path, logtext): + """ Opens a text file, be it GZip or plain """ - # Sort and normalize - output.Log('Sorting and normalizing collected URLs.', 1) - self._set.sort() - for url in self._set: - hash = url.MakeHash() - dup = self._urls[hash] - if dup > 0: - self._urls[hash] = -1 - if not url.priority: - url.priority = '%.4f' % (float(dup) / float(self._dup_max)) - - # Get the filename we're going to write to - filename = self._filegen.GeneratePath(self._sitemaps) - if not filename: - output.Fatal('Unexpected: Couldn\'t generate output filename.') - self._sitemaps = self._sitemaps + 1 - output.Log('Writing Sitemap file "%s" with %d URLs' % - (filename, len(self._set)), 1) - - # Write to it frame = None - file = None - - try: - if self._filegen.is_gzip: - basename = os.path.basename(filename); - frame = open(filename, 'wb') - file = gzip.GzipFile(fileobj=frame, filename=basename, mode='wt') - else: - file = open(filename, 'wt') - - file.write(SITEMAP_HEADER) - for url in self._set: - url.WriteXML(file) - file.write(SITEMAP_FOOTER) - - file.close() - if frame: - frame.close() - - frame = None - file = None - except IOError: - output.Fatal('Couldn\'t write out to file: %s' % filename) - os.chmod(filename, 0o0644) - - # Flush - self._set = [] - #end def FlushSet - - def WriteIndex(self): - """ Write the master index of all Sitemap files """ - # Make a filename - filename = self._filegen.GeneratePath(SITEINDEX_SUFFIX) - if not filename: - output.Fatal('Unexpected: Couldn\'t generate output index filename.') - output.Log('Writing index file "%s" with %d Sitemaps' % - (filename, self._sitemaps), 1) - - # Make a lastmod time - lastmod = TimestampISO8601(time.time()) - - # Write to it - try: - fd = open(filename, 'wt') - fd.write(SITEINDEX_HEADER) + file = None - for mapnumber in range(0,self._sitemaps): - # Write the entry - mapurl = self._filegen.GenerateURL(mapnumber, self._base_url) - mapattributes = { 'loc' : mapurl, 'lastmod' : lastmod } - fd.write(SITEINDEX_ENTRY % mapattributes) - - fd.write(SITEINDEX_FOOTER) - - fd.close() - fd = None - except IOError: - output.Fatal('Couldn\'t write out to file: %s' % filename) - os.chmod(filename, 0o0644) - #end def WriteIndex - - def NotifySearch(self): - """ Send notification of the new Sitemap(s) to the search engines. """ - if self._suppress: - output.Log('Search engine notification is suppressed.', 1) - return - - output.Log('Notifying search engines.', 1) - - # Override the urllib's opener class with one that doesn't ignore 404s - class ExceptionURLopener(urllib.FancyURLopener): - def http_error_default(self, url, fp, errcode, errmsg, headers): - output.Log('HTTP error %d: %s' % (errcode, errmsg), 2) - raise IOError - #end def http_error_default - #end class ExceptionURLOpener - old_opener = urllib._urlopener - urllib._urlopener = ExceptionURLopener() - - # Build the URL we want to send in - if self._sitemaps > 1: - url = self._filegen.GenerateURL(SITEINDEX_SUFFIX, self._base_url) - else: - url = self._filegen.GenerateURL(0, self._base_url) + if not path: + return (frame, file) - # Test if we can hit it ourselves try: - u = urllib.urlopen(url) - u.close() - except IOError: - output.Error('When attempting to access our generated Sitemap at the ' - 'following URL:\n %s\n we failed to read it. Please ' - 'verify the store_into path you specified in\n' - ' your configuration file is web-accessable. Consult ' - 'the FAQ for more\n information.' 
% url) - output.Warn('Proceeding to notify with an unverifyable URL.') - - # Cycle through notifications - # To understand this, see the comment near the NOTIFICATION_SITES comment - for ping in NOTIFICATION_SITES: - query_map = ping[3] - query_attr = ping[5] - query_map[query_attr] = url - query = urllib.urlencode(query_map) - notify = urlunsplit((ping[0], ping[1], ping[2], query, ping[4])) - - # Send the notification - output.Log('Notifying: %s' % ping[1], 1) - output.Log('Notification URL: %s' % notify, 2) - try: - u = urllib.urlopen(notify) - u.read() - u.close() - except IOError: - output.Warn('Cannot contact: %s' % ping[1]) - - if old_opener: - urllib._urlopener = old_opener - #end def NotifySearch - - def startElement(self, tag, attributes): - """ SAX processing, called per node in the config stream. """ - - if tag == 'site': - if self._in_site: - output.Error('Can not nest Site entries in the configuration.') - else: - self._in_site = True - - if not ValidateAttributes('SITE', attributes, - ('verbose', 'default_encoding', 'base_url', 'store_into', - 'suppress_search_engine_notify')): - return - - verbose = attributes.get('verbose', 0) - if verbose: - output.SetVerbose(verbose) - - self._default_enc = attributes.get('default_encoding') - self._base_url = attributes.get('base_url') - self._store_into = attributes.get('store_into') - if not self._suppress: - self._suppress = attributes.get('suppress_search_engine_notify', - False) - self.ValidateBasicConfig() - - elif tag == 'filter': - self._filters.append(Filter(attributes)) - - elif tag == 'url': - self._inputs.append(InputURL(attributes)) - - elif tag == 'urllist': - for attributeset in ExpandPathAttribute(attributes, 'path'): - self._inputs.append(InputURLList(attributeset)) - - elif tag == 'directory': - self._inputs.append(InputDirectory(attributes, self._base_url)) - - elif tag == 'accesslog': - for attributeset in ExpandPathAttribute(attributes, 'path'): - self._inputs.append(InputAccessLog(attributeset)) - - elif tag == 'sitemap': - for attributeset in ExpandPathAttribute(attributes, 'path'): - self._inputs.append(InputSitemap(attributeset)) - - else: - output.Error('Unrecognized tag in the configuration: %s' % tag) - #end def startElement - - def endElement(self, tag): - """ SAX processing, called per node in the config stream. """ - if tag == 'site': - assert self._in_site - self._in_site = False - self._in_site_ever = True - #end def endElement - - def endDocument(self): - """ End of SAX, verify we can proceed. """ - if not self._in_site_ever: - output.Error('The configuration must specify a "site" element.') - else: - if not self._inputs: - output.Warn('There were no inputs to generate a sitemap from.') - #end def endDocument -#end class Sitemap - - -def ValidateAttributes(tag, attributes, goodattributes): - """ Makes sure 'attributes' does not contain any attribute not - listed in 'goodattributes' """ - all_good = True - for attr in attributes.keys(): - if not attr in goodattributes: - output.Error('Unknown %s attribute: %s' % (tag, attr)) - all_good = False - return all_good -#end def ValidateAttributes + if path.endswith('.gz'): + frame = open(path, 'rb') + file = gzip.GzipFile(fileobj=frame, mode='rt') + else: + file = open(path, 'rt') -def ExpandPathAttribute(src, attrib): - """ Given a dictionary of attributes, return a list of dictionaries - with all the same attributes except for the one named attrib. - That one, we treat as a file path and expand into all its possible - variations. """ - # Do the path expansion. 
On any error, just return the source dictionary. - path = src.get(attrib) - if not path: - return [src] - path = encoder.MaybeNarrowPath(path); - pathlist = glob.glob(path) - if not pathlist: - return [src] - - # If this isn't actually a dictionary, make it one - if type(src) != types.DictionaryType: - tmp = {} - for key in src.keys(): - tmp[key] = src[key] - src = tmp - - # Create N new dictionaries - retval = [] - for path in pathlist: - dst = src.copy() - dst[attrib] = path - retval.append(dst) - - return retval -#end def ExpandPathAttribute + if logtext: + output.Log('Opened %s file: %s' % (logtext, path), 1) + else: + output.Log('Opened file: %s' % path, 1) + except IOError: + output.Error('Can not open file: %s' % path) -def OpenFileForRead(path, logtext): - """ Opens a text file, be it GZip or plain """ + return (frame, file) +# end def OpenFileForRead - frame = None - file = None - if not path: - return (frame, file) +def TimestampISO8601(t): + """Seconds since epoch (1970-01-01) --> ISO 8601 time string.""" + return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(t)) +# end def TimestampISO8601 - try: - if path.endswith('.gz'): - frame = open(path, 'rb') - file = gzip.GzipFile(fileobj=frame, mode='rt') - else: - file = open(path, 'rt') - if logtext: - output.Log('Opened %s file: %s' % (logtext, path), 1) - else: - output.Log('Opened file: %s' % path, 1) - except IOError: - output.Error('Can not open file: %s' % path) +def CreateSitemapFromFile(configpath, suppress_notify): + """ Sets up a new Sitemap object from the specified configuration file. """ - return (frame, file) -#end def OpenFileForRead + # Remember error count on the way in + num_errors = output.num_errors -def TimestampISO8601(t): - """Seconds since epoch (1970-01-01) --> ISO 8601 time string.""" - return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(t)) -#end def TimestampISO8601 + # Rev up SAX to parse the config + sitemap = Sitemap(suppress_notify) + try: + output.Log('Reading configuration file: %s' % configpath, 0) + xml.sax.parse(configpath, sitemap) + except IOError: + output.Error('Cannot read configuration file: %s' % configpath) + except xml.sax._exceptions.SAXParseException as e: + output.Error('XML error in the config file (line %d, column %d): %s' % + (e._linenum, e._colnum, e.getMessage())) + except xml.sax._exceptions.SAXReaderNotAvailable: + output.Error('Some installs of Python 2.2 did not include complete support' + ' for XML.\n Please try upgrading your version of Python' + ' and re-running the script.') + + # If we added any errors, return no sitemap + if num_errors == output.num_errors: + return sitemap + return None +# end def CreateSitemapFromFile -def CreateSitemapFromFile(configpath, suppress_notify): - """ Sets up a new Sitemap object from the specified configuration file. 
""" - - # Remember error count on the way in - num_errors = output.num_errors - - # Rev up SAX to parse the config - sitemap = Sitemap(suppress_notify) - try: - output.Log('Reading configuration file: %s' % configpath, 0) - xml.sax.parse(configpath, sitemap) - except IOError: - output.Error('Cannot read configuration file: %s' % configpath) - except xml.sax._exceptions.SAXParseException as e: - output.Error('XML error in the config file (line %d, column %d): %s' % - (e._linenum, e._colnum, e.getMessage())) - except xml.sax._exceptions.SAXReaderNotAvailable: - output.Error('Some installs of Python 2.2 did not include complete support' - ' for XML.\n Please try upgrading your version of Python' - ' and re-running the script.') - - # If we added any errors, return no sitemap - if num_errors == output.num_errors: - return sitemap - return None -#end def CreateSitemapFromFile def ProcessCommandFlags(args): - """ - Parse command line flags per specified usage, pick off key, value pairs - All flags of type "--key=value" will be processed as __flags[key] = value, - "--option" will be processed as __flags[option] = option - """ - - flags = {} - rkeyval = '--(?P<key>\S*)[=](?P<value>\S*)' # --key=val - roption = '--(?P<option>\S*)' # --key - r = '(' + rkeyval + ')|(' + roption + ')' - rc = re.compile(r) - for a in args: - try: - rcg = rc.search(a).groupdict() - if rcg.has_key('key'): - flags[rcg['key']] = rcg['value'] - if rcg.has_key('option'): - flags[rcg['option']] = rcg['option'] - except AttributeError: - return None - return flags -#end def ProcessCommandFlags + """ + Parse command line flags per specified usage, pick off key, value pairs + All flags of type "--key=value" will be processed as __flags[key] = value, + "--option" will be processed as __flags[option] = option + """ + + flags = {} + rkeyval = '--(?P<key>\S*)[=](?P<value>\S*)' # --key=val + roption = '--(?P<option>\S*)' # --key + r = '(' + rkeyval + ')|(' + roption + ')' + rc = re.compile(r) + for a in args: + try: + rcg = rc.search(a).groupdict() + if 'key' in rcg: + flags[rcg['key']] = rcg['value'] + if 'option' in rcg: + flags[rcg['option']] = rcg['option'] + except AttributeError: + return None + return flags +# end def ProcessCommandFlags # @@ -2224,15 +2123,15 @@ def ProcessCommandFlags(args): # if __name__ == '__main__': - flags = ProcessCommandFlags(sys.argv[1:]) - if not flags or not flags.has_key('config') or flags.has_key('help'): - output.Log(__usage__, 0) - else: - suppress_notify = flags.has_key('testing') - sitemap = CreateSitemapFromFile(flags['config'], suppress_notify) - if not sitemap: - output.Log('Configuration file errors -- exiting.', 0) + flags = ProcessCommandFlags(sys.argv[1:]) + if not flags or not 'config' in flags or 'help' in flags: + output.Log(__usage__, 0) else: - sitemap.Generate() - output.Log('Number of errors: %d' % output.num_errors, 1) - output.Log('Number of warnings: %d' % output.num_warns, 1) + suppress_notify = 'testing' in flags + sitemap = CreateSitemapFromFile(flags['config'], suppress_notify) + if not sitemap: + output.Log('Configuration file errors -- exiting.', 0) + else: + sitemap.Generate() + output.Log('Number of errors: %d' % output.num_errors, 1) + output.Log('Number of warnings: %d' % output.num_warns, 1) diff --git a/nikola/plugins/template_jinja.py b/nikola/plugins/template_jinja.py index f88b2c0..b6d762b 100644 --- a/nikola/plugins/template_jinja.py +++ b/nikola/plugins/template_jinja.py @@ -8,11 +8,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to 
permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
-#
+#
# The above copyright notice and this permission notice
# shall be included in all copies or substantial portions of
# the Software.
-#
+#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
@@ -25,10 +25,11 @@
"""Jinja template handlers"""
import os
+import json
try:
import jinja2
except ImportError:
- jinja2 = None
+ jinja2 = None # NOQA
from nikola.plugin_categories import TemplateSystem
@@ -39,19 +40,26 @@ class JinjaTemplates(TemplateSystem):
name = "jinja"
lookup = None
+ def __init__(self):
+ """ Initialize the Jinja2 wrapper with an extended set of filters. """
+ if jinja2 is None:
+ return
+ self.lookup = jinja2.Environment()
+ self.lookup.filters['tojson'] = json.dumps
+
def set_directories(self, directories, cache_folder):
"""Create a template lookup."""
if jinja2 is None:
- raise Exception('To use this theme you need to install the "Jinja2" package.')
- self.lookup = jinja2.Environment(loader=jinja2.FileSystemLoader(
- directories,
- encoding='utf-8',
- ))
+ raise Exception('To use this theme you need to install the '
+ '"Jinja2" package.')
+ self.lookup.loader = jinja2.FileSystemLoader(directories,
+ encoding='utf-8')
def render_template(self, template_name, output_name, context):
"""Render the template into output_name using context."""
if jinja2 is None:
- raise Exception('To use this theme you need to install the "Jinja2" package.')
+ raise Exception('To use this theme you need to install the '
+ '"Jinja2" package.')
template = self.lookup.get_template(template_name)
output = template.render(**context)
if output_name is not None:
diff --git a/nikola/plugins/template_mako.py b/nikola/plugins/template_mako.py
index 2f8d52c..6ec6698 100644
--- a/nikola/plugins/template_mako.py
+++ b/nikola/plugins/template_mako.py
@@ -8,11 +8,11 @@
# distribute, sublicense, and/or sell copies of the
# Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
-#
+#
# The above copyright notice and this permission notice
# shall be included in all copies or substantial portions of
# the Software.
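Stepping back to the JinjaTemplates hunk for a moment: it splits setup in two. __init__ builds a bare jinja2.Environment and registers a tojson filter backed by json.dumps, while set_directories() only attaches the loader. That works because jinja2 allows assigning Environment.loader after construction. A minimal sketch of the same idea (the 'templates' directory is a placeholder):

    import json

    import jinja2

    env = jinja2.Environment()             # no loader yet, as in __init__
    env.filters['tojson'] = json.dumps     # the extra filter
    env.loader = jinja2.FileSystemLoader(['templates'], encoding='utf-8')

    # The filter is available even for templates compiled from strings:
    print(env.from_string('var posts = {{ posts|tojson }};').render(posts=['a', 'b']))
    # -> var posts = ["a", "b"];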
-# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -48,7 +48,8 @@ class MakoTemplates(TemplateSystem): deps = [] for n in lex.template.nodes: - if getattr(n, 'keyword', None) == "inherit": + keyword = getattr(n, 'keyword', None) + if keyword in ["inherit", "namespace"]: deps.append(n.attributes['file']) # TODO: include tags are not handled return deps @@ -61,8 +62,7 @@ class MakoTemplates(TemplateSystem): self.lookup = TemplateLookup( directories=directories, module_directory=cache_dir, - output_encoding='utf-8', - ) + output_encoding='utf-8') def render_template(self, template_name, output_name, context): """Render the template into output_name using context.""" diff --git a/nikola/post.py b/nikola/post.py index d5b98f6..809e5b7 100644 --- a/nikola/post.py +++ b/nikola/post.py @@ -9,11 +9,11 @@ # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR @@ -23,6 +23,8 @@ # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +from __future__ import unicode_literals, print_function + import codecs import os @@ -38,16 +40,15 @@ class Post(object): """Represents a blog post or web page.""" def __init__(self, source_path, cache_folder, destination, use_in_feeds, - translations, default_lang, blog_url, messages): + translations, default_lang, blog_url, messages, template_name, + file_metadata_regexp=None): """Initialize post. The base path is the .txt post file. From it we calculate the meta file, as well as any translations available, and the .html fragment file path. - - `compile_html` is a function that knows how to compile this Post to - html. """ + self.translated_to = set([default_lang]) self.prev_post = None self.next_post = None self.blog_url = blog_url @@ -61,21 +62,23 @@ class Post(object): self.translations = translations self.default_lang = default_lang self.messages = messages + self.template_name = template_name if os.path.isfile(self.metadata_path): with codecs.open(self.metadata_path, "r", "utf8") as meta_file: meta_data = meta_file.readlines() while len(meta_data) < 6: meta_data.append("") (default_title, default_pagename, self.date, self.tags, - self.link, default_description) = \ - [x.strip() for x in meta_data][:6] + self.link, default_description) = [x.strip() for x in + meta_data][:6] else: (default_title, default_pagename, self.date, self.tags, - self.link, default_description) = \ - utils.get_meta(self.source_path) + self.link, default_description) = utils.get_meta( + self.source_path, file_metadata_regexp) if not default_title or not default_pagename or not self.date: - raise OSError("You must set a title and slug and date!") + raise OSError("You must set a title and slug and date! [%s]" % + source_path) self.date = utils.to_datetime(self.date) self.tags = [x.strip() for x in self.tags.split(',')] @@ -99,12 +102,14 @@ class Post(object): else: metadata_path = self.metadata_path + "." + lang source_path = self.source_path + "." 
+ lang + if os.path.isfile(source_path): + self.translated_to.add(lang) try: if os.path.isfile(metadata_path): with codecs.open( metadata_path, "r", "utf8") as meta_file: meta_data = [x.strip() for x in - meta_file.readlines()] + meta_file.readlines()] while len(meta_data) < 6: meta_data.append("") self.titles[lang] = meta_data[0] or default_title @@ -114,7 +119,7 @@ class Post(object): default_description else: ttitle, ppagename, tmp1, tmp2, tmp3, ddescription = \ - utils.get_meta(source_path) + utils.get_meta(source_path, file_metadata_regexp) self.titles[lang] = ttitle or default_title self.pagenames[lang] = ppagename or default_pagename self.descriptions[lang] = ddescription or\ @@ -146,17 +151,28 @@ class Post(object): if os.path.isfile(self.metadata_path): deps.append(self.metadata_path) if lang != self.default_lang: - lang_deps = list(filter(os.path.exists, [x + "." + lang for x in deps])) + lang_deps = list(filter(os.path.exists, [x + "." + lang for x in + deps])) deps += lang_deps return deps - def text(self, lang, teaser_only=False): - """Read the post file for that language and return its contents""" + def is_translation_available(self, lang): + """Return true if the translation actually exists.""" + return lang in self.translated_to + + def _translated_file_path(self, lang): + """Return path to the translation's file, or to the original.""" file_name = self.base_path if lang != self.default_lang: file_name_lang = file_name + ".%s" % lang if os.path.exists(file_name_lang): file_name = file_name_lang + return file_name + + def text(self, lang, teaser_only=False): + """Read the post file for that language and return its contents""" + file_name = self._translated_file_path(lang) + with codecs.open(file_name, "r", "utf8") as post_file: data = post_file.read() @@ -167,20 +183,21 @@ class Post(object): teaser = [] flag = False for elem in e: - elem_string = lxml.html.tostring(elem) + elem_string = lxml.html.tostring(elem).decode('utf8') if '<!-- TEASER_END -->' in elem_string.upper(): flag = True break teaser.append(elem_string) if flag: teaser.append('<p><a href="%s">%s...</a></p>' % - (self.permalink(lang), self.messages[lang]["Read more"])) + (self.permalink(lang), + self.messages[lang]["Read more"])) data = ''.join(teaser) return data def destination_path(self, lang, extension='.html'): path = os.path.join(self.translations[lang], - self.folder, self.pagenames[lang] + extension) + self.folder, self.pagenames[lang] + extension) return path def permalink(self, lang=None, absolute=False, extension='.html'): diff --git a/nikola/utils.py b/nikola/utils.py index 18b4646..f682c33 100644 --- a/nikola/utils.py +++ b/nikola/utils.py @@ -42,15 +42,25 @@ try: except ImportError: pass + +if sys.version_info[0] == 3: + # Python 3 + bytes_str = bytes + unicode_str = str + unichr = chr +else: + bytes_str = str + unicode_str = unicode + from doit import tools from unidecode import unidecode -from . 
import PyRSS2Gen as rss
+import PyRSS2Gen as rss
__all__ = ['get_theme_path', 'get_theme_chain', 'load_messages', 'copy_tree',
- 'generic_rss_renderer',
- 'copy_file', 'slugify', 'unslugify', 'get_meta', 'to_datetime',
- 'apply_filters', 'config_changed']
+ 'generic_rss_renderer',
+ 'copy_file', 'slugify', 'unslugify', 'get_meta', 'to_datetime',
+ 'apply_filters', 'config_changed']
class CustomEncoder(json.JSONEncoder):
@@ -94,7 +104,7 @@ def get_theme_path(theme):
if os.path.isdir(dir_name):
return dir_name
dir_name = os.path.join(os.path.dirname(__file__),
- 'data', 'themes', theme)
+ 'data', 'themes', theme)
if os.path.isdir(dir_name):
return dir_name
raise Exception("Can't find theme '%s'" % theme)
@@ -110,22 +120,54 @@ def re_meta(line, match):
return ''
-def get_meta(source_path):
- """get post's meta from source"""
- with codecs.open(source_path, "r", "utf8") as meta_file:
- meta_data = meta_file.readlines(15)
+def _get_metadata_from_filename_by_regex(filename, metadata_regexp):
+ """
+ Tries to read the metadata from the filename based on the given
+ regular expression. This requires symbolic group names in the
+ pattern.
+
+ The code that reads the metadata from the filename based on a
+ regular expression is taken from Pelican - pelican/readers.py
+ """
title = slug = date = tags = link = description = ''
+ match = re.match(metadata_regexp, filename)
+ if match:
+ # .items() for py3k compat.
+ for key, value in match.groupdict().items():
+ key = key.lower() # metadata must be lowercase
+
+ if key == 'title':
+ title = value
+ if key == 'slug':
+ slug = value
+ if key == 'date':
+ date = value
+ if key == 'tags':
+ tags = value
+ if key == 'link':
+ link = value
+ if key == 'description':
+ description = value
+ return (title, slug, date, tags, link, description)
+
+
+def _get_metadata_from_file(source_path, title='', slug='', date='', tags='',
+ link='', description=''):
re_md_title = re.compile(r'^%s([^%s].*)' %
- (re.escape('#'), re.escape('#')))
- re_rst_title = re.compile(r'^([^%s ].*)' % re.escape(string.punctuation))
+ (re.escape('#'), re.escape('#')))
+ # Assuming rst titles are going to be at least 4 chars long,
+ # otherwise this detects things like ''' which breaks other markups.
+ re_rst_title = re.compile(r'^([%s]{4,})' % re.escape(string.punctuation))
+
+ with codecs.open(source_path, "r", "utf8") as meta_file:
+ meta_data = meta_file.readlines(15)
- for meta in meta_data:
+ for i, meta in enumerate(meta_data):
if not title:
title = re_meta(meta, '.. title:')
if not title:
- if re_rst_title.findall(meta):
- title = re_rst_title.findall(meta)[0]
+ if re_rst_title.findall(meta) and i > 0:
+ title = meta_data[i - 1].strip()
if not title:
if re_md_title.findall(meta):
title = re_md_title.findall(meta)[0]
@@ -140,11 +182,34 @@ def get_meta(source_path):
if not description:
description = re_meta(meta, '.. description:')
- # TODO: either enable or delete
- #if not date:
- #from datetime import datetime
- #date = datetime.fromtimestamp(
- # os.path.getmtime(source_path)).strftime('%Y/%m/%d %H:%M')
+ return (title, slug, date, tags, link, description)
+
+
+def get_meta(source_path, file_metadata_regexp=None):
+ """Get post's meta from source.
+
+ If ``file_metadata_regexp`` is given, we try to read metadata from
+ the filename first.
+ Any metadata found inside the file then overrides what was read
+ from the filename.
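+ If neither source yields a slug or a title, both fall back to the
+ source filename: the slug is its slugified basename, the title the
+ basename without extension.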
+ """ + title = slug = date = tags = link = description = '' + + if not (file_metadata_regexp is None): + (title, slug, date, tags, link, + description) = _get_metadata_from_filename_by_regex( + source_path, file_metadata_regexp) + + (title, slug, date, tags, link, description) = _get_metadata_from_file( + source_path, title, slug, date, tags, link, description) + + if not slug: + # If no slug is found in the metadata use the filename + slug = slugify(os.path.splitext(os.path.basename(source_path))[0]) + + if not title: + # If no title is found, use the filename without extension + title = os.path.splitext(os.path.basename(source_path))[0] return (title, slug, date, tags, link, description) @@ -194,13 +259,14 @@ def load_messages(themes, translations): english = __import__('messages_en') for lang in list(translations.keys()): # If we don't do the reload, the module is cached - translation = __import__('messages_'+lang) + translation = __import__('messages_' + lang) reload(translation) if sorted(translation.MESSAGES.keys()) !=\ sorted(english.MESSAGES.keys()) and \ - lang not in warned: + lang not in warned: # FIXME: get real logging in place - print("Warning: Incomplete translation for language '%s'." % lang) + print("Warning: Incomplete translation for language '%s'." % + lang) warned.append(lang) messages[lang].update(english.MESSAGES) messages[lang].update(translation.MESSAGES) @@ -247,15 +313,15 @@ def copy_tree(src, dst, link_cutoff=None): } -def generic_rss_renderer(lang, title, link, description, - timeline, output_path): +def generic_rss_renderer(lang, title, link, description, timeline, output_path, + rss_teasers): """Takes all necessary data, and renders a RSS feed in output_path.""" items = [] for post in timeline[:10]: args = { 'title': post.title(lang), 'link': post.permalink(lang, absolute=True), - 'description': post.text(lang, teaser_only=True), + 'description': post.text(lang, teaser_only=rss_teasers), 'guid': post.permalink(lang, absolute=True), 'pubDate': post.date, } @@ -271,8 +337,11 @@ def generic_rss_renderer(lang, title, link, description, dst_dir = os.path.dirname(output_path) if not os.path.isdir(dst_dir): os.makedirs(dst_dir) - with open(output_path, "wb+") as rss_file: - rss_obj.write_xml(rss_file) + with codecs.open(output_path, "wb+", "utf-8") as rss_file: + data = rss_obj.to_xml(encoding='utf-8') + if isinstance(data, bytes_str): + data = data.decode('utf-8') + rss_file.write(data) def copy_file(source, dest, cutoff=None): @@ -318,7 +387,7 @@ def slugify(value): """ value = unidecode(value) # WARNING: this may not be python2/3 equivalent - #value = unicode(_slugify_strip_re.sub('', value).strip().lower()) + # value = unicode(_slugify_strip_re.sub('', value).strip().lower()) value = str(_slugify_strip_re.sub('', value).strip().lower()) return _slugify_hyphenate_re.sub('-', value) @@ -343,21 +412,21 @@ class UnsafeZipException(Exception): def extract_all(zipfile): pwd = os.getcwd() os.chdir('themes') - z = list(zip(zipfile)) - namelist = z.namelist() - for f in namelist: - if f.endswith('/') and '..' in f: - raise UnsafeZipException( - 'The zip file contains ".." and is not safe to expand.') - for f in namelist: - if f.endswith('/'): - if not os.path.isdir(f): - try: - os.makedirs(f) - except: - raise OSError("mkdir '%s' error!" % f) - else: - z.extract(f) + with zip(zipfile) as z: + namelist = z.namelist() + for f in namelist: + if f.endswith('/') and '..' in f: + raise UnsafeZipException( + 'The zip file contains ".." 
and is not safe to expand.')
+ for f in namelist:
+ if f.endswith('/'):
+ if not os.path.isdir(f):
+ try:
+ os.makedirs(f)
+ except:
+ raise OSError("mkdir '%s' error!" % f)
+ else:
+ z.extract(f)
os.chdir(pwd)
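The rewritten extract_all() keeps the old safety rule: any member name containing '..' aborts the expansion before a single file is written. Here is the same guard in isolation, sketched with the standard-library zipfile module rather than the helper's own zip(...) opener (which presumably resolves to a ZipFile-style callable defined elsewhere in the file):

    import zipfile

    def is_safe_to_expand(path):
        # Reject archives whose member names could escape the target directory.
        with zipfile.ZipFile(path) as z:
            return all('..' not in name for name in z.namelist())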

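One more illustration, this time for the new file_metadata_regexp support in get_meta(): metadata is pulled out of the source filename through symbolic group names, and anything found inside the file wins over it. A worked example with a hypothetical pattern:

    import re

    # Hypothetical pattern: '2013-02-13-hello-world.txt' carries a date and a slug.
    FILE_METADATA_REGEXP = r'(?P<date>\d{4}-\d{2}-\d{2})-(?P<slug>.+)\.txt'

    match = re.match(FILE_METADATA_REGEXP, '2013-02-13-hello-world.txt')
    if match:
        # Group names are lowercased, as in _get_metadata_from_filename_by_regex().
        meta = dict((key.lower(), value) for key, value in match.groupdict().items())
        print(meta['date'])  # 2013-02-13
        print(meta['slug'])  # hello-world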