From b0b24795b24ee6809397fbbadf42f31f310a219f Mon Sep 17 00:00:00 2001 From: Agustin Henze Date: Wed, 8 Jul 2015 07:35:02 -0300 Subject: Imported Upstream version 7.6.0 --- nikola/plugins/task/sitemap/__init__.py | 106 ++++++++++++++++++++++++-------- 1 file changed, 79 insertions(+), 27 deletions(-) (limited to 'nikola/plugins/task/sitemap') diff --git a/nikola/plugins/task/sitemap/__init__.py b/nikola/plugins/task/sitemap/__init__.py index 943e9b2..92d557d 100644 --- a/nikola/plugins/task/sitemap/__init__.py +++ b/nikola/plugins/task/sitemap/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright © 2012-2014 Roberto Alsina and others. +# Copyright © 2012-2015 Roberto Alsina and others. # Permission is hereby granted, free of charge, to any # person obtaining a copy of this software and associated @@ -36,7 +36,7 @@ except ImportError: import urllib.robotparser as robotparser # NOQA from nikola.plugin_categories import LateTask -from nikola.utils import config_changed +from nikola.utils import config_changed, apply_filters urlset_header = """ @@ -49,7 +49,7 @@ urlset_header = """ loc_format = """ {0} - {1} + {1}{2} """ @@ -69,6 +69,9 @@ sitemap_format = """ """ +alternates_format = """\n """ + + sitemapindex_footer = "" @@ -111,8 +114,10 @@ class Sitemap(LateTask): "strip_indexes": self.site.config["STRIP_INDEXES"], "index_file": self.site.config["INDEX_FILE"], "sitemap_include_fileless_dirs": self.site.config["SITEMAP_INCLUDE_FILELESS_DIRS"], - "mapped_extensions": self.site.config.get('MAPPED_EXTENSIONS', ['.html', '.htm', '.xml', '.rss']), - "robots_exclusions": self.site.config["ROBOTS_EXCLUSIONS"] + "mapped_extensions": self.site.config.get('MAPPED_EXTENSIONS', ['.atom', '.html', '.htm', '.xml', '.rss']), + "robots_exclusions": self.site.config["ROBOTS_EXCLUSIONS"], + "filters": self.site.config["FILTERS"], + "translations": self.site.config["TRANSLATIONS"], } output = kw['output_folder'] @@ -136,7 +141,17 @@ class Sitemap(LateTask): lastmod = self.get_lastmod(root) loc = urljoin(base_url, base_path + path) if kw['index_file'] in files and kw['strip_indexes']: # ignore folders when not stripping urls - urlset[loc] = loc_format.format(loc, lastmod) + post = self.site.post_per_file.get(path + kw['index_file']) + if post and (post.is_draft or post.is_private or post.publish_later): + continue + alternates = [] + if post: + for lang in kw['translations']: + alt_url = post.permalink(lang=lang, absolute=True) + if loc == alt_url: + continue + alternates.append(alternates_format.format(lang, alt_url)) + urlset[loc] = loc_format.format(loc, lastmod, ''.join(alternates)) for fname in files: if kw['strip_indexes'] and fname == kw['index_file']: continue # We already mapped the folder @@ -148,20 +163,30 @@ class Sitemap(LateTask): continue if not robot_fetch(path): continue + + # read in binary mode to make ancient files work + fh = open(real_path, 'rb') + filehead = fh.read(1024) + fh.close() + if path.endswith('.html') or path.endswith('.htm'): - try: - if u'