| field | value |
|---|---|
| author | 2016-11-15 14:18:53 -0300 |
| committer | 2016-11-15 14:18:53 -0300 |
| commit | 1ad5102b7ddd181bb9c632b124d3ea4c7db28be6 (patch) |
| tree | 73dda18465d0f4b8eb52d4482282a387c9f67c95 /nikola/plugins/task/sitemap |
| parent | b67294f76809a681ff73f209ed691a3e3f00563d (diff) |
| parent | ffb671c61a24a9086343b54bad080e145ff33fc5 (diff) |
Merge tag 'upstream/7.8.1'
Upstream version 7.8.1
# gpg: Signature made Tue 15 Nov 2016 14:18:48 ART
# gpg: using RSA key A6C7B88B9583046A11C5403E0B00FB6CEBE2D002
# gpg: Good signature from "Ulises Vitulli <dererk@debian.org>" [ultimate]
# gpg: aka "Dererk <dererk@torproject.org>" [ultimate]
# gpg: aka "Ulises Vitulli <uvitulli@fi.uba.ar>" [ultimate]
# gpg: aka "Ulises Vitulli <dererk@satellogic.com>" [ultimate]
Diffstat (limited to 'nikola/plugins/task/sitemap')
| -rw-r--r-- | nikola/plugins/task/sitemap/__init__.py | 33 |
1 file changed, 20 insertions(+), 13 deletions(-)
```diff
diff --git a/nikola/plugins/task/sitemap/__init__.py b/nikola/plugins/task/sitemap/__init__.py
index fd781d6..64fcb45 100644
--- a/nikola/plugins/task/sitemap/__init__.py
+++ b/nikola/plugins/task/sitemap/__init__.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright © 2012-2015 Roberto Alsina and others.
+# Copyright © 2012-2016 Roberto Alsina and others.
 
 # Permission is hereby granted, free of charge, to any
 # person obtaining a copy of this software and associated
@@ -31,6 +31,7 @@ import io
 import datetime
 import dateutil.tz
 import os
+import sys
 try:
     from urlparse import urljoin, urlparse
     import robotparser as robotparser
@@ -39,7 +40,7 @@ except ImportError:
     import urllib.robotparser as robotparser  # NOQA
 
 from nikola.plugin_categories import LateTask
-from nikola.utils import config_changed, apply_filters
+from nikola.utils import apply_filters, config_changed, encodelink
 
 
 urlset_header = """<?xml version="1.0" encoding="UTF-8"?>
@@ -106,7 +107,6 @@ def get_base_path(base):
 
 
 class Sitemap(LateTask):
-
     """Generate a sitemap."""
 
     name = "sitemap"
@@ -146,7 +146,10 @@ class Sitemap(LateTask):
                     continue  # Totally empty, not on sitemap
                 path = os.path.relpath(root, output)
                 # ignore the current directory.
-                path = (path.replace(os.sep, '/') + '/').replace('./', '')
+                if path == '.':
+                    path = ''
+                else:
+                    path = path.replace(os.sep, '/') + '/'
                 lastmod = self.get_lastmod(root)
                 loc = urljoin(base_url, base_path + path)
                 if kw['index_file'] in files and kw['strip_indexes']:  # ignore folders when not stripping urls
@@ -155,12 +158,12 @@ class Sitemap(LateTask):
                         continue
                     alternates = []
                     if post:
-                        for lang in kw['translations']:
+                        for lang in post.translated_to:
                             alt_url = post.permalink(lang=lang, absolute=True)
-                            if loc == alt_url:
+                            if encodelink(loc) == alt_url:
                                 continue
                             alternates.append(alternates_format.format(lang, alt_url))
-                    urlset[loc] = loc_format.format(loc, lastmod, ''.join(alternates))
+                    urlset[loc] = loc_format.format(encodelink(loc), lastmod, ''.join(alternates))
                 for fname in files:
                     if kw['strip_indexes'] and fname == kw['index_file']:
                         continue  # We already mapped the folder
@@ -200,7 +203,7 @@ class Sitemap(LateTask):
                             path = path.replace(os.sep, '/')
                             lastmod = self.get_lastmod(real_path)
                             loc = urljoin(base_url, base_path + path)
-                            sitemapindex[loc] = sitemap_format.format(loc, lastmod)
+                            sitemapindex[loc] = sitemap_format.format(encodelink(loc), lastmod)
                             continue
                         else:
                             continue  # ignores all XML files except those presumed to be RSS
@@ -212,20 +215,24 @@ class Sitemap(LateTask):
                     loc = urljoin(base_url, base_path + path)
                     alternates = []
                     if post:
-                        for lang in kw['translations']:
+                        for lang in post.translated_to:
                             alt_url = post.permalink(lang=lang, absolute=True)
-                            if loc == alt_url:
+                            if encodelink(loc) == alt_url:
                                 continue
                             alternates.append(alternates_format.format(lang, alt_url))
-                    urlset[loc] = loc_format.format(loc, lastmod, '\n'.join(alternates))
+                    urlset[loc] = loc_format.format(encodelink(loc), lastmod, '\n'.join(alternates))
 
         def robot_fetch(path):
             """Check if robots can fetch a file."""
             for rule in kw["robots_exclusions"]:
                 robot = robotparser.RobotFileParser()
                 robot.parse(["User-Agent: *", "Disallow: {0}".format(rule)])
-                if not robot.can_fetch("*", '/' + path):
-                    return False  # not robot food
+                if sys.version_info[0] == 3:
+                    if not robot.can_fetch("*", '/' + path):
+                        return False  # not robot food
+                else:
+                    if not robot.can_fetch("*", ('/' + path).encode('utf-8')):
+                        return False  # not robot food
             return True
 
         def write_sitemap():
```
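The path-normalization hunk replaces a blanket string replace of `'./'` with an explicit check for `os.path.relpath` returning `'.'` at the output root. A minimal sketch of the difference, using made-up directory names for illustration:

```python
import os

def old_norm(path):
    # pre-7.8.1 behavior: append '/' then strip every './' substring
    return (path.replace(os.sep, '/') + '/').replace('./', '')

def new_norm(path):
    # 7.8.1 behavior: only the output root ('.') maps to ''
    return '' if path == '.' else path.replace(os.sep, '/') + '/'

print(old_norm('.'))      # '' -- the intended case
print(old_norm('a./b'))   # 'ab/' -- a segment ending in '.' is silently mangled
print(new_norm('.'))      # ''
print(new_norm('a./b'))   # 'a./b/' -- preserved
```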
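`encodelink` is a helper from `nikola.utils`; its implementation is not shown in this diff. As a rough stand-in (an assumption for illustration, not Nikola's actual code), it can be read as percent-encoding an IRI so that a filesystem-derived `loc` compares equal to an already-encoded permalink:

```python
# Hypothetical stand-in for nikola.utils.encodelink -- an assumption,
# not the real implementation.
from urllib.parse import quote, urlsplit, urlunsplit

def encodelink_sketch(iri):
    scheme, netloc, path, query, fragment = urlsplit(iri)
    # Percent-encode the path; '%' stays safe so encoded octets aren't double-quoted.
    return urlunsplit((scheme, netloc, quote(path, safe='/%'), query, fragment))

print(encodelink_sketch('https://example.com/posts/café/'))
# https://example.com/posts/caf%C3%A9/
```

With that reading, the hunks that change `loc == alt_url` to `encodelink(loc) == alt_url` avoid spurious mismatches between unencoded locations and encoded permalinks.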
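The `robot_fetch` change works around a Python 2/3 difference: Python 2's `robotparser` cannot handle non-ASCII unicode paths in `can_fetch()` unless they are UTF-8 encoded first, while Python 3's `urllib.robotparser` takes `str` directly. A minimal sketch of the same branching, with an illustrative exclusion rule and path (the rule list mirrors how the plugin feeds `ROBOTS_EXCLUSIONS` to the parser):

```python
import sys

try:
    import robotparser  # Python 2
except ImportError:
    import urllib.robotparser as robotparser  # Python 3

robot = robotparser.RobotFileParser()
# In-memory rules, built the same way the plugin does per exclusion rule.
robot.parse(["User-Agent: *", "Disallow: /private/"])

path = u'posts/café/index.html'  # e.g. a non-ASCII translated slug
if sys.version_info[0] == 3:
    allowed = robot.can_fetch("*", '/' + path)
else:
    allowed = robot.can_fetch("*", ('/' + path).encode('utf-8'))
print(allowed)  # True: the path is not under /private/
```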
