author    Dererk <dererk@satellogic.com>  2016-11-15 14:18:53 -0300
committer Dererk <dererk@satellogic.com>  2016-11-15 14:18:53 -0300
commit    1ad5102b7ddd181bb9c632b124d3ea4c7db28be6 (patch)
tree      73dda18465d0f4b8eb52d4482282a387c9f67c95 /nikola/plugins/task/sitemap
parent    b67294f76809a681ff73f209ed691a3e3f00563d (diff)
parent    ffb671c61a24a9086343b54bad080e145ff33fc5 (diff)
Merge tag 'upstream/7.8.1'
Upstream version 7.8.1
# gpg: Signature made Tue 15 Nov 2016 14:18:48 ART
# gpg: using RSA key A6C7B88B9583046A11C5403E0B00FB6CEBE2D002
# gpg: Good signature from "Ulises Vitulli <dererk@debian.org>" [ultimate]
# gpg: aka "Dererk <dererk@torproject.org>" [ultimate]
# gpg: aka "Ulises Vitulli <uvitulli@fi.uba.ar>" [ultimate]
# gpg: aka "Ulises Vitulli <dererk@satellogic.com>" [ultimate]
Diffstat (limited to 'nikola/plugins/task/sitemap')
-rw-r--r--  nikola/plugins/task/sitemap/__init__.py | 33
1 file changed, 20 insertions(+), 13 deletions(-)
diff --git a/nikola/plugins/task/sitemap/__init__.py b/nikola/plugins/task/sitemap/__init__.py
index fd781d6..64fcb45 100644
--- a/nikola/plugins/task/sitemap/__init__.py
+++ b/nikola/plugins/task/sitemap/__init__.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright © 2012-2015 Roberto Alsina and others.
+# Copyright © 2012-2016 Roberto Alsina and others.
# Permission is hereby granted, free of charge, to any
# person obtaining a copy of this software and associated
@@ -31,6 +31,7 @@ import io
import datetime
import dateutil.tz
import os
+import sys
try:
from urlparse import urljoin, urlparse
import robotparser as robotparser
@@ -39,7 +40,7 @@ except ImportError:
import urllib.robotparser as robotparser # NOQA
from nikola.plugin_categories import LateTask
-from nikola.utils import config_changed, apply_filters
+from nikola.utils import apply_filters, config_changed, encodelink
urlset_header = """<?xml version="1.0" encoding="UTF-8"?>
@@ -106,7 +107,6 @@ def get_base_path(base):
class Sitemap(LateTask):
-
"""Generate a sitemap."""
name = "sitemap"
@@ -146,7 +146,10 @@ class Sitemap(LateTask):
continue # Totally empty, not on sitemap
path = os.path.relpath(root, output)
# ignore the current directory.
- path = (path.replace(os.sep, '/') + '/').replace('./', '')
+ if path == '.':
+ path = ''
+ else:
+ path = path.replace(os.sep, '/') + '/'
lastmod = self.get_lastmod(root)
loc = urljoin(base_url, base_path + path)
if kw['index_file'] in files and kw['strip_indexes']: # ignore folders when not stripping urls
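
The path normalization fix above matters for directory names that happen to contain the substring './'. The old one-liner stripped every occurrence of './' from the URL path, not just the leading one; the new branch only special-cases the top-level directory. A minimal sketch of the difference, using hypothetical directory names:

import os

def old_norm(path):
    # Old behaviour: strips *every* './' substring from the path.
    return (path.replace(os.sep, '/') + '/').replace('./', '')

def new_norm(path):
    # New behaviour: only the top-level directory '.' maps to ''.
    return '' if path == '.' else path.replace(os.sep, '/') + '/'

print(old_norm('.'), new_norm('.'))  # '' '' -- both correct for the root
print(old_norm('blog./posts'))       # 'blogposts/' -- directory name mangled
print(new_norm('blog./posts'))       # 'blog./posts/' -- preserved
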
@@ -155,12 +158,12 @@ class Sitemap(LateTask):
continue
alternates = []
if post:
- for lang in kw['translations']:
+ for lang in post.translated_to:
alt_url = post.permalink(lang=lang, absolute=True)
- if loc == alt_url:
+ if encodelink(loc) == alt_url:
continue
alternates.append(alternates_format.format(lang, alt_url))
- urlset[loc] = loc_format.format(loc, lastmod, ''.join(alternates))
+ urlset[loc] = loc_format.format(encodelink(loc), lastmod, ''.join(alternates))
for fname in files:
if kw['strip_indexes'] and fname == kw['index_file']:
continue # We already mapped the folder
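
Two related changes appear in this hunk: alternate links are now emitted only for languages the post is actually translated to (post.translated_to) rather than for every configured language, and locations are run through encodelink before comparison and output, so non-ASCII URLs end up percent-encoded in the sitemap XML. A rough, hypothetical sketch of what an IRI-to-URI helper like this does (illustrative only, not Nikola's actual implementation):

from urllib.parse import quote, urlsplit, urlunsplit

def encode_link(iri):
    # Percent-encode the non-ASCII parts of an IRI while leaving
    # the URL structure (scheme, host, separators) intact.
    parts = urlsplit(iri)
    return urlunsplit((
        parts.scheme,
        parts.netloc,
        quote(parts.path, safe='/%'),
        quote(parts.query, safe='=&%'),
        quote(parts.fragment, safe='%'),
    ))

print(encode_link('https://example.org/café/posts/'))
# https://example.org/caf%C3%A9/posts/
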
@@ -200,7 +203,7 @@ class Sitemap(LateTask):
path = path.replace(os.sep, '/')
lastmod = self.get_lastmod(real_path)
loc = urljoin(base_url, base_path + path)
- sitemapindex[loc] = sitemap_format.format(loc, lastmod)
+ sitemapindex[loc] = sitemap_format.format(encodelink(loc), lastmod)
continue
else:
continue # ignores all XML files except those presumed to be RSS
@@ -212,20 +215,24 @@ class Sitemap(LateTask):
loc = urljoin(base_url, base_path + path)
alternates = []
if post:
- for lang in kw['translations']:
+ for lang in post.translated_to:
alt_url = post.permalink(lang=lang, absolute=True)
- if loc == alt_url:
+ if encodelink(loc) == alt_url:
continue
alternates.append(alternates_format.format(lang, alt_url))
- urlset[loc] = loc_format.format(loc, lastmod, '\n'.join(alternates))
+ urlset[loc] = loc_format.format(encodelink(loc), lastmod, '\n'.join(alternates))
def robot_fetch(path):
"""Check if robots can fetch a file."""
for rule in kw["robots_exclusions"]:
robot = robotparser.RobotFileParser()
robot.parse(["User-Agent: *", "Disallow: {0}".format(rule)])
- if not robot.can_fetch("*", '/' + path):
- return False # not robot food
+ if sys.version_info[0] == 3:
+ if not robot.can_fetch("*", '/' + path):
+ return False # not robot food
+ else:
+ if not robot.can_fetch("*", ('/' + path).encode('utf-8')):
+ return False # not robot food
return True
def write_sitemap():
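
The robot_fetch change above works around a Python 2/3 difference: Python 2's robotparser expects byte strings, so a unicode path containing non-ASCII characters can misbehave, while Python 3's urllib.robotparser takes str. A self-contained sketch of the same pattern, with a hypothetical Disallow rule and path:

import sys

try:
    import robotparser                        # Python 2
except ImportError:
    import urllib.robotparser as robotparser  # Python 3

robot = robotparser.RobotFileParser()
robot.parse(["User-Agent: *", "Disallow: /private/"])

path = u'/private/caf\u00e9.html'
if sys.version_info[0] == 3:
    allowed = robot.can_fetch("*", path)
else:
    # Python 2's parser wants bytes, so encode first.
    allowed = robot.can_fetch("*", path.encode('utf-8'))
print(allowed)  # False -- blocked by the Disallow rule
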