summaryrefslogtreecommitdiffstats
path: root/nikola/plugins/task/sitemap/__init__.py
diff options
context:
space:
mode:
authorLibravatarDererk <dererk@debian.org>2015-11-11 16:34:34 -0300
committerLibravatarDererk <dererk@debian.org>2015-11-11 16:34:34 -0300
commit4e3224c012df9f74f010eb92203520515e8537b9 (patch)
tree19322dc0c595268cb6864f21d7e92fd93cb826e9 /nikola/plugins/task/sitemap/__init__.py
parent787b97a4cb24330b36f11297c6d3a7a473a907d0 (diff)
Imported Upstream version 7.7.3upstream/7.7.3
Diffstat (limited to 'nikola/plugins/task/sitemap/__init__.py')
-rw-r--r--nikola/plugins/task/sitemap/__init__.py27
1 files changed, 17 insertions, 10 deletions
diff --git a/nikola/plugins/task/sitemap/__init__.py b/nikola/plugins/task/sitemap/__init__.py
index fd781d6..90acdd3 100644
--- a/nikola/plugins/task/sitemap/__init__.py
+++ b/nikola/plugins/task/sitemap/__init__.py
@@ -31,6 +31,7 @@ import io
import datetime
import dateutil.tz
import os
+import sys
try:
from urlparse import urljoin, urlparse
import robotparser as robotparser
@@ -39,7 +40,7 @@ except ImportError:
import urllib.robotparser as robotparser # NOQA
from nikola.plugin_categories import LateTask
-from nikola.utils import config_changed, apply_filters
+from nikola.utils import apply_filters, config_changed, encodelink
urlset_header = """<?xml version="1.0" encoding="UTF-8"?>
@@ -106,7 +107,6 @@ def get_base_path(base):
class Sitemap(LateTask):
-
"""Generate a sitemap."""
name = "sitemap"
@@ -146,7 +146,10 @@ class Sitemap(LateTask):
continue # Totally empty, not on sitemap
path = os.path.relpath(root, output)
# ignore the current directory.
- path = (path.replace(os.sep, '/') + '/').replace('./', '')
+ if path == '.':
+ path = ''
+ else:
+ path = path.replace(os.sep, '/') + '/'
lastmod = self.get_lastmod(root)
loc = urljoin(base_url, base_path + path)
if kw['index_file'] in files and kw['strip_indexes']: # ignore folders when not stripping urls
@@ -157,10 +160,10 @@ class Sitemap(LateTask):
if post:
for lang in kw['translations']:
alt_url = post.permalink(lang=lang, absolute=True)
- if loc == alt_url:
+ if encodelink(loc) == alt_url:
continue
alternates.append(alternates_format.format(lang, alt_url))
- urlset[loc] = loc_format.format(loc, lastmod, ''.join(alternates))
+ urlset[loc] = loc_format.format(encodelink(loc), lastmod, ''.join(alternates))
for fname in files:
if kw['strip_indexes'] and fname == kw['index_file']:
continue # We already mapped the folder
@@ -200,7 +203,7 @@ class Sitemap(LateTask):
path = path.replace(os.sep, '/')
lastmod = self.get_lastmod(real_path)
loc = urljoin(base_url, base_path + path)
- sitemapindex[loc] = sitemap_format.format(loc, lastmod)
+ sitemapindex[loc] = sitemap_format.format(encodelink(loc), lastmod)
continue
else:
continue # ignores all XML files except those presumed to be RSS
@@ -214,18 +217,22 @@ class Sitemap(LateTask):
if post:
for lang in kw['translations']:
alt_url = post.permalink(lang=lang, absolute=True)
- if loc == alt_url:
+ if encodelink(loc) == alt_url:
continue
alternates.append(alternates_format.format(lang, alt_url))
- urlset[loc] = loc_format.format(loc, lastmod, '\n'.join(alternates))
+ urlset[loc] = loc_format.format(encodelink(loc), lastmod, '\n'.join(alternates))
def robot_fetch(path):
"""Check if robots can fetch a file."""
for rule in kw["robots_exclusions"]:
robot = robotparser.RobotFileParser()
robot.parse(["User-Agent: *", "Disallow: {0}".format(rule)])
- if not robot.can_fetch("*", '/' + path):
- return False # not robot food
+ if sys.version_info[0] == 3:
+ if not robot.can_fetch("*", '/' + path):
+ return False # not robot food
+ else:
+ if not robot.can_fetch("*", ('/' + path).encode('utf-8')):
+ return False # not robot food
return True
def write_sitemap():