1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
# Copyright (c) 2012 Roberto Alsina y otros.
# Permission is hereby granted, free of charge, to any
# person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the
# Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the
# Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice
# shall be included in all copies or substantial portions of
# the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
from __future__ import print_function, absolute_import, unicode_literals
import codecs
import datetime
import os
try:
from urlparse import urljoin
except ImportError:
from urllib.parse import urljoin # NOQA
from nikola.plugin_categories import LateTask
from nikola.utils import config_changed
# Opening of the sitemap document: the XML declaration followed by the
# <urlset> root element, declaring the sitemaps.org 0.9 namespace and its
# schema location.  The closing "</urlset>" is written separately by the
# sitemap task after all entries.
header = """<?xml version="1.0" encoding="UTF-8"?>
<urlset
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
"""
# Template for a single <url> entry, filled in with str.format():
#   {0} -> the location (URL) of the page
#   {1} -> its last-modification date
# Every entry is emitted with the same fixed priority of 0.5000.
url_format = """ <url>
<loc>{0}</loc>
<lastmod>{1}</lastmod>
<priority>0.5000</priority>
</url>
"""
def get_lastmod(p):
    """Return the last-modification date of path *p* as ``YYYY-MM-DD``.

    The date is derived from the ``st_mtime`` reported by ``os.stat`` and
    converted with ``datetime.datetime.fromtimestamp``, i.e. it is expressed
    in the local timezone of the machine running the build.

    Raises ``OSError`` (from ``os.stat``) if *p* cannot be stat'ed.
    """
    mtime = os.stat(p).st_mtime
    # Only the calendar date is used for <lastmod>; the time of day is dropped.
    return datetime.datetime.fromtimestamp(mtime).date().isoformat()
class Sitemap(LateTask):
    """Generate a sitemap.xml file listing the contents of the output folder."""

    name = "sitemap"

    def gen_tasks(self):
        """Yield the single task that writes ``sitemap.xml``.

        The task walks ``OUTPUT_FOLDER`` and emits one ``<url>`` entry for
        every directory and for every file whose extension is listed in
        ``MAPPED_EXTENSIONS`` (default ``['.html', '.htm']``).  Each location
        is built by joining the relative path onto ``BASE_URL``; the
        ``<lastmod>`` value comes from ``get_lastmod``.  Entries are written
        sorted by location.

        The task is considered out of date whenever any of the configuration
        values collected in ``kw`` changes.
        """
        # Local import: keeps this block self-contained without touching the
        # module-level import section.
        from xml.sax.saxutils import escape

        kw = {
            "base_url": self.site.config["BASE_URL"],
            "site_url": self.site.config["SITE_URL"],
            "output_folder": self.site.config["OUTPUT_FOLDER"],
            "mapped_extensions": self.site.config.get(
                'MAPPED_EXTENSIONS', ['.html', '.htm']),
        }
        output_path = kw['output_folder']
        sitemap_path = os.path.join(output_path, "sitemap.xml")

        def sitemap():
            """Scan the output folder, then write the sitemap file."""
            output = kw['output_folder']
            base_url = kw['base_url']
            mapped_exts = kw['mapped_extensions']
            # Maps the raw (unescaped) location to its rendered <url> entry;
            # keyed by location so duplicates collapse and output can be sorted.
            locs = {}

            def add_entry(rel_url, real_path):
                """Record the <url> entry for *real_path* at *rel_url*."""
                loc = urljoin(base_url, rel_url)
                # The location is embedded in XML, so &, < and > must be
                # entity-escaped or the resulting sitemap is not well-formed.
                locs[loc] = url_format.format(
                    escape(loc), get_lastmod(real_path))

            # Collect every entry BEFORE opening the output file: a failure
            # while scanning then leaves any existing sitemap untouched, and
            # creating the file cannot disturb the mtimes being read.
            for root, _dirs, files in os.walk(output):
                rel_dir = os.path.relpath(root, output)
                # Directories get a trailing slash; the root itself becomes
                # "./", which urljoin resolves to base_url.
                add_entry(rel_dir.replace(os.sep, '/') + '/', root)
                for fname in files:
                    if os.path.splitext(fname)[-1] in mapped_exts:
                        real_path = os.path.join(root, fname)
                        rel_file = os.path.relpath(real_path, output)
                        add_entry(rel_file.replace(os.sep, '/'), real_path)

            with codecs.open(sitemap_path, 'w', 'utf8') as outf:
                outf.write(header)
                for loc in sorted(locs):
                    outf.write(locs[loc])
                outf.write("</urlset>")

        yield {
            "basename": "sitemap",
            "name": sitemap_path,
            "targets": [sitemap_path],
            "actions": [(sitemap,)],
            "uptodate": [config_changed(kw)],
            "clean": True,
        }
|