summaryrefslogtreecommitdiffstats
path: root/nikola/plugins/task_sitemap/__init__.py
blob: 044e0e3184b2bef996ffdcd99d0494188d50b009 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# Copyright (c) 2012 Roberto Alsina y otros.

# Permission is hereby granted, free of charge, to any
# person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the
# Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the
# Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice
# shall be included in all copies or substantial portions of
# the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

from __future__ import print_function, absolute_import, unicode_literals
import codecs
import datetime
import os
try:
    from urlparse import urljoin
except ImportError:
    from urllib.parse import urljoin  # NOQA

from nikola.plugin_categories import LateTask
from nikola.utils import config_changed


# Opening boilerplate of the generated sitemap: the XML declaration and the
# opening <urlset> tag with the sitemaps.org 0.9 namespace/schema attributes.
# Sitemap.gen_tasks writes this first; the closing </urlset> is written there
# as well, after all <url> entries.
header = """<?xml version="1.0" encoding="UTF-8"?>
<urlset
    xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
                        http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
"""

# Template for a single <url> entry, filled in with str.format():
#   {0} -> the location URL placed in <loc>
#   {1} -> the last-modification date placed in <lastmod>
# The priority is hard-coded to 0.5000 for every entry.
url_format = """ <url>
  <loc>{0}</loc>
  <lastmod>{1}</lastmod>
  <priority>0.5000</priority>
 </url>
"""

def get_lastmod(p):
    """Return the last-modification date of path *p* as ``YYYY-MM-DD``.

    The date is taken from the ``st_mtime`` reported by ``os.stat`` and is
    interpreted in the local timezone (``datetime.fromtimestamp`` without a
    ``tz`` argument). Works for both files and directories.

    Raises ``OSError`` (from ``os.stat``) if *p* cannot be stat'ed.
    """
    mtime = os.stat(p).st_mtime
    # .date().isoformat() yields exactly the part before the 'T' of the
    # full ISO timestamp, without any string splitting.
    return datetime.datetime.fromtimestamp(mtime).date().isoformat()


class Sitemap(LateTask):
    """Task plugin that writes ``sitemap.xml`` into the site's output folder.

    The sitemap contains one ``<url>`` entry for every directory found under
    the output folder and one for every file whose extension is listed in
    the ``MAPPED_EXTENSIONS`` setting (``.html`` and ``.htm`` by default).
    """

    name = "sitemap"

    def gen_tasks(self):
        """Yield the task description that generates ``sitemap.xml``.

        Reads ``BASE_URL``, ``SITE_URL``, ``OUTPUT_FOLDER`` and (optionally)
        ``MAPPED_EXTENSIONS`` from ``self.site.config``. The yielded dict
        targets ``<OUTPUT_FOLDER>/sitemap.xml``; its up-to-date check is
        ``config_changed`` over those settings.

        Raises ``KeyError`` if one of the three mandatory settings is
        missing from the configuration.
        """
        # Function-scope import: keeps this block self-contained without
        # touching the module's import section.
        from xml.sax.saxutils import escape

        # NOTE: "site_url" is not used by the writer below, but it is kept
        # so that the config_changed() fingerprint stays the same as before.
        kw = {
            "base_url": self.site.config["BASE_URL"],
            "site_url": self.site.config["SITE_URL"],
            "output_folder": self.site.config["OUTPUT_FOLDER"],
            "mapped_extensions": self.site.config.get('MAPPED_EXTENSIONS', ['.html', '.htm'])
        }
        output_path = kw['output_folder']
        sitemap_path = os.path.join(output_path, "sitemap.xml")

        def sitemap():
            """Walk the output folder and write the sitemap file."""
            output = kw['output_folder']
            base_url = kw['base_url']
            mapped_exts = kw['mapped_extensions']
            locs = {}

            def add_entry(url_path, real_path):
                """Record the <url> entry for *real_path*, keyed by its URL."""
                loc = urljoin(base_url, url_path)
                # The sitemap protocol requires entity-escaped values; a raw
                # '&', '<' or '>' in a path would make the XML invalid.
                locs[loc] = url_format.format(escape(loc),
                                              get_lastmod(real_path))

            with codecs.open(sitemap_path, 'wb+', 'utf8') as outf:
                outf.write(header)
                for root, _dirs, files in os.walk(output):
                    # Directories are listed with a trailing slash; the
                    # output root itself becomes './', i.e. base_url.
                    rel_dir = os.path.relpath(root, output)
                    add_entry(rel_dir.replace(os.sep, '/') + '/', root)
                    for fname in files:
                        if os.path.splitext(fname)[-1] not in mapped_exts:
                            continue
                        real_path = os.path.join(root, fname)
                        rel_file = os.path.relpath(real_path, output)
                        add_entry(rel_file.replace(os.sep, '/'), real_path)

                # Emit entries sorted by (unescaped) URL so the file content
                # does not depend on os.walk() ordering.
                for loc in sorted(locs):
                    outf.write(locs[loc])
                outf.write("</urlset>")

        yield {
            "basename": "sitemap",
            "name": sitemap_path,
            "targets": [sitemap_path],
            "actions": [(sitemap,)],
            "uptodate": [config_changed(kw)],
            "clean": True,
        }