aboutsummaryrefslogtreecommitdiffstats
path: root/nikola/plugins/command_check.py
blob: ce1e2e3a224392f0de228a49db12fc2bbf266cff (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from optparse import OptionParser
import os
import sys
import urllib
from urlparse import urlparse

import lxml.html

from nikola.plugin_categories import Command


class CommandCheck(Command):
    """Check the generated site."""

    name = "check"

    def run(self, *args):
        """Check the generated site.

        ``args`` are the command-line arguments that follow the command
        name. Recognised options:

        * ``-l`` / ``--check-links``: look for dangling links.
        * ``-f`` / ``--check-files``: look for unknown or missing files.
        * ``--find-sources``: with ``-l``, also list the possible sources
          of every broken link.

        When no check is selected the usage help is printed instead of
        silently doing nothing.
        """
        parser = OptionParser(usage="nikola %s [options]" % self.name)
        parser.add_option('-l', '--check-links', dest='links',
            action='store_true',
            help='Check for dangling links.')
        parser.add_option('-f', '--check-files', dest='files',
            action='store_true',
            help='Check for unknown files.')
        # The link checker looks for this flag directly in sys.argv; it has
        # to be registered here as well, otherwise the parser rejects it as
        # an unknown option and the flag can never be used.
        parser.add_option('--find-sources', dest='find_sources',
            action='store_true',
            help='List possible sources for each broken link.')

        options, _ = parser.parse_args(list(args))
        if not (options.links or options.files):
            parser.print_help()
            return
        if options.links:
            scan_links()
        if options.files:
            scan_files()

# Absolute paths of link targets already confirmed to exist on disk, shared
# across all analysed pages so each target is only checked once.
existing_targets = set()


def analize(task):
    """Report local links that point to missing files in one rendered page.

    The file name is the text after the last ":" in ``task``. Every link
    lxml finds in that file is resolved relative to the file's directory and
    looked up on disk; links whose target does not exist are printed.
    Targets found to exist are remembered in the module-level
    ``existing_targets`` set so they are not checked again.

    If ``--find-sources`` is present in ``sys.argv``, the output of
    ``nikola build list --deps <task>`` is printed after each broken link.

    Any exception raised while reading or checking the file is reported and
    swallowed, so one bad page does not stop the rest of the scan.
    """
    # Bound outside the ``try`` so the error handler can always name the file.
    filename = task.split(":")[-1]
    try:
        with open(filename) as html_file:
            document = lxml.html.fromstring(html_file.read())
        base_dir = os.path.dirname(filename)
        # iterlinks() yields (element, attribute, link, pos). ``attribute``
        # is None for links found in element text (e.g. inside <style>), and
        # for ``style=`` attributes the attribute value is not the link
        # itself, so the link item is used instead of an attribute lookup.
        for _element, _attribute, target, _pos in document.iterlinks():
            if target == "#":
                continue
            parsed = urlparse(target)
            # A scheme or a host ("//cdn.example.com/x.js") means the link
            # leaves the site; only local files can be checked on disk.
            if parsed.scheme or parsed.netloc:
                continue
            # Neither the fragment nor the query string is part of the
            # file name on disk.
            target = target.split('#', 1)[0].split('?', 1)[0]
            if not target:
                # Pure "#anchor" / "?query" links refer to the page itself.
                continue
            target_filename = os.path.abspath(
                os.path.join(base_dir, urllib.unquote(target)))
            if target_filename in existing_targets:
                continue
            if os.path.exists(target_filename):
                existing_targets.add(target_filename)
                continue
            print("In %s broken link:  %s" % (filename, target))
            if '--find-sources' in sys.argv:
                print("Possible sources:")
                print(os.popen(
                    'nikola build list --deps %s' % task, 'r').read())
                print("===============================\n")

    except Exception as exc:
        # Deliberately best-effort: report the problem and move on.
        print("Error with: %s %s" % (filename, exc))


def scan_links():
    """Run the dangling-link check over every rendered HTML page.

    Reads the task list printed by ``nikola build list --all`` and hands
    each task to ``analize`` when its name (the text before the first ":")
    is one of the page-rendering tasks and the task line mentions ".html".
    """
    print("Checking Links:\n===============\n")
    html_renderers = (
        'render_tags',
        'render_archive',
        'render_galleries',
        'render_indexes',
        'render_pages',
        'render_site',
    )
    for line in os.popen('nikola build list --all', 'r').readlines():
        task = line.strip()
        if task.split(':')[0] not in html_renderers:
            continue
        if '.html' not in task:
            continue
        analize(task)


def scan_files():
    """Compare the files under ``output`` with the files tasks produce.

    The expected set is built from the lines of ``nikola build list --all``
    that contain both "output" and ":" (the file name is the text after the
    last ":"). The actual set is every file found by walking ``output``.
    Files present only on disk and files present only in the task list are
    printed as two sorted lists.
    """
    print("Checking Files:\n===============\n")

    # Files the build tasks claim to generate.
    expected = set()
    for line in os.popen('nikola build list --all', 'r').readlines():
        entry = line.strip()
        if 'output' in entry and ':' in entry:
            expected.add(entry.split(':')[-1])

    # Files that are really there.
    present = set()
    for folder, _subdirs, names in os.walk('output'):
        present.update(os.path.join(folder, name) for name in names)

    unknown = sorted(present - expected)
    if unknown:
        print("\nFiles from unknown origins:\n")
        for path in unknown:
            print(path)

    missing = sorted(expected - present)
    if missing:
        print("\nFiles not generated:\n")
        for path in missing:
            print(path)