1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
from optparse import OptionParser
import os
import sys
import urllib
from urlparse import urlparse
import lxml.html
from nikola.plugin_categories import Command
class CommandCheck(Command):
    """Nikola command that runs sanity checks on the generated site."""

    name = "check"

    def run(self, *args):
        """Parse the command's arguments and run the checks they select.

        ``-l`` / ``--check-links`` runs ``scan_links()`` and
        ``-f`` / ``--check-files`` runs ``scan_files()``.  Both flags may be
        given together; when neither is given nothing is run.
        """
        usage_text = "nikola %s [options]" % self.name
        parser = OptionParser(usage=usage_text)

        # (short flag, long flag, destination attribute, help text)
        flag_table = (
            ('-l', '--check-links', 'links', 'Check for dangling links.'),
            ('-f', '--check-files', 'files', 'Check for unknown files.'),
        )
        for short_flag, long_flag, dest_name, help_text in flag_table:
            parser.add_option(short_flag, long_flag, dest=dest_name,
                              action='store_true', help=help_text)

        options, _leftover = parser.parse_args(list(args))
        if options.links:
            scan_links()
        if options.files:
            scan_files()
# Absolute paths of link targets already confirmed to exist on disk.
# analize() consults and extends this set so that a target found once is not
# looked up on the filesystem again.
existing_targets = set()
def analize(task):
    """Print every link in one rendered HTML page that has no local target.

    ``task`` is a task name as listed by ``nikola build list``; the text
    after its last ``:`` is used as the path of the HTML file to scan.

    A bare ``#`` link, links with a URL scheme, and protocol-relative links
    (``//host/...``) are skipped.  Every other link has its ``?query`` and
    ``#fragment`` removed, is URL-unquoted, and is resolved relative to the
    directory of the page; it is reported when nothing exists at the
    resulting path.  Targets that do exist are remembered in the
    module-level ``existing_targets`` set.

    When ``--find-sources`` appears in ``sys.argv``, the output of
    ``nikola build list --deps <task>`` is printed after each broken link.

    Any exception raised while scanning is caught and printed together with
    the file name, so a single bad page does not abort the whole run.
    """
    # Bound outside the try block so the handler below can always name the
    # file, whatever failed.
    filename = task.split(":")[-1]
    try:
        # Context manager: do not leak one open handle per scanned page.
        with open(filename) as page:
            document = lxml.html.fromstring(page.read())
        page_dir = os.path.dirname(filename)

        # iterlinks() yields (element, attribute, link, pos).  ``attribute``
        # is None for links found in element text (e.g. inside <style>), so
        # use the link itself instead of indexing the element's attributes.
        for _element, _attribute, target, _pos in document.iterlinks():
            if target == "#":
                continue
            parsed = urlparse(target)
            # External and protocol-relative links cannot be verified on
            # the local filesystem.
            if parsed.scheme or parsed.netloc:
                continue
            if parsed.fragment or parsed.query:
                # Only the path part names a file on disk.
                target = target.split('#')[0].split('?')[0]
            target_filename = os.path.abspath(
                os.path.join(page_dir, urllib.unquote(target)))
            if target_filename in existing_targets:
                continue
            if os.path.exists(target_filename):
                existing_targets.add(target_filename)
                continue
            print("In %s broken link:  %s" % (filename, target))
            if '--find-sources' in sys.argv:
                print("Possible sources:")
                print(os.popen(
                    'nikola build list --deps %s' % task, 'r').read())
                print("===============================\n")
    except Exception as exc:
        # Deliberately best-effort: report the problem and move on.
        print("Error with: %s %s" % (filename, exc))
def scan_links():
    """Run ``analize()`` on every HTML-producing task nikola knows about.

    The task list is read from ``nikola build list --all``.  A task is
    analysed when the text before its first ``:`` is one of the renderer
    names below and the task name contains ``.html``.
    """
    print("Checking Links:\n===============\n")

    html_renderers = (
        'render_tags',
        'render_archive',
        'render_galleries',
        'render_indexes',
        'render_pages',
        'render_site',
    )
    listing = os.popen('nikola build list --all', 'r').readlines()
    for raw_line in listing:
        task = raw_line.strip()
        task_kind = task.split(':')[0]
        if '.html' not in task or task_kind not in html_renderers:
            continue
        analize(task)
def scan_files():
    """Compare the files nikola says it builds with the files in ``output``.

    Expected files are taken from ``nikola build list --all``: for every
    listed task containing both ``output`` and ``:``, the text after the last
    ``:`` is used as a file name.  Actual files are collected by walking the
    ``output`` directory.  Files present only on disk are printed under
    "Files from unknown origins"; files present only in the task list are
    printed under "Files not generated".  Each list is sorted, and a heading
    is printed only when its list is non-empty.
    """
    print("Checking Files:\n===============\n")

    # What the build is supposed to produce.
    listing = os.popen('nikola build list --all', 'r').readlines()
    task_names = [entry.strip() for entry in listing]
    expected = set(
        entry.split(':')[-1]
        for entry in task_names
        if 'output' in entry and ':' in entry)

    # What is really on disk.
    found = set()
    for folder, _subdirs, names in os.walk('output'):
        found.update(os.path.join(folder, name) for name in names)

    reports = (
        ("\nFiles from unknown origins:\n", found - expected),
        ("\nFiles not generated:\n", expected - found),
    )
    for heading, paths in reports:
        if not paths:
            continue
        print(heading)
        for path in sorted(paths):
            print(path)
|