summaryrefslogtreecommitdiffstats
path: root/nikola/plugins/command_check.py
diff options
context:
space:
mode:
Diffstat (limited to 'nikola/plugins/command_check.py')
-rw-r--r--nikola/plugins/command_check.py109
1 files changed, 109 insertions, 0 deletions
diff --git a/nikola/plugins/command_check.py b/nikola/plugins/command_check.py
new file mode 100644
index 0000000..ce1e2e3
--- /dev/null
+++ b/nikola/plugins/command_check.py
@@ -0,0 +1,109 @@
+from optparse import OptionParser
+import os
+import sys
+import urllib
+from urlparse import urlparse
+
+import lxml.html
+
+from nikola.plugin_categories import Command
+
+
+class CommandCheck(Command):
+ """Check the generated site."""
+
+ name = "check"
+
+ def run(self, *args):
+ """Check the generated site."""
+ parser = OptionParser(usage="nikola %s [options]" % self.name)
+ parser.add_option('-l', '--check-links', dest='links',
+ action='store_true',
+ help='Check for dangling links.')
+ parser.add_option('-f', '--check-files', dest='files',
+ action='store_true',
+ help='Check for unknown files.')
+
+ (options, args) = parser.parse_args(list(args))
+ if options.links:
+ scan_links()
+ if options.files:
+ scan_files()
+
+existing_targets = set([])
+
+
+def analize(task):
+ try:
+ filename = task.split(":")[-1]
+ d = lxml.html.fromstring(open(filename).read())
+ for l in d.iterlinks():
+ target = l[0].attrib[l[1]]
+ if target == "#":
+ continue
+ parsed = urlparse(target)
+ if parsed.scheme:
+ continue
+ if parsed.fragment:
+ target = target.split('#')[0]
+ target_filename = os.path.abspath(
+ os.path.join(os.path.dirname(filename),
+ urllib.unquote(target)))
+ if target_filename not in existing_targets:
+ if os.path.exists(target_filename):
+ existing_targets.add(target_filename)
+ else:
+ print "In %s broken link: " % filename, target
+ if '--find-sources' in sys.argv:
+ print "Possible sources:"
+ print os.popen(
+ 'nikola build list --deps %s' % task, 'r').read()
+ print "===============================\n"
+
+ except Exception as exc:
+ print "Error with:", filename, exc
+
+
+def scan_links():
+ print "Checking Links:\n===============\n"
+ for task in os.popen('nikola build list --all', 'r').readlines():
+ task = task.strip()
+ if task.split(':')[0] in (
+ 'render_tags',
+ 'render_archive',
+ 'render_galleries',
+ 'render_indexes',
+ 'render_pages',
+ 'render_site') and '.html' in task:
+ analize(task)
+
+
+def scan_files():
+ print "Checking Files:\n===============\n"
+ task_fnames = set([])
+ real_fnames = set([])
+ # First check that all targets are generated in the right places
+ for task in os.popen('nikola build list --all', 'r').readlines():
+ task = task.strip()
+ if 'output' in task and ':' in task:
+ fname = task.split(':')[-1]
+ task_fnames.add(fname)
+ # And now check that there are no non-target files
+ for root, dirs, files in os.walk('output'):
+ for src_name in files:
+ fname = os.path.join(root, src_name)
+ real_fnames.add(fname)
+
+ only_on_output = list(real_fnames - task_fnames)
+ if only_on_output:
+ only_on_output.sort()
+ print "\nFiles from unknown origins:\n"
+ for f in only_on_output:
+ print f
+
+ only_on_input = list(task_fnames - real_fnames)
+ if only_on_input:
+ only_on_input.sort()
+ print "\nFiles not generated:\n"
+ for f in only_on_input:
+ print f