diff options
Diffstat (limited to 'nikola/plugins/command_check.py')
| -rw-r--r-- | nikola/plugins/command_check.py | 109 |
1 files changed, 109 insertions, 0 deletions
diff --git a/nikola/plugins/command_check.py b/nikola/plugins/command_check.py new file mode 100644 index 0000000..ce1e2e3 --- /dev/null +++ b/nikola/plugins/command_check.py @@ -0,0 +1,109 @@ +from optparse import OptionParser +import os +import sys +import urllib +from urlparse import urlparse + +import lxml.html + +from nikola.plugin_categories import Command + + +class CommandCheck(Command): + """Check the generated site.""" + + name = "check" + + def run(self, *args): + """Check the generated site.""" + parser = OptionParser(usage="nikola %s [options]" % self.name) + parser.add_option('-l', '--check-links', dest='links', + action='store_true', + help='Check for dangling links.') + parser.add_option('-f', '--check-files', dest='files', + action='store_true', + help='Check for unknown files.') + + (options, args) = parser.parse_args(list(args)) + if options.links: + scan_links() + if options.files: + scan_files() + +existing_targets = set([]) + + +def analize(task): + try: + filename = task.split(":")[-1] + d = lxml.html.fromstring(open(filename).read()) + for l in d.iterlinks(): + target = l[0].attrib[l[1]] + if target == "#": + continue + parsed = urlparse(target) + if parsed.scheme: + continue + if parsed.fragment: + target = target.split('#')[0] + target_filename = os.path.abspath( + os.path.join(os.path.dirname(filename), + urllib.unquote(target))) + if target_filename not in existing_targets: + if os.path.exists(target_filename): + existing_targets.add(target_filename) + else: + print "In %s broken link: " % filename, target + if '--find-sources' in sys.argv: + print "Possible sources:" + print os.popen( + 'nikola build list --deps %s' % task, 'r').read() + print "===============================\n" + + except Exception as exc: + print "Error with:", filename, exc + + +def scan_links(): + print "Checking Links:\n===============\n" + for task in os.popen('nikola build list --all', 'r').readlines(): + task = task.strip() + if task.split(':')[0] in ( + 'render_tags', + 'render_archive', + 'render_galleries', + 'render_indexes', + 'render_pages', + 'render_site') and '.html' in task: + analize(task) + + +def scan_files(): + print "Checking Files:\n===============\n" + task_fnames = set([]) + real_fnames = set([]) + # First check that all targets are generated in the right places + for task in os.popen('nikola build list --all', 'r').readlines(): + task = task.strip() + if 'output' in task and ':' in task: + fname = task.split(':')[-1] + task_fnames.add(fname) + # And now check that there are no non-target files + for root, dirs, files in os.walk('output'): + for src_name in files: + fname = os.path.join(root, src_name) + real_fnames.add(fname) + + only_on_output = list(real_fnames - task_fnames) + if only_on_output: + only_on_output.sort() + print "\nFiles from unknown origins:\n" + for f in only_on_output: + print f + + only_on_input = list(task_fnames - real_fnames) + if only_on_input: + only_on_input.sort() + print "\nFiles not generated:\n" + for f in only_on_input: + print f |
