Imported Upstream version 7.1.0 [upstream/7.1.0]

author: Agustin Henze <tin@sluc.org.ar> 2014-10-21 10:33:15 -0300
committer: Agustin Henze <tin@sluc.org.ar> 2014-10-21 10:33:15 -0300
commit: 5ec02211214350ee558fd9f6bb052264fd24f75e (patch)
tree: b61e8c61a95d18a91d053e71dcbd7b30e47552a1 /nikola/plugins/command/check.py
parent: 58c4878526dec5510f23c812274686787d8724ba (diff)
1 files changed, 23 insertions, 15 deletions
diff --git a/nikola/plugins/command/check.py b/nikola/plugins/command/check.py
index 76571a0..bd254f4 100644
--- a/nikola/plugins/command/check.py
+++ b/nikola/plugins/command/check.py
@@ -30,9 +30,9 @@ import re
 import sys
 try:
     from urllib import unquote
-    from urlparse import urlparse
+    from urlparse import urlparse, urljoin, urldefrag
 except ImportError:
-    from urllib.parse import unquote, urlparse  # NOQA
+    from urllib.parse import unquote, urlparse, urljoin, urldefrag  # NOQA
 
 import lxml.html
 
@@ -63,6 +63,15 @@ def real_scan_files(site):
     return (only_on_output, only_on_input)
 
 
+def fs_relpath_from_url_path(url_path):
+    """Unquote a URL path (expects an urlparse(s).path) for use as a filesystem path."""
+    url_path = unquote(url_path)
+    # On Windows, relative paths don't begin with os.sep: drop the first character and use backslashes.
+    if sys.platform == 'win32' and len(url_path):
+        url_path = url_path[1:].replace('/', '\\')
+    return url_path
+
+
 class CommandCheck(Command):
     """Check the generated site."""
 
@@ -142,6 +151,8 @@ class CommandCheck(Command):
         self.existing_targets.add(self.site.config['SITE_URL'])
         self.existing_targets.add(self.site.config['BASE_URL'])
         url_type = self.site.config['URL_TYPE']
+        if url_type == 'absolute':
+            url_netloc_to_root = urlparse(self.site.config['SITE_URL']).path
         try:
             filename = task.split(":")[-1]
             d = lxml.html.fromstring(open(filename).read())
@@ -149,6 +160,7 @@ class CommandCheck(Command):
                 target = l[0].attrib[l[1]]
                 if target == "#":
                     continue
+                target, _ = urldefrag(target)
                 parsed = urlparse(target)
 
                 # Absolute links when using only paths, skip.
@@ -159,24 +171,20 @@ class CommandCheck(Command):
                 if (parsed.scheme or target.startswith('//')) and parsed.netloc != base_url.netloc:
                     continue
 
-                if parsed.fragment:
-                    target = target.split('#')[0]
                 if url_type == 'rel_path':
                     target_filename = os.path.abspath(
                         os.path.join(os.path.dirname(filename), unquote(target)))
 
                 elif url_type in ('full_path', 'absolute'):
-                    target_filename = os.path.abspath(
-                        os.path.join(os.path.dirname(filename), parsed.path))
-                    if parsed.path in ['', '/']:
-                        target_filename = os.path.join(self.site.config['OUTPUT_FOLDER'], self.site.config['INDEX_FILE'])
-                    elif parsed.path.endswith('/'):  # abspath removes trailing slashes
-                        target_filename += '/{0}'.format(self.site.config['INDEX_FILE'])
-                    if target_filename.startswith(base_url.path):
-                        target_filename = target_filename[len(base_url.path):]
-                    target_filename = os.path.join(self.site.config['OUTPUT_FOLDER'], target_filename)
-                    if parsed.path in ['', '/']:
-                        target_filename = os.path.join(self.site.config['OUTPUT_FOLDER'], self.site.config['INDEX_FILE'])
+                    if url_type == 'absolute':
+                        # convert to 'full_path' case, ie url relative to root
+                        url_rel_path = target.path[len(url_netloc_to_root):]
+                    else:
+                        url_rel_path = target.path
+                    if url_rel_path == '' or url_rel_path.endswith('/'):
+                        url_rel_path = urljoin(url_rel_path, self.site.config['INDEX_FILE'])
+                    fs_rel_path = fs_relpath_from_url_path(url_rel_path)
+                    target_filename = os.path.join(self.site.config['OUTPUT_FOLDER'], fs_rel_path)
 
                 if any(re.match(x, target_filename) for x in self.whitelist):
                     continue
author	Agustin Henze <tin@sluc.org.ar>	2014-10-21 10:33:15 -0300
committer	Agustin Henze <tin@sluc.org.ar>	2014-10-21 10:33:15 -0300
commit	5ec02211214350ee558fd9f6bb052264fd24f75e (patch)
tree	b61e8c61a95d18a91d053e71dcbd7b30e47552a1 /nikola/plugins/command/check.py
parent	58c4878526dec5510f23c812274686787d8724ba (diff)