diff options
| author | 2014-10-21 10:33:15 -0300 | |
|---|---|---|
| committer | 2014-10-21 10:33:15 -0300 | |
| commit | 5ec02211214350ee558fd9f6bb052264fd24f75e (patch) | |
| tree | b61e8c61a95d18a91d053e71dcbd7b30e47552a1 /nikola/plugins/command/import_wordpress.py | |
| parent | 58c4878526dec5510f23c812274686787d8724ba (diff) | |
Imported Upstream version 7.1.0 (ref: upstream/7.1.0)
Diffstat (limited to 'nikola/plugins/command/import_wordpress.py')
| -rw-r--r-- | nikola/plugins/command/import_wordpress.py | 50 |
1 file changed, 31 insertions, 19 deletions
diff --git a/nikola/plugins/command/import_wordpress.py b/nikola/plugins/command/import_wordpress.py index 8ddc8c7..1af4083 100644 --- a/nikola/plugins/command/import_wordpress.py +++ b/nikola/plugins/command/import_wordpress.py @@ -158,6 +158,7 @@ class CommandImportWordpress(Command, ImportMixin): channel = self.get_channel_from_file(self.wordpress_export_file) self.context = self.populate_context(channel) + self.base_dir = urlparse(self.context['BASE_URL']).path conf_template = self.generate_base_site() # If user has specified a custom pattern for translation files we @@ -323,13 +324,15 @@ class CommandImportWordpress(Command, ImportMixin): # your blogging into another site or system its not. # Why don't they just use JSON? if sys.version_info[0] == 2: - metadata = phpserialize.loads(utils.sys_encode(meta_value.text)) - size_key = 'sizes' - file_key = 'file' + try: + metadata = phpserialize.loads(utils.sys_encode(meta_value.text)) + except ValueError: + # local encoding might be wrong sometimes + metadata = phpserialize.loads(meta_value.text.encode('utf-8')) else: - metadata = phpserialize.loads(meta_value.text.encode('UTF-8')) - size_key = b'sizes' - file_key = b'file' + metadata = phpserialize.loads(meta_value.text.encode('utf-8')) + size_key = b'sizes' + file_key = b'file' if size_key not in metadata: continue @@ -385,26 +388,34 @@ class CommandImportWordpress(Command, ImportMixin): # link is something like http://foo.com/2012/09/01/hello-world/ # So, take the path, utils.slugify it, and that's our slug link = get_text_tag(item, 'link', None) - path = unquote(urlparse(link).path.strip('/')) + parsed = urlparse(link) + path = unquote(parsed.path.strip('/')) # In python 2, path is a str. slug requires a unicode # object. According to wikipedia, unquoted strings will # usually be UTF8 if isinstance(path, utils.bytes_str): path = path.decode('utf8') + + # Cut out the base directory. 
+ if path.startswith(self.base_dir.strip('/')): + path = path.replace(self.base_dir.strip('/'), '', 1) + pathlist = path.split('/') - if len(pathlist) > 1: - out_folder = os.path.join(*([out_folder] + pathlist[:-1])) - slug = utils.slugify(pathlist[-1]) - if not slug: # it happens if the post has no "nice" URL + if parsed.query: # if there are no nice URLs and query strings are used + out_folder = os.path.join(*([out_folder] + pathlist)) slug = get_text_tag( item, '{{{0}}}post_name'.format(wordpress_namespace), None) - if not slug: # it *may* happen - slug = get_text_tag( - item, '{{{0}}}post_id'.format(wordpress_namespace), None) - if not slug: # should never happen - LOGGER.error("Error converting post:", title) - return + if not slug: # it *may* happen + slug = get_text_tag( + item, '{{{0}}}post_id'.format(wordpress_namespace), None) + if not slug: # should never happen + LOGGER.error("Error converting post:", title) + return + else: + if len(pathlist) > 1: + out_folder = os.path.join(*([out_folder] + pathlist[:-1])) + slug = utils.slugify(pathlist[-1]) description = get_text_tag(item, 'description', '') post_date = get_text_tag( @@ -440,8 +451,9 @@ class CommandImportWordpress(Command, ImportMixin): LOGGER.notice('Draft "{0}" will not be imported.'.format(title)) elif content.strip(): # If no content is found, no files are written. - self.url_map[link] = (self.context['SITE_URL'] + out_folder + '/' - + slug + '.html') + self.url_map[link] = (self.context['SITE_URL'] + + out_folder.rstrip('/') + '/' + slug + + '.html').replace(os.sep, '/') if hasattr(self, "separate_qtranslate_content") \ and self.separate_qtranslate_content: content_translations = separate_qtranslate_content(content) |
