From 8b14a1e5b2ca574fdd4fd2377567ec98a110d4b6 Mon Sep 17 00:00:00 2001 From: Agustin Henze Date: Wed, 13 Mar 2013 20:58:39 -0300 Subject: Imported Upstream version 5.4.2 --- nikola/plugins/command_import_wordpress.py | 240 ++++++++++++++++++----------- 1 file changed, 151 insertions(+), 89 deletions(-) (limited to 'nikola/plugins/command_import_wordpress.py') diff --git a/nikola/plugins/command_import_wordpress.py b/nikola/plugins/command_import_wordpress.py index 07028d8..e7ecca0 100644 --- a/nikola/plugins/command_import_wordpress.py +++ b/nikola/plugins/command_import_wordpress.py @@ -28,7 +28,6 @@ import csv import datetime import os import re -from optparse import OptionParser try: from urlparse import urlparse @@ -53,9 +52,104 @@ class CommandImportWordpress(Command): """Import a wordpress dump.""" name = "import_wordpress" + needs_config = False + doc_usage = "[options] wordpress_export_file" + doc_purpose = "Import a wordpress dump." + cmd_options = [ + { + 'name': 'output_folder', + 'long': 'output-folder', + 'short': 'o', + 'default': 'new_site', + 'help': 'Location to write imported content.' + }, + { + 'name': 'exclude_drafts', + 'long': 'no-drafts', + 'short': 'd', + 'default': False, + 'type': bool, + 'help': "Don't import drafts", + }, + { + 'name': 'squash_newlines', + 'long': 'squash-newlines', + 'default': False, + 'type': bool, + 'help': "Shorten multiple newlines in a row to only two newlines", + }, + { + 'name': 'no_downloads', + 'long': 'no-downloads', + 'default': False, + 'type': bool, + 'help': "Do not try to download files for the import", + }, + ] + + def _execute(self, options={}, args=[]): + """Import a Wordpress blog from an export file into a Nikola site.""" + # Parse the data + print(options, args) + if requests is None: + print('To use the import_wordpress command,' + ' you have to install the "requests" package.') + return - @staticmethod - def read_xml_file(filename): + if not args: + print(self.help()) + return + + options['filename'] = args[0] + + if len(args) > 1: + options['output_folder'] = args[1] + + self.wordpress_export_file = options['filename'] + self.squash_newlines = options.get('squash_newlines', False) + self.no_downloads = options.get('no_downloads', False) + self.output_folder = options.get('output_folder', 'new_site') + self.import_into_existing_site = False + self.exclude_drafts = options.get('exclude_drafts', False) + self.url_map = {} + channel = self.get_channel_from_file(self.wordpress_export_file) + self.context = self.populate_context(channel) + conf_template = self.generate_base_site() + + self.import_posts(channel) + + self.context['REDIRECTIONS'] = self.configure_redirections( + self.url_map) + self.write_urlmap_csv( + os.path.join(self.output_folder, 'url_map.csv'), self.url_map) + rendered_template = conf_template.render(**self.context) + rendered_template = re.sub('# REDIRECTIONS = ', 'REDIRECTIONS = ', + rendered_template) + self.write_configuration(self.get_configuration_output_path(), + rendered_template) + + @classmethod + def _glue_xml_lines(cls, xml): + new_xml = xml[0] + previous_line_ended_in_newline = new_xml.endswith(b'\n') + previous_line_was_indentet = False + for line in xml[1:]: + if (re.match(b'^[ \t]+', line) and previous_line_ended_in_newline): + new_xml = b''.join((new_xml, line)) + previous_line_was_indentet = True + elif previous_line_was_indentet: + new_xml = b''.join((new_xml, line)) + previous_line_was_indentet = False + else: + new_xml = b'\n'.join((new_xml, line)) + previous_line_was_indentet = False + + previous_line_ended_in_newline = line.endswith(b'\n') + + return new_xml + + @classmethod + def read_xml_file(cls, filename): xml = [] with open(filename, 'rb') as fd: @@ -64,9 +158,8 @@ class CommandImportWordpress(Command): if b' %s" % (url, dst_path)) + print("Downloading {0} => {1}".format(url, dst_path)) self.download_url_content_to_file(url, dst_path) dst_url = '/'.join(dst_path.split(os.sep)[2:]) links[link] = '/' + dst_url @@ -173,10 +271,18 @@ class CommandImportWordpress(Command): return new_caption - @classmethod - def transform_content(cls, content): - new_content = cls.transform_sourcecode(content) - return cls.transform_caption(new_content) + def transform_multiple_newlines(self, content): + """Replaces multiple newlines with only two.""" + if self.squash_newlines: + return re.sub(r'\n{3,}', r'\n\n', content) + else: + return content + + def transform_content(self, content): + new_content = self.transform_sourcecode(content) + new_content = self.transform_caption(new_content) + new_content = self.transform_multiple_newlines(new_content) + return new_content @classmethod def write_content(cls, filename, content): @@ -188,13 +294,16 @@ class CommandImportWordpress(Command): @staticmethod def write_metadata(filename, title, slug, post_date, description, tags): + if not description: + description = "" + with codecs.open(filename, "w+", "utf8") as fd: - fd.write('%s\n' % title) - fd.write('%s\n' % slug) - fd.write('%s\n' % post_date) - fd.write('%s\n' % ','.join(tags)) + fd.write('{0}\n'.format(title)) + fd.write('{0}\n'.format(slug)) + fd.write('{0}\n'.format(post_date)) + fd.write('{0}\n'.format(','.join(tags))) fd.write('\n') - fd.write('%s\n' % description) + fd.write('{0}\n'.format(description)) def import_item(self, item, wordpress_namespace, out_folder=None): """Takes an item from the feed and creates a post file.""" @@ -208,19 +317,19 @@ class CommandImportWordpress(Command): slug = utils.slugify(urlparse(link).path) if not slug: # it happens if the post has no "nice" URL slug = get_text_tag( - item, '{%s}post_name' % wordpress_namespace, None) + item, '{{{0}}}post_name'.format(wordpress_namespace), None) if not slug: # it *may* happen slug = get_text_tag( - item, '{%s}post_id' % wordpress_namespace, None) + item, '{{{0}}}post_id'.format(wordpress_namespace), None) if not slug: # should never happen print("Error converting post:", title) return description = get_text_tag(item, 'description', '') post_date = get_text_tag( - item, '{%s}post_date' % wordpress_namespace, None) + item, '{{{0}}}post_date'.format(wordpress_namespace), None) status = get_text_tag( - item, '{%s}status' % wordpress_namespace, 'publish') + item, '{{{0}}}status'.format(wordpress_namespace), 'publish') content = get_text_tag( item, '{http://purl.org/rss/1.0/modules/content/}encoded', '') @@ -237,13 +346,13 @@ class CommandImportWordpress(Command): continue tags.append(text) - self.url_map[link] = self.context['BLOG_URL'] + '/' + \ - out_folder + '/' + slug + '.html' - if is_draft and self.exclude_drafts: - print('Draft "%s" will not be imported.' % (title, )) + print('Draft "{0}" will not be imported.'.format(title)) elif content.strip(): # If no content is found, no files are written. + self.url_map[link] = self.context['SITE_URL'] + '/' + \ + out_folder + '/' + slug + '.html' + content = self.transform_content(content) self.write_metadata(os.path.join(self.output_folder, out_folder, @@ -253,15 +362,15 @@ class CommandImportWordpress(Command): os.path.join(self.output_folder, out_folder, slug + '.wp'), content) else: - print('Not going to import "%s" because it seems to contain' - ' no content.' % (title, )) + print('Not going to import "{0}" because it seems to contain' + ' no content.'.format(title)) def process_item(self, item): # The namespace usually is something like: # http://wordpress.org/export/1.2/ wordpress_namespace = item.nsmap['wp'] post_type = get_text_tag( - item, '{%s}post_type' % wordpress_namespace, 'post') + item, '{{{0}}}post_type'.format(wordpress_namespace), 'post') if post_type == 'attachment': self.import_attachment(item, wordpress_namespace) @@ -285,10 +394,10 @@ class CommandImportWordpress(Command): if not self.import_into_existing_site: filename = 'conf.py' else: - filename = 'conf.py.wordpress_import-%s' % datetime.datetime.now( - ).strftime('%Y%m%d_%H%M%s') + filename = 'conf.py.wordpress_import-{0}'.format( + datetime.datetime.now().strftime('%Y%m%d_%H%M%s')) config_output_path = os.path.join(self.output_folder, filename) - print('Configuration will be written to: %s' % config_output_path) + print('Configuration will be written to:', config_output_path) return config_output_path @@ -297,53 +406,6 @@ class CommandImportWordpress(Command): with codecs.open(filename, 'w+', 'utf8') as fd: fd.write(rendered_template) - def run(self, *arguments): - """Import a Wordpress blog from an export file into a Nikola site.""" - # Parse the data - if requests is None: - print('To use the import_wordpress command,' - ' you have to install the "requests" package.') - return - - parser = OptionParser(usage="nikola %s [options] " - "wordpress_export_file" % self.name) - parser.add_option('-f', '--filename', dest='filename', - help='WordPress export file from which the import ' - 'made.') - parser.add_option('-o', '--output-folder', dest='output_folder', - default='new_site', help='The location into which ' - 'the imported content will be written') - parser.add_option('-d', '--no-drafts', dest='exclude_drafts', - default=False, action="store_true", help='Do not ' - 'import drafts.') - - (options, args) = parser.parse_args(list(arguments)) - - if not options.filename and args: - options.filename = args[0] - - if not options.filename: - parser.print_usage() - return - - self.wordpress_export_file = options.filename - self.output_folder = options.output_folder - self.import_into_existing_site = False - self.exclude_drafts = options.exclude_drafts - self.url_map = {} - channel = self.get_channel_from_file(self.wordpress_export_file) - self.context = self.populate_context(channel) - conf_template = self.generate_base_site() - self.context['REDIRECTIONS'] = self.configure_redirections( - self.url_map) - - self.import_posts(channel) - self.write_urlmap_csv( - os.path.join(self.output_folder, 'url_map.csv'), self.url_map) - - self.write_configuration(self.get_configuration_output_path( - ), conf_template.render(**self.context)) - def replacer(dst): return links.get(dst, dst) -- cgit v1.2.3