about summary refs log tree commit diff stats
path: root/nikola/plugins/command_import_wordpress.py
diff options
context:
space:
mode:
Diffstat (limited to 'nikola/plugins/command_import_wordpress.py')
-rw-r--r-- nikola/plugins/command_import_wordpress.py 240
1 files changed, 151 insertions, 89 deletions
diff --git a/nikola/plugins/command_import_wordpress.py b/nikola/plugins/command_import_wordpress.py
index 07028d8..e7ecca0 100644
--- a/nikola/plugins/command_import_wordpress.py
+++ b/nikola/plugins/command_import_wordpress.py
@@ -28,7 +28,6 @@ import csv
import datetime
import os
import re
-from optparse import OptionParser
try:
from urlparse import urlparse
@@ -53,9 +52,104 @@ class CommandImportWordpress(Command):
"""Import a wordpress dump."""
name = "import_wordpress"
+ needs_config = False
+ doc_usage = "[options] wordpress_export_file"
+ doc_purpose = "Import a wordpress dump."
+ cmd_options = [
+ {
+ 'name': 'output_folder',
+ 'long': 'output-folder',
+ 'short': 'o',
+ 'default': 'new_site',
+ 'help': 'Location to write imported content.'
+ },
+ {
+ 'name': 'exclude_drafts',
+ 'long': 'no-drafts',
+ 'short': 'd',
+ 'default': False,
+ 'type': bool,
+ 'help': "Don't import drafts",
+ },
+ {
+ 'name': 'squash_newlines',
+ 'long': 'squash-newlines',
+ 'default': False,
+ 'type': bool,
+ 'help': "Shorten multiple newlines in a row to only two newlines",
+ },
+ {
+ 'name': 'no_downloads',
+ 'long': 'no-downloads',
+ 'default': False,
+ 'type': bool,
+ 'help': "Do not try to download files for the import",
+ },
+ ]
+
+ def _execute(self, options={}, args=[]):
+ """Import a Wordpress blog from an export file into a Nikola site."""
+ # Parse the data
+ print(options, args)
+ if requests is None:
+ print('To use the import_wordpress command,'
+ ' you have to install the "requests" package.')
+ return
- @staticmethod
- def read_xml_file(filename):
+ if not args:
+ print(self.help())
+ return
+
+ options['filename'] = args[0]
+
+ if len(args) > 1:
+ options['output_folder'] = args[1]
+
+ self.wordpress_export_file = options['filename']
+ self.squash_newlines = options.get('squash_newlines', False)
+ self.no_downloads = options.get('no_downloads', False)
+ self.output_folder = options.get('output_folder', 'new_site')
+ self.import_into_existing_site = False
+ self.exclude_drafts = options.get('exclude_drafts', False)
+ self.url_map = {}
+ channel = self.get_channel_from_file(self.wordpress_export_file)
+ self.context = self.populate_context(channel)
+ conf_template = self.generate_base_site()
+
+ self.import_posts(channel)
+
+ self.context['REDIRECTIONS'] = self.configure_redirections(
+ self.url_map)
+ self.write_urlmap_csv(
+ os.path.join(self.output_folder, 'url_map.csv'), self.url_map)
+ rendered_template = conf_template.render(**self.context)
+ rendered_template = re.sub('# REDIRECTIONS = ', 'REDIRECTIONS = ',
+ rendered_template)
+ self.write_configuration(self.get_configuration_output_path(),
+ rendered_template)
+
+ @classmethod
+ def _glue_xml_lines(cls, xml):
+ new_xml = xml[0]
+ previous_line_ended_in_newline = new_xml.endswith(b'\n')
+ previous_line_was_indentet = False
+ for line in xml[1:]:
+ if (re.match(b'^[ \t]+', line) and previous_line_ended_in_newline):
+ new_xml = b''.join((new_xml, line))
+ previous_line_was_indentet = True
+ elif previous_line_was_indentet:
+ new_xml = b''.join((new_xml, line))
+ previous_line_was_indentet = False
+ else:
+ new_xml = b'\n'.join((new_xml, line))
+ previous_line_was_indentet = False
+
+ previous_line_ended_in_newline = line.endswith(b'\n')
+
+ return new_xml
+
+ @classmethod
+ def read_xml_file(cls, filename):
xml = []
with open(filename, 'rb') as fd:
@@ -64,9 +158,8 @@ class CommandImportWordpress(Command):
if b'<atom:link rel=' in line:
continue
xml.append(line)
- xml = b'\n'.join(xml)
- return xml
+ return cls._glue_xml_lines(xml)
@classmethod
def get_channel_from_file(cls, filename):
@@ -82,7 +175,7 @@ class CommandImportWordpress(Command):
src = (urlparse(k).path + 'index.html')[1:]
dst = (urlparse(v).path)
if src == 'index.html':
- print("Can't do a redirect for: %r" % k)
+ print("Can't do a redirect for: {0!r}".format(k))
else:
redirections.append((src, dst))
@@ -90,11 +183,11 @@ class CommandImportWordpress(Command):
def generate_base_site(self):
if not os.path.exists(self.output_folder):
- os.system('nikola init --empty %s' % (self.output_folder, ))
+ os.system('nikola init ' + self.output_folder)
else:
self.import_into_existing_site = True
- print('The folder %s already exists - assuming that this is a '
- 'already existing nikola site.' % self.output_folder)
+ print('The folder {0} already exists - assuming that this is a '
+ 'already existing nikola site.'.format(self.output_folder))
conf_template = Template(filename=os.path.join(
os.path.dirname(utils.__file__), 'conf.py.in'))
@@ -111,15 +204,16 @@ class CommandImportWordpress(Command):
'PUT TITLE HERE')
context['BLOG_DESCRIPTION'] = get_text_tag(
channel, 'description', 'PUT DESCRIPTION HERE')
- context['BLOG_URL'] = get_text_tag(channel, 'link', '#')
- author = channel.find('{%s}author' % wordpress_namespace)
+ context['SITE_URL'] = get_text_tag(channel, 'link', '#')
+ context['BASE_URL'] = get_text_tag(channel, 'link', '#')
+ author = channel.find('{{{0}}}author'.format(wordpress_namespace))
context['BLOG_EMAIL'] = get_text_tag(
author,
- '{%s}author_email' % wordpress_namespace,
+ '{{{0}}}author_email'.format(wordpress_namespace),
"joe@example.com")
context['BLOG_AUTHOR'] = get_text_tag(
author,
- '{%s}author_display_name' % wordpress_namespace,
+ '{{{0}}}author_display_name'.format(wordpress_namespace),
"Joe Example")
context['POST_PAGES'] = '''(
("posts/*.wp", "posts", "post.tmpl", True),
@@ -134,25 +228,29 @@ class CommandImportWordpress(Command):
return context
- @staticmethod
- def download_url_content_to_file(url, dst_path):
+ def download_url_content_to_file(self, url, dst_path):
+ if self.no_downloads:
+ return
+
try:
with open(dst_path, 'wb+') as fd:
fd.write(requests.get(url).content)
except requests.exceptions.ConnectionError as err:
- print("Downloading %s to %s failed: %s" % (url, dst_path, err))
+ print("Downloading {0} to {1} failed: {2}".format(url, dst_path,
+ err))
def import_attachment(self, item, wordpress_namespace):
url = get_text_tag(
- item, '{%s}attachment_url' % wordpress_namespace, 'foo')
- link = get_text_tag(item, '{%s}link' % wordpress_namespace, 'foo')
+ item, '{{{0}}}attachment_url'.format(wordpress_namespace), 'foo')
+ link = get_text_tag(item, '{{{0}}}link'.format(wordpress_namespace),
+ 'foo')
path = urlparse(url).path
dst_path = os.path.join(*([self.output_folder, 'files']
+ list(path.split('/'))))
dst_dir = os.path.dirname(dst_path)
if not os.path.isdir(dst_dir):
os.makedirs(dst_dir)
- print("Downloading %s => %s" % (url, dst_path))
+ print("Downloading {0} => {1}".format(url, dst_path))
self.download_url_content_to_file(url, dst_path)
dst_url = '/'.join(dst_path.split(os.sep)[2:])
links[link] = '/' + dst_url
@@ -173,10 +271,18 @@ class CommandImportWordpress(Command):
return new_caption
- @classmethod
- def transform_content(cls, content):
- new_content = cls.transform_sourcecode(content)
- return cls.transform_caption(new_content)
+ def transform_multiple_newlines(self, content):
+ """Replaces multiple newlines with only two."""
+ if self.squash_newlines:
+ return re.sub(r'\n{3,}', r'\n\n', content)
+ else:
+ return content
+
+ def transform_content(self, content):
+ new_content = self.transform_sourcecode(content)
+ new_content = self.transform_caption(new_content)
+ new_content = self.transform_multiple_newlines(new_content)
+ return new_content
@classmethod
def write_content(cls, filename, content):
@@ -188,13 +294,16 @@ class CommandImportWordpress(Command):
@staticmethod
def write_metadata(filename, title, slug, post_date, description, tags):
+ if not description:
+ description = ""
+
with codecs.open(filename, "w+", "utf8") as fd:
- fd.write('%s\n' % title)
- fd.write('%s\n' % slug)
- fd.write('%s\n' % post_date)
- fd.write('%s\n' % ','.join(tags))
+ fd.write('{0}\n'.format(title))
+ fd.write('{0}\n'.format(slug))
+ fd.write('{0}\n'.format(post_date))
+ fd.write('{0}\n'.format(','.join(tags)))
fd.write('\n')
- fd.write('%s\n' % description)
+ fd.write('{0}\n'.format(description))
def import_item(self, item, wordpress_namespace, out_folder=None):
"""Takes an item from the feed and creates a post file."""
@@ -208,19 +317,19 @@ class CommandImportWordpress(Command):
slug = utils.slugify(urlparse(link).path)
if not slug: # it happens if the post has no "nice" URL
slug = get_text_tag(
- item, '{%s}post_name' % wordpress_namespace, None)
+ item, '{{{0}}}post_name'.format(wordpress_namespace), None)
if not slug: # it *may* happen
slug = get_text_tag(
- item, '{%s}post_id' % wordpress_namespace, None)
+ item, '{{{0}}}post_id'.format(wordpress_namespace), None)
if not slug: # should never happen
print("Error converting post:", title)
return
description = get_text_tag(item, 'description', '')
post_date = get_text_tag(
- item, '{%s}post_date' % wordpress_namespace, None)
+ item, '{{{0}}}post_date'.format(wordpress_namespace), None)
status = get_text_tag(
- item, '{%s}status' % wordpress_namespace, 'publish')
+ item, '{{{0}}}status'.format(wordpress_namespace), 'publish')
content = get_text_tag(
item, '{http://purl.org/rss/1.0/modules/content/}encoded', '')
@@ -237,13 +346,13 @@ class CommandImportWordpress(Command):
continue
tags.append(text)
- self.url_map[link] = self.context['BLOG_URL'] + '/' + \
- out_folder + '/' + slug + '.html'
-
if is_draft and self.exclude_drafts:
- print('Draft "%s" will not be imported.' % (title, ))
+ print('Draft "{0}" will not be imported.'.format(title))
elif content.strip():
# If no content is found, no files are written.
+ self.url_map[link] = self.context['SITE_URL'] + '/' + \
+ out_folder + '/' + slug + '.html'
+
content = self.transform_content(content)
self.write_metadata(os.path.join(self.output_folder, out_folder,
@@ -253,15 +362,15 @@ class CommandImportWordpress(Command):
os.path.join(self.output_folder, out_folder, slug + '.wp'),
content)
else:
- print('Not going to import "%s" because it seems to contain'
- ' no content.' % (title, ))
+ print('Not going to import "{0}" because it seems to contain'
+ ' no content.'.format(title))
def process_item(self, item):
# The namespace usually is something like:
# http://wordpress.org/export/1.2/
wordpress_namespace = item.nsmap['wp']
post_type = get_text_tag(
- item, '{%s}post_type' % wordpress_namespace, 'post')
+ item, '{{{0}}}post_type'.format(wordpress_namespace), 'post')
if post_type == 'attachment':
self.import_attachment(item, wordpress_namespace)
@@ -285,10 +394,10 @@ class CommandImportWordpress(Command):
if not self.import_into_existing_site:
filename = 'conf.py'
else:
- filename = 'conf.py.wordpress_import-%s' % datetime.datetime.now(
- ).strftime('%Y%m%d_%H%M%s')
+ filename = 'conf.py.wordpress_import-{0}'.format(
+ datetime.datetime.now().strftime('%Y%m%d_%H%M%s'))
config_output_path = os.path.join(self.output_folder, filename)
- print('Configuration will be written to: %s' % config_output_path)
+ print('Configuration will be written to:', config_output_path)
return config_output_path
@@ -297,53 +406,6 @@ class CommandImportWordpress(Command):
with codecs.open(filename, 'w+', 'utf8') as fd:
fd.write(rendered_template)
- def run(self, *arguments):
- """Import a Wordpress blog from an export file into a Nikola site."""
- # Parse the data
- if requests is None:
- print('To use the import_wordpress command,'
- ' you have to install the "requests" package.')
- return
-
- parser = OptionParser(usage="nikola %s [options] "
- "wordpress_export_file" % self.name)
- parser.add_option('-f', '--filename', dest='filename',
- help='WordPress export file from which the import '
- 'made.')
- parser.add_option('-o', '--output-folder', dest='output_folder',
- default='new_site', help='The location into which '
- 'the imported content will be written')
- parser.add_option('-d', '--no-drafts', dest='exclude_drafts',
- default=False, action="store_true", help='Do not '
- 'import drafts.')
-
- (options, args) = parser.parse_args(list(arguments))
-
- if not options.filename and args:
- options.filename = args[0]
-
- if not options.filename:
- parser.print_usage()
- return
-
- self.wordpress_export_file = options.filename
- self.output_folder = options.output_folder
- self.import_into_existing_site = False
- self.exclude_drafts = options.exclude_drafts
- self.url_map = {}
- channel = self.get_channel_from_file(self.wordpress_export_file)
- self.context = self.populate_context(channel)
- conf_template = self.generate_base_site()
- self.context['REDIRECTIONS'] = self.configure_redirections(
- self.url_map)
-
- self.import_posts(channel)
- self.write_urlmap_csv(
- os.path.join(self.output_folder, 'url_map.csv'), self.url_map)
-
- self.write_configuration(self.get_configuration_output_path(
- ), conf_template.render(**self.context))
-
def replacer(dst):
return links.get(dst, dst)