From 0f2c04e70a0ffdd0892d6970cafbcd952d221db5 Mon Sep 17 00:00:00 2001 From: Agustin Henze Date: Wed, 12 Dec 2012 20:15:48 -0300 Subject: Imported Upstream version 5 --- nikola/plugins/command_import_wordpress.py | 163 +++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 nikola/plugins/command_import_wordpress.py (limited to 'nikola/plugins/command_import_wordpress.py') diff --git a/nikola/plugins/command_import_wordpress.py b/nikola/plugins/command_import_wordpress.py new file mode 100644 index 0000000..e75d022 --- /dev/null +++ b/nikola/plugins/command_import_wordpress.py @@ -0,0 +1,163 @@ +import codecs +import os +from urlparse import urlparse +from urllib import urlopen + +from lxml import etree, html +from mako.template import Template + +from nikola.plugin_categories import Command +from nikola import utils + +links = {} + + +class CommandImportWordpress(Command): + """Import a wordpress dump.""" + + name = "import_wordpress" + + def run(self, fname=None): + # Parse the data + if fname is None: + print "Usage: nikola import_wordpress wordpress_dump.xml" + return + context = {} + with open(fname) as fd: + xml = [] + for line in fd: + # These explode etree and are useless + if ' %s" % (url, dst_path) + with open(dst_path, 'wb+') as fd: + fd.write(urlopen(url).read()) + dst_url = '/'.join(dst_path.split(os.sep)[2:]) + links[link] = '/' + dst_url + links[url] = '/' + dst_url + return + + +def import_item(item): + """Takes an item from the feed and creates a post file.""" + title = get_text_tag(item, 'title', 'NO TITLE') + # link is something like http://foo.com/2012/09/01/hello-world/ + # So, take the path, utils.slugify it, and that's our slug + slug = utils.slugify(urlparse(get_text_tag(item, 'link', None)).path) + description = get_text_tag(item, 'description', '') + post_date = get_text_tag(item, + '{http://wordpress.org/export/1.2/}post_date', None) + post_type = get_text_tag(item, + '{http://wordpress.org/export/1.2/}post_type', 'post') + status = get_text_tag(item, + '{http://wordpress.org/export/1.2/}status', 'publish') + content = get_text_tag(item, + '{http://purl.org/rss/1.0/modules/content/}encoded', '') + + tags = [] + if status != 'publish': + tags.append('draft') + for tag in item.findall('category'): + text = tag.text + if text == 'Uncategorized': + continue + tags.append(text) + + if post_type == 'attachment': + return + elif post_type == 'post': + out_folder = 'posts' + else: + out_folder = 'stories' + # Write metadata + with codecs.open(os.path.join('new_site', out_folder, slug + '.meta'), + "w+", "utf8") as fd: + fd.write(u'%s\n' % title) + fd.write(u'%s\n' % slug) + fd.write(u'%s\n' % post_date) + fd.write(u'%s\n' % ','.join(tags)) + fd.write(u'\n') + fd.write(u'%s\n' % description) + with open(os.path.join( + 'new_site', out_folder, slug + '.wp'), "wb+") as fd: + if content.strip(): + try: + doc = html.document_fromstring(content) + doc.rewrite_links(replacer) + fd.write(html.tostring(doc, encoding='utf8')) + except: + import pdb + pdb.set_trace() -- cgit v1.2.3