summaryrefslogtreecommitdiffstats
path: root/nikola/plugins/command_import_blogger.py
diff options
context:
space:
mode:
Diffstat (limited to 'nikola/plugins/command_import_blogger.py')
-rw-r--r--nikola/plugins/command_import_blogger.py300
1 files changed, 300 insertions, 0 deletions
diff --git a/nikola/plugins/command_import_blogger.py b/nikola/plugins/command_import_blogger.py
new file mode 100644
index 0000000..aea210a
--- /dev/null
+++ b/nikola/plugins/command_import_blogger.py
@@ -0,0 +1,300 @@
+# Copyright (c) 2012 Roberto Alsina y otros.
+
+# Permission is hereby granted, free of charge, to any
+# person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the
+# Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the
+# Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice
+# shall be included in all copies or substantial portions of
+# the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
+# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+from __future__ import unicode_literals, print_function
+import codecs
+import csv
+import datetime
+import os
+from optparse import OptionParser
+import time
+
+try:
+ from urlparse import urlparse
+except ImportError:
+ from urllib.parse import urlparse # NOQA
+
+try:
+ import feedparser
+except ImportError:
+ feedparser = None # NOQA
+from lxml import html
+from mako.template import Template
+
+from nikola.plugin_categories import Command
+from nikola import utils
+
+# Lookup table used by replacer(), which write_content() passes to lxml's
+# rewrite_links(): keys are link targets as found in the imported HTML,
+# values are what they get replaced with.  Nothing visible in this file adds
+# entries, so as shown every link is left unchanged.
+links = {}
+
+
+class CommandImportBlogger(Command):
+ """Import a blogger dump."""
+
+ name = "import_blogger"
+
+ @classmethod
+ def get_channel_from_file(cls, filename):
+ return feedparser.parse(filename)
+
+ @staticmethod
+ def configure_redirections(url_map):
+ redirections = []
+ for k, v in url_map.items():
+ # remove the initial "/" because src is a relative file path
+ src = (urlparse(k).path + 'index.html')[1:]
+ dst = (urlparse(v).path)
+ if src == 'index.html':
+ print("Can't do a redirect for: %r" % k)
+ else:
+ redirections.append((src, dst))
+
+ return redirections
+
+ def generate_base_site(self):
+ if not os.path.exists(self.output_folder):
+ os.system('nikola init --empty %s' % (self.output_folder, ))
+ else:
+ self.import_into_existing_site = True
+ print('The folder %s already exists - assuming that this is a '
+ 'already existing nikola site.' % self.output_folder)
+
+ conf_template = Template(filename=os.path.join(
+ os.path.dirname(utils.__file__), 'conf.py.in'))
+
+ return conf_template
+
+ @staticmethod
+ def populate_context(channel):
+ context = {}
+ context['DEFAULT_LANG'] = 'en' # blogger doesn't include the language
+ # in the dump
+ context['BLOG_TITLE'] = channel.feed.title
+
+ context['BLOG_DESCRIPTION'] = '' # Missing in the dump
+ context['BLOG_URL'] = channel.feed.link.rstrip('/')
+ context['BLOG_EMAIL'] = channel.feed.author_detail.email
+ context['BLOG_AUTHOR'] = channel.feed.author_detail.name
+ context['POST_PAGES'] = '''(
+ ("posts/*.html", "posts", "post.tmpl", True),
+ ("stories/*.html", "stories", "story.tmpl", False),
+ )'''
+ context['POST_COMPILERS'] = '''{
+ "rest": ('.txt', '.rst'),
+ "markdown": ('.md', '.mdown', '.markdown', '.wp'),
+ "html": ('.html', '.htm')
+ }
+ '''
+
+ return context
+
+ @classmethod
+ def transform_content(cls, content):
+ # No transformations yet
+ return content
+
+ @classmethod
+ def write_content(cls, filename, content):
+ doc = html.document_fromstring(content)
+ doc.rewrite_links(replacer)
+
+ with open(filename, "wb+") as fd:
+ fd.write(html.tostring(doc, encoding='utf8'))
+
+ @staticmethod
+ def write_metadata(filename, title, slug, post_date, description, tags):
+ with codecs.open(filename, "w+", "utf8") as fd:
+ fd.write('%s\n' % title)
+ fd.write('%s\n' % slug)
+ fd.write('%s\n' % post_date)
+ fd.write('%s\n' % ','.join(tags))
+ fd.write('\n')
+ fd.write('%s\n' % description)
+
+ def import_item(self, item, out_folder=None):
+ """Takes an item from the feed and creates a post file."""
+ if out_folder is None:
+ out_folder = 'posts'
+
+ # link is something like http://foo.com/2012/09/01/hello-world/
+ # So, take the path, utils.slugify it, and that's our slug
+ link = item.link
+ link_path = urlparse(link).path
+
+ title = item.title
+
+ # blogger supports empty titles, which Nikola doesn't
+ if not title:
+ print("Warning: Empty title in post with URL %s. Using NO_TITLE "
+ "as placeholder, please fix." % link)
+ title = "NO_TITLE"
+
+ if link_path.lower().endswith('.html'):
+ link_path = link_path[:-5]
+
+ slug = utils.slugify(link_path)
+
+ if not slug: # should never happen
+ print("Error converting post:", title)
+ return
+
+ description = ''
+ post_date = datetime.datetime.fromtimestamp(time.mktime(
+ item.published_parsed))
+
+ for candidate in item.content:
+ if candidate.type == 'text/html':
+ content = candidate.value
+ break
+ # FIXME: handle attachments
+
+ tags = []
+ for tag in item.tags:
+ if tag.scheme == 'http://www.blogger.com/atom/ns#':
+ tags.append(tag.term)
+
+ if item.get('app_draft'):
+ tags.append('draft')
+ is_draft = True
+ else:
+ is_draft = False
+
+ self.url_map[link] = self.context['BLOG_URL'] + '/' + \
+ out_folder + '/' + slug + '.html'
+
+ if is_draft and self.exclude_drafts:
+ print('Draft "%s" will not be imported.' % (title, ))
+ elif content.strip():
+ # If no content is found, no files are written.
+ content = self.transform_content(content)
+
+ self.write_metadata(os.path.join(self.output_folder, out_folder,
+ slug + '.meta'),
+ title, slug, post_date, description, tags)
+ self.write_content(
+ os.path.join(self.output_folder, out_folder, slug + '.html'),
+ content)
+ else:
+ print('Not going to import "%s" because it seems to contain'
+ ' no content.' % (title, ))
+
+ def process_item(self, item):
+ post_type = item.tags[0].term
+
+ if post_type == 'http://schemas.google.com/blogger/2008/kind#post':
+ self.import_item(item, 'posts')
+ elif post_type == 'http://schemas.google.com/blogger/2008/kind#page':
+ self.import_item(item, 'stories')
+ elif post_type == ('http://schemas.google.com/blogger/2008/kind'
+ '#settings'):
+ # Ignore settings
+ pass
+ elif post_type == ('http://schemas.google.com/blogger/2008/kind'
+ '#template'):
+ # Ignore template
+ pass
+ elif post_type == ('http://schemas.google.com/blogger/2008/kind'
+ '#comment'):
+ # FIXME: not importing comments. Does blogger support "pages"?
+ pass
+ else:
+ print("Unknown post_type:", post_type)
+
+ def import_posts(self, channel):
+ for item in channel.entries:
+ self.process_item(item)
+
+ @staticmethod
+ def write_urlmap_csv(output_file, url_map):
+ with codecs.open(output_file, 'w+', 'utf8') as fd:
+ csv_writer = csv.writer(fd)
+ for item in url_map.items():
+ csv_writer.writerow(item)
+
+ def get_configuration_output_path(self):
+ if not self.import_into_existing_site:
+ filename = 'conf.py'
+ else:
+ filename = 'conf.py.wordpress_import-%s' % datetime.datetime.now(
+ ).strftime('%Y%m%d_%H%M%s')
+ config_output_path = os.path.join(self.output_folder, filename)
+ print('Configuration will be written to: %s' % config_output_path)
+
+ return config_output_path
+
+ @staticmethod
+ def write_configuration(filename, rendered_template):
+ with codecs.open(filename, 'w+', 'utf8') as fd:
+ fd.write(rendered_template)
+
+ def run(self, *arguments):
+ """Import a Wordpress blog from an export file into a Nikola site."""
+ # Parse the data
+ if feedparser is None:
+ print('To use the import_blogger command,'
+ ' you have to install the "feedparser" package.')
+ return
+
+ parser = OptionParser(
+ usage="nikola %s [options] blogger_export_file" % self.name)
+ parser.add_option('-f', '--filename', dest='filename',
+ help='Blogger export file from which the import is '
+ 'made.')
+ parser.add_option('-o', '--output-folder', dest='output_folder',
+ default='new_site',
+ help='The location into which the imported content '
+ 'will be written')
+ parser.add_option('-d', '--no-drafts', dest='exclude_drafts',
+ default=False, action="store_true", help='Do not '
+ 'import drafts.')
+
+ (options, args) = parser.parse_args(list(arguments))
+
+ if not options.filename and args:
+ options.filename = args[0]
+
+ if not options.filename:
+ parser.print_usage()
+ return
+
+ self.blogger_export_file = options.filename
+ self.output_folder = options.output_folder
+ self.import_into_existing_site = False
+ self.exclude_drafts = options.exclude_drafts
+ self.url_map = {}
+ channel = self.get_channel_from_file(self.blogger_export_file)
+ self.context = self.populate_context(channel)
+ conf_template = self.generate_base_site()
+ self.context['REDIRECTIONS'] = self.configure_redirections(
+ self.url_map)
+
+ self.import_posts(channel)
+ self.write_urlmap_csv(
+ os.path.join(self.output_folder, 'url_map.csv'), self.url_map)
+
+ self.write_configuration(self.get_configuration_output_path(
+ ), conf_template.render(**self.context))
+
+
+def replacer(dst):
+ return links.get(dst, dst)