diff options
| author | 2013-11-20 16:58:50 -0300 | |
|---|---|---|
| committer | 2013-11-20 16:58:50 -0300 | |
| commit | ca94afc07df55cb7fc6fe3b4f3011877b7881195 (patch) | |
| tree | d81e1f275aa77545f33740723f307a83dde2e0b4 /nikola/plugins/command_import_blogger.py | |
| parent | f794eee787e9cde54e6b8f53e45d69c9ddc9936a (diff) | |
Imported Upstream version 6.2.1upstream/6.2.1
Diffstat (limited to 'nikola/plugins/command_import_blogger.py')
| -rw-r--r-- | nikola/plugins/command_import_blogger.py | 308 |
1 files changed, 0 insertions, 308 deletions
diff --git a/nikola/plugins/command_import_blogger.py b/nikola/plugins/command_import_blogger.py deleted file mode 100644 index ecc4676..0000000 --- a/nikola/plugins/command_import_blogger.py +++ /dev/null @@ -1,308 +0,0 @@ -# Copyright (c) 2012 Roberto Alsina y otros. - -# Permission is hereby granted, free of charge, to any -# person obtaining a copy of this software and associated -# documentation files (the "Software"), to deal in the -# Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the -# Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice -# shall be included in all copies or substantial portions of -# the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY -# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS -# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -from __future__ import unicode_literals, print_function -import codecs -import csv -import datetime -import os -import time - -try: - from urlparse import urlparse -except ImportError: - from urllib.parse import urlparse # NOQA - -try: - import feedparser -except ImportError: - feedparser = None # NOQA -from lxml import html -from mako.template import Template - -from nikola.plugin_categories import Command -from nikola import utils - -links = {} - - -class CommandImportBlogger(Command): - """Import a blogger dump.""" - - name = "import_blogger" - needs_config = False - doc_usage = "[options] blogger_export_file" - doc_purpose = "Import a blogger dump." - cmd_options = [ - { - 'name': 'output_folder', - 'long': 'output-folder', - 'short': 'o', - 'default': 'new_site', - 'help': 'Location to write imported content.' - }, - { - 'name': 'exclude_drafts', - 'long': 'no-drafts', - 'short': 'd', - 'default': False, - 'type': bool, - 'help': "Don't import drafts", - }, - ] - - def _execute(self, options, args): - """Import a Blogger blog from an export file into a Nikola site.""" - - # Parse the data - if feedparser is None: - print('To use the import_blogger command,' - ' you have to install the "feedparser" package.') - return - - if not args: - print(self.help()) - return - - options['filename'] = args[0] - self.blogger_export_file = options['filename'] - self.output_folder = options['output_folder'] - self.import_into_existing_site = False - self.exclude_drafts = options['exclude_drafts'] - self.url_map = {} - channel = self.get_channel_from_file(self.blogger_export_file) - self.context = self.populate_context(channel) - conf_template = self.generate_base_site() - self.context['REDIRECTIONS'] = self.configure_redirections( - self.url_map) - - self.import_posts(channel) - self.write_urlmap_csv( - os.path.join(self.output_folder, 'url_map.csv'), self.url_map) - - self.write_configuration(self.get_configuration_output_path( - ), conf_template.render(**self.context)) - - @classmethod - def get_channel_from_file(cls, filename): - if not os.path.isfile(filename): - raise Exception("Missing file: %s" % filename) - return feedparser.parse(filename) - - @staticmethod - def configure_redirections(url_map): - redirections = [] - for k, v in url_map.items(): - # remove the initial "/" because src is a relative file path - src = (urlparse(k).path + 'index.html')[1:] - dst = (urlparse(v).path) - if src == 'index.html': - print("Can't do a redirect for: {0!r}".format(k)) - else: - redirections.append((src, dst)) - - return redirections - - def generate_base_site(self): - if not os.path.exists(self.output_folder): - os.system('nikola init ' + self.output_folder) - else: - self.import_into_existing_site = True - print('The folder {0} already exists - assuming that this is a ' - 'already existing nikola site.'.format(self.output_folder)) - - conf_template = Template(filename=os.path.join( - os.path.dirname(utils.__file__), 'conf.py.in')) - - return conf_template - - @staticmethod - def populate_context(channel): - context = {} - context['DEFAULT_LANG'] = 'en' # blogger doesn't include the language - # in the dump - context['BLOG_TITLE'] = channel.feed.title - - context['BLOG_DESCRIPTION'] = '' # Missing in the dump - context['SITE_URL'] = channel.feed.link.rstrip('/') - context['BLOG_EMAIL'] = channel.feed.author_detail.email - context['BLOG_AUTHOR'] = channel.feed.author_detail.name - context['POST_PAGES'] = '''( - ("posts/*.html", "posts", "post.tmpl", True), - ("stories/*.html", "stories", "story.tmpl", False), - )''' - context['POST_COMPILERS'] = '''{ - "rest": ('.txt', '.rst'), - "markdown": ('.md', '.mdown', '.markdown', '.wp'), - "html": ('.html', '.htm') - } - ''' - - return context - - @classmethod - def transform_content(cls, content): - # No transformations yet - return content - - @classmethod - def write_content(cls, filename, content): - doc = html.document_fromstring(content) - doc.rewrite_links(replacer) - - with open(filename, "wb+") as fd: - fd.write(html.tostring(doc, encoding='utf8')) - - @staticmethod - def write_metadata(filename, title, slug, post_date, description, tags): - if not description: - description = "" - - with codecs.open(filename, "w+", "utf8") as fd: - fd.write('{0}\n'.format(title)) - fd.write('{0}\n'.format(slug)) - fd.write('{0}\n'.format(post_date)) - fd.write('{0}\n'.format(','.join(tags))) - fd.write('\n') - fd.write('{0}\n'.format(description)) - - def import_item(self, item, out_folder=None): - """Takes an item from the feed and creates a post file.""" - if out_folder is None: - out_folder = 'posts' - - # link is something like http://foo.com/2012/09/01/hello-world/ - # So, take the path, utils.slugify it, and that's our slug - link = item.link - link_path = urlparse(link).path - - title = item.title - - # blogger supports empty titles, which Nikola doesn't - if not title: - print("Warning: Empty title in post with URL {0}. Using NO_TITLE " - "as placeholder, please fix.".format(link)) - title = "NO_TITLE" - - if link_path.lower().endswith('.html'): - link_path = link_path[:-5] - - slug = utils.slugify(link_path) - - if not slug: # should never happen - print("Error converting post:", title) - return - - description = '' - post_date = datetime.datetime.fromtimestamp(time.mktime( - item.published_parsed)) - - for candidate in item.content: - if candidate.type == 'text/html': - content = candidate.value - break - # FIXME: handle attachments - - tags = [] - for tag in item.tags: - if tag.scheme == 'http://www.blogger.com/atom/ns#': - tags.append(tag.term) - - if item.get('app_draft'): - tags.append('draft') - is_draft = True - else: - is_draft = False - - self.url_map[link] = self.context['SITE_URL'] + '/' + \ - out_folder + '/' + slug + '.html' - - if is_draft and self.exclude_drafts: - print('Draft "{0}" will not be imported.'.format(title)) - elif content.strip(): - # If no content is found, no files are written. - content = self.transform_content(content) - - self.write_metadata(os.path.join(self.output_folder, out_folder, - slug + '.meta'), - title, slug, post_date, description, tags) - self.write_content( - os.path.join(self.output_folder, out_folder, slug + '.html'), - content) - else: - print('Not going to import "{0}" because it seems to contain' - ' no content.'.format(title)) - - def process_item(self, item): - post_type = item.tags[0].term - - if post_type == 'http://schemas.google.com/blogger/2008/kind#post': - self.import_item(item, 'posts') - elif post_type == 'http://schemas.google.com/blogger/2008/kind#page': - self.import_item(item, 'stories') - elif post_type == ('http://schemas.google.com/blogger/2008/kind' - '#settings'): - # Ignore settings - pass - elif post_type == ('http://schemas.google.com/blogger/2008/kind' - '#template'): - # Ignore template - pass - elif post_type == ('http://schemas.google.com/blogger/2008/kind' - '#comment'): - # FIXME: not importing comments. Does blogger support "pages"? - pass - else: - print("Unknown post_type:", post_type) - - def import_posts(self, channel): - for item in channel.entries: - self.process_item(item) - - @staticmethod - def write_urlmap_csv(output_file, url_map): - with codecs.open(output_file, 'w+', 'utf8') as fd: - csv_writer = csv.writer(fd) - for item in url_map.items(): - csv_writer.writerow(item) - - def get_configuration_output_path(self): - if not self.import_into_existing_site: - filename = 'conf.py' - else: - filename = 'conf.py.blogger_import-{0}'.format( - datetime.datetime.now().strftime('%Y%m%d_%H%M%s')) - config_output_path = os.path.join(self.output_folder, filename) - print('Configuration will be written to: ' + config_output_path) - - return config_output_path - - @staticmethod - def write_configuration(filename, rendered_template): - with codecs.open(filename, 'w+', 'utf8') as fd: - fd.write(rendered_template) - - -def replacer(dst): - return links.get(dst, dst) |
