Diffstat (limited to 'nikola/plugins/command/planetoid')
| -rw-r--r-- | nikola/plugins/command/planetoid/__init__.py | 289 |
1 file changed, 289 insertions, 0 deletions
diff --git a/nikola/plugins/command/planetoid/__init__.py b/nikola/plugins/command/planetoid/__init__.py
new file mode 100644
index 0000000..369862b
--- /dev/null
+++ b/nikola/plugins/command/planetoid/__init__.py
@@ -0,0 +1,289 @@
+# -*- coding: utf-8 -*-
+
+# Copyright © 2012-2013 Roberto Alsina and others.
+
+# Permission is hereby granted, free of charge, to any
+# person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the
+# Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the
+# Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice
+# shall be included in all copies or substantial portions of
+# the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
+# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+from __future__ import print_function, unicode_literals
+import codecs
+import datetime
+import hashlib
+from optparse import OptionParser
+import os
+import sys
+
+from doit.tools import timeout
+from nikola.plugin_categories import Command, Task
+from nikola.utils import config_changed, req_missing, get_logger, STDERR_HANDLER
+
+LOGGER = get_logger('planetoid', STDERR_HANDLER)
+
+try:
+    import feedparser
+except ImportError:
+    feedparser = None  # NOQA
+
+try:
+    import peewee
+except ImportError:
+    peewee = None
+
+
+if peewee is not None:
+    class Feed(peewee.Model):
+        name = peewee.CharField()
+        url = peewee.CharField(max_length=200)
+        last_status = peewee.CharField(null=True)
+        etag = peewee.CharField(max_length=200)
+        last_modified = peewee.DateTimeField()
+
+    class Entry(peewee.Model):
+        date = peewee.DateTimeField()
+        feed = peewee.ForeignKeyField(Feed)
+        content = peewee.TextField(max_length=20000)
+        link = peewee.CharField(max_length=200)
+        title = peewee.CharField(max_length=200)
+        guid = peewee.CharField(max_length=200)
+
+
+class Planetoid(Command, Task):
+    """Maintain a planet-like thing."""
+    name = "planetoid"
+
+    def init_db(self):
+        # setup database
+        Feed.create_table(fail_silently=True)
+        Entry.create_table(fail_silently=True)
+
+    def gen_tasks(self):
+        if peewee is None or sys.version_info[0] == 3:
+            if sys.version_info[0] == 3:
+                message = 'Peewee, a requirement of the "planetoid" command, is currently incompatible with Python 3.'
+            else:
+                req_missing('peewee', 'use the "planetoid" command')
+                message = ''
+            yield {
+                'basename': self.name,
+                'name': '',
+                'verbosity': 2,
+                'actions': ['echo "%s"' % message]
+            }
+        else:
+            self.init_db()
+            self.load_feeds()
+            for task in self.task_update_feeds():
+                yield task
+            for task in self.task_generate_posts():
+                yield task
+            yield {
+                'basename': self.name,
+                'name': '',
+                'actions': [],
+                'file_dep': ['feeds'],
+                'task_dep': [
+                    self.name + "_fetch_feed",
+                    self.name + "_generate_posts",
+                ]
+            }
+
+    def run(self, *args):
+        parser = OptionParser(usage="nikola %s [options]" % self.name)
+        (options, args) = parser.parse_args(list(args))
+
+    def load_feeds(self):
+        "Read the feeds file, add it to the database."
+        feeds = []
+        feed = name = None
+        for line in codecs.open('feeds', 'r', 'utf-8'):
+            line = line.strip()
+            if line.startswith("#"):
+                continue
+            elif line.startswith('http'):
+                feed = line
+            elif line:
+                name = line
+            if feed and name:
+                feeds.append([feed, name])
+                feed = name = None
+
+        def add_feed(name, url):
+            f = Feed.create(
+                name=name,
+                url=url,
+                etag='foo',
+                last_modified=datetime.datetime(1970, 1, 1),
+            )
+            f.save()
+
+        def update_feed_url(feed, url):
+            feed.url = url
+            feed.save()
+
+        for feed, name in feeds:
+            f = Feed.select().where(Feed.name == name)
+            if not list(f):
+                add_feed(name, feed)
+            elif list(f)[0].url != feed:
+                update_feed_url(list(f)[0], feed)
+
+    def task_update_feeds(self):
+        """Download feed contents, add entries to the database."""
+        def update_feed(feed):
+            modified = feed.last_modified.timetuple()
+            etag = feed.etag
+            try:
+                parsed = feedparser.parse(
+                    feed.url,
+                    etag=etag,
+                    modified=modified
+                )
+                feed.last_status = str(parsed.status)
+            except:  # Probably a timeout
+                # TODO: log failure
+                return
+            if parsed.feed.get('title'):
+                LOGGER.notice(parsed.feed.title)
+            else:
+                LOGGER.notice(feed.url)
+            feed.etag = parsed.get('etag', 'foo')
+            modified = tuple(parsed.get('date_parsed', (1970, 1, 1)))[:6]
+            LOGGER.notice("==========>", modified)
+            modified = datetime.datetime(*modified)
+            feed.last_modified = modified
+            feed.save()
+            # No point in adding items from missinfg feeds
+            if parsed.status > 400:
+                # TODO log failure
+                return
+            for entry_data in parsed.entries:
+                LOGGER.notice("=========================================")
+                date = entry_data.get('published_parsed', None)
+                if date is None:
+                    date = entry_data.get('updated_parsed', None)
+                if date is None:
+                    LOGGER.error("Can't parse date from:\n", entry_data)
+                    return False
+                LOGGER.notice("DATE:===>", date)
+                date = datetime.datetime(*(date[:6]))
+                title = "%s: %s" % (feed.name, entry_data.get('title', 'Sin título'))
+                content = entry_data.get('content', None)
+                if content:
+                    content = content[0].value
+                if not content:
+                    content = entry_data.get('description', None)
+                if not content:
+                    content = entry_data.get('summary', 'Sin contenido')
+                guid = str(entry_data.get('guid', entry_data.link))
+                link = entry_data.link
+                LOGGER.notice(repr([date, title]))
+                e = list(Entry.select().where(Entry.guid == guid))
+                LOGGER.notice(
+                    repr(dict(
+                        date=date,
+                        title=title,
+                        content=content,
+                        guid=guid,
+                        feed=feed,
+                        link=link,
+                    ))
+                )
+                if not e:
+                    entry = Entry.create(
+                        date=date,
+                        title=title,
+                        content=content,
+                        guid=guid,
+                        feed=feed,
+                        link=link,
+                    )
+                else:
+                    entry = e[0]
+                    entry.date = date
+                    entry.title = title
+                    entry.content = content
+                    entry.link = link
+                entry.save()
+        flag = False
+        for feed in Feed.select():
+            flag = True
+            task = {
+                'basename': self.name + "_fetch_feed",
+                'name': str(feed.url),
+                'actions': [(update_feed, (feed, ))],
+                'uptodate': [timeout(datetime.timedelta(minutes=
+                    self.site.config.get('PLANETOID_REFRESH', 60)))],
+            }
+            yield task
+        if not flag:
+            yield {
+                'basename': self.name + "_fetch_feed",
+                'name': '',
+                'actions': [],
+            }
+
+    def task_generate_posts(self):
+        """Generate post files for the blog entries."""
+        def gen_id(entry):
+            h = hashlib.md5()
+            h.update(entry.feed.name.encode('utf8'))
+            h.update(entry.guid)
+            return h.hexdigest()
+
+        def generate_post(entry):
+            unique_id = gen_id(entry)
+            meta_path = os.path.join('posts', unique_id + '.meta')
+            post_path = os.path.join('posts', unique_id + '.txt')
+            with codecs.open(meta_path, 'wb+', 'utf8') as fd:
+                fd.write('%s\n' % entry.title.replace('\n', ' '))
+                fd.write('%s\n' % unique_id)
+                fd.write('%s\n' % entry.date.strftime('%Y/%m/%d %H:%M'))
+                fd.write('\n')
+                fd.write('%s\n' % entry.link)
+            with codecs.open(post_path, 'wb+', 'utf8') as fd:
+                fd.write('.. raw:: html\n\n')
+                content = entry.content
+                if not content:
+                    content = 'Sin contenido'
+                for line in content.splitlines():
+                    fd.write('    %s\n' % line)
+
+        if not os.path.isdir('posts'):
+            os.mkdir('posts')
+        flag = False
+        for entry in Entry.select().order_by(Entry.date.desc()):
+            flag = True
+            entry_id = gen_id(entry)
+            yield {
+                'basename': self.name + "_generate_posts",
+                'targets': [os.path.join('posts', entry_id + '.meta'), os.path.join('posts', entry_id + '.txt')],
+                'name': entry_id,
+                'actions': [(generate_post, (entry,))],
+                'uptodate': [config_changed({1: entry})],
+                'task_dep': [self.name + "_fetch_feed"],
+            }
+        if not flag:
+            yield {
+                'basename': self.name + "_generate_posts",
+                'name': '',
+                'actions': [],
+            }
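
For reference, the load_feeds() method added above reads a plain-text file named 'feeds' from the site root: lines starting with '#' are ignored, lines starting with 'http' are taken as feed URLs, any other non-empty line is taken as the feed's display name, and each completed name/URL pair is stored as one feed (the order of the two lines within a pair does not matter). A minimal sketch of such a file, using hypothetical names and URLs:

    # planetoid feeds file: one name line and one URL line per feed
    Some Blog
    http://example.com/blog/rss.xml
    Another Blog
    http://example.org/feeds/all.atom.xml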
