summaryrefslogtreecommitdiffstats
path: root/nikola/plugins/command/planetoid/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'nikola/plugins/command/planetoid/__init__.py')
-rw-r--r--nikola/plugins/command/planetoid/__init__.py289
1 files changed, 289 insertions, 0 deletions
diff --git a/nikola/plugins/command/planetoid/__init__.py b/nikola/plugins/command/planetoid/__init__.py
new file mode 100644
index 0000000..369862b
--- /dev/null
+++ b/nikola/plugins/command/planetoid/__init__.py
@@ -0,0 +1,289 @@
+# -*- coding: utf-8 -*-
+
+# Copyright © 2012-2013 Roberto Alsina and others.
+
+# Permission is hereby granted, free of charge, to any
+# person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the
+# Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the
+# Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice
+# shall be included in all copies or substantial portions of
+# the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
+# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+from __future__ import print_function, unicode_literals
+import codecs
+import datetime
+import hashlib
+from optparse import OptionParser
+import os
+import sys
+
+from doit.tools import timeout
+from nikola.plugin_categories import Command, Task
+from nikola.utils import config_changed, req_missing, get_logger, STDERR_HANDLER
+
+LOGGER = get_logger('planetoid', STDERR_HANDLER)
+
+try:
+ import feedparser
+except ImportError:
+ feedparser = None # NOQA
+
+try:
+ import peewee
+except ImportError:
+ peewee = None
+
+
+if peewee is not None:
+ class Feed(peewee.Model):
+ name = peewee.CharField()
+ url = peewee.CharField(max_length=200)
+ last_status = peewee.CharField(null=True)
+ etag = peewee.CharField(max_length=200)
+ last_modified = peewee.DateTimeField()
+
+ class Entry(peewee.Model):
+ date = peewee.DateTimeField()
+ feed = peewee.ForeignKeyField(Feed)
+ content = peewee.TextField(max_length=20000)
+ link = peewee.CharField(max_length=200)
+ title = peewee.CharField(max_length=200)
+ guid = peewee.CharField(max_length=200)
+
+
+class Planetoid(Command, Task):
+ """Maintain a planet-like thing."""
+ name = "planetoid"
+
+ def init_db(self):
+ # setup database
+ Feed.create_table(fail_silently=True)
+ Entry.create_table(fail_silently=True)
+
+ def gen_tasks(self):
+ if peewee is None or sys.version_info[0] == 3:
+ if sys.version_info[0] == 3:
+ message = 'Peewee, a requirement of the "planetoid" command, is currently incompatible with Python 3.'
+ else:
+ req_missing('peewee', 'use the "planetoid" command')
+ message = ''
+ yield {
+ 'basename': self.name,
+ 'name': '',
+ 'verbosity': 2,
+ 'actions': ['echo "%s"' % message]
+ }
+ else:
+ self.init_db()
+ self.load_feeds()
+ for task in self.task_update_feeds():
+ yield task
+ for task in self.task_generate_posts():
+ yield task
+ yield {
+ 'basename': self.name,
+ 'name': '',
+ 'actions': [],
+ 'file_dep': ['feeds'],
+ 'task_dep': [
+ self.name + "_fetch_feed",
+ self.name + "_generate_posts",
+ ]
+ }
+
+ def run(self, *args):
+ parser = OptionParser(usage="nikola %s [options]" % self.name)
+ (options, args) = parser.parse_args(list(args))
+
+ def load_feeds(self):
+ "Read the feeds file, add it to the database."
+ feeds = []
+ feed = name = None
+ for line in codecs.open('feeds', 'r', 'utf-8'):
+ line = line.strip()
+ if line.startswith("#"):
+ continue
+ elif line.startswith('http'):
+ feed = line
+ elif line:
+ name = line
+ if feed and name:
+ feeds.append([feed, name])
+ feed = name = None
+
+ def add_feed(name, url):
+ f = Feed.create(
+ name=name,
+ url=url,
+ etag='foo',
+ last_modified=datetime.datetime(1970, 1, 1),
+ )
+ f.save()
+
+ def update_feed_url(feed, url):
+ feed.url = url
+ feed.save()
+
+ for feed, name in feeds:
+ f = Feed.select().where(Feed.name == name)
+ if not list(f):
+ add_feed(name, feed)
+ elif list(f)[0].url != feed:
+ update_feed_url(list(f)[0], feed)
+
+ def task_update_feeds(self):
+ """Download feed contents, add entries to the database."""
+ def update_feed(feed):
+ modified = feed.last_modified.timetuple()
+ etag = feed.etag
+ try:
+ parsed = feedparser.parse(
+ feed.url,
+ etag=etag,
+ modified=modified
+ )
+ feed.last_status = str(parsed.status)
+ except: # Probably a timeout
+ # TODO: log failure
+ return
+ if parsed.feed.get('title'):
+ LOGGER.notice(parsed.feed.title)
+ else:
+ LOGGER.notice(feed.url)
+ feed.etag = parsed.get('etag', 'foo')
+ modified = tuple(parsed.get('date_parsed', (1970, 1, 1)))[:6]
+ LOGGER.notice("==========>", modified)
+ modified = datetime.datetime(*modified)
+ feed.last_modified = modified
+ feed.save()
+ # No point in adding items from missinfg feeds
+ if parsed.status > 400:
+ # TODO log failure
+ return
+ for entry_data in parsed.entries:
+ LOGGER.notice("=========================================")
+ date = entry_data.get('published_parsed', None)
+ if date is None:
+ date = entry_data.get('updated_parsed', None)
+ if date is None:
+ LOGGER.error("Can't parse date from:\n", entry_data)
+ return False
+ LOGGER.notice("DATE:===>", date)
+ date = datetime.datetime(*(date[:6]))
+ title = "%s: %s" % (feed.name, entry_data.get('title', 'Sin título'))
+ content = entry_data.get('content', None)
+ if content:
+ content = content[0].value
+ if not content:
+ content = entry_data.get('description', None)
+ if not content:
+ content = entry_data.get('summary', 'Sin contenido')
+ guid = str(entry_data.get('guid', entry_data.link))
+ link = entry_data.link
+ LOGGER.notice(repr([date, title]))
+ e = list(Entry.select().where(Entry.guid == guid))
+ LOGGER.notice(
+ repr(dict(
+ date=date,
+ title=title,
+ content=content,
+ guid=guid,
+ feed=feed,
+ link=link,
+ ))
+ )
+ if not e:
+ entry = Entry.create(
+ date=date,
+ title=title,
+ content=content,
+ guid=guid,
+ feed=feed,
+ link=link,
+ )
+ else:
+ entry = e[0]
+ entry.date = date
+ entry.title = title
+ entry.content = content
+ entry.link = link
+ entry.save()
+ flag = False
+ for feed in Feed.select():
+ flag = True
+ task = {
+ 'basename': self.name + "_fetch_feed",
+ 'name': str(feed.url),
+ 'actions': [(update_feed, (feed, ))],
+ 'uptodate': [timeout(datetime.timedelta(minutes=
+ self.site.config.get('PLANETOID_REFRESH', 60)))],
+ }
+ yield task
+ if not flag:
+ yield {
+ 'basename': self.name + "_fetch_feed",
+ 'name': '',
+ 'actions': [],
+ }
+
+ def task_generate_posts(self):
+ """Generate post files for the blog entries."""
+ def gen_id(entry):
+ h = hashlib.md5()
+ h.update(entry.feed.name.encode('utf8'))
+ h.update(entry.guid)
+ return h.hexdigest()
+
+ def generate_post(entry):
+ unique_id = gen_id(entry)
+ meta_path = os.path.join('posts', unique_id + '.meta')
+ post_path = os.path.join('posts', unique_id + '.txt')
+ with codecs.open(meta_path, 'wb+', 'utf8') as fd:
+ fd.write('%s\n' % entry.title.replace('\n', ' '))
+ fd.write('%s\n' % unique_id)
+ fd.write('%s\n' % entry.date.strftime('%Y/%m/%d %H:%M'))
+ fd.write('\n')
+ fd.write('%s\n' % entry.link)
+ with codecs.open(post_path, 'wb+', 'utf8') as fd:
+ fd.write('.. raw:: html\n\n')
+ content = entry.content
+ if not content:
+ content = 'Sin contenido'
+ for line in content.splitlines():
+ fd.write(' %s\n' % line)
+
+ if not os.path.isdir('posts'):
+ os.mkdir('posts')
+ flag = False
+ for entry in Entry.select().order_by(Entry.date.desc()):
+ flag = True
+ entry_id = gen_id(entry)
+ yield {
+ 'basename': self.name + "_generate_posts",
+ 'targets': [os.path.join('posts', entry_id + '.meta'), os.path.join('posts', entry_id + '.txt')],
+ 'name': entry_id,
+ 'actions': [(generate_post, (entry,))],
+ 'uptodate': [config_changed({1: entry})],
+ 'task_dep': [self.name + "_fetch_feed"],
+ }
+ if not flag:
+ yield {
+ 'basename': self.name + "_generate_posts",
+ 'name': '',
+ 'actions': [],
+ }