# -*- coding: utf-8 -*- # Copyright (c) 2012 Roberto Alsina y otros. # Permission is hereby granted, free of charge, to any # person obtaining a copy of this software and associated # documentation files (the "Software"), to deal in the # Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the # Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice # shall be included in all copies or substantial portions of # the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS # OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
from __future__ import print_function, unicode_literals

import codecs
import datetime
import hashlib
from optparse import OptionParser
import os

from doit.tools import timeout
import feedparser
import peewee

from nikola.plugin_categories import Command, Task
from nikola.utils import config_changed


class Feed(peewee.Model):
    """A feed listed in the ``feeds`` file, plus its last-fetch bookkeeping."""

    # Display name; also the key used to look a feed up in task_load_feeds.
    name = peewee.CharField()
    url = peewee.CharField(max_length=200)
    # String form of the status reported by the last feedparser call.
    last_status = peewee.CharField(null=True)
    # Values handed back to feedparser.parse() on the next fetch.
    etag = peewee.CharField(max_length=200)
    last_modified = peewee.DateTimeField()


class Entry(peewee.Model):
    """A single item taken from a feed."""

    date = peewee.DateTimeField()
    feed = peewee.ForeignKeyField(Feed)
    content = peewee.TextField(max_length=20000)
    link = peewee.CharField(max_length=200)
    title = peewee.CharField(max_length=200)
    # Identifier used to decide whether an entry is already stored.
    guid = peewee.CharField(max_length=200)


class Planetoid(Command, Task):
    """Maintain a planet-like thing."""

    name = "planetoid"

    def init_db(self):
        """Create the Feed and Entry tables (``fail_silently=True``)."""
        # setup database
        Feed.create_table(fail_silently=True)
        Entry.create_table(fail_silently=True)

    def gen_tasks(self):
        """Yield every task: load feeds, update feeds, generate posts."""
        self.init_db()
        for task in self.task_load_feeds():
            yield task
        for task in self.task_update_feeds():
            yield task
        for task in self.task_generate_posts():
            yield task

    def run(self, *args):
        """Command entry point: set up the database and parse *args*.

        The parsed options are not used for anything yet.
        """
        self.init_db()
        parser = OptionParser(usage="nikola %s [options]" % self.name)
        (options, args) = parser.parse_args(list(args))

    def task_load_feeds(self):
        """Read the feeds file, add it to the database.

        The ``feeds`` file (relative to the current directory) is read line
        by line: lines starting with ``#`` are skipped, a line starting with
        ``http`` is taken as a feed URL and any other non-empty line as a
        feed name.  As soon as both a URL and a name have been seen they are
        recorded as one feed and the pair is reset.

        Yields one task per feed that is missing from the database and one
        per feed whose stored URL differs from the file.
        """
        feeds = []
        feed = name = None
        # Use a context manager so the file handle is always released.
        with codecs.open('feeds', 'r', 'utf-8') as feeds_file:
            for line in feeds_file:
                line = line.strip()
                if line.startswith("#"):
                    continue
                elif line.startswith('http'):
                    feed = line
                elif line:
                    name = line
                if feed and name:
                    feeds.append([feed, name])
                    feed = name = None

        def add_feed(name, url):
            """Store a new feed with placeholder etag / last_modified."""
            f = Feed.create(
                name=name,
                url=url,
                etag='foo',
                last_modified=datetime.datetime(1970, 1, 1),
            )
            f.save()

        def update_feed_url(feed, url):
            """Point an already stored feed at a new URL."""
            feed.url = url
            feed.save()

        for feed, name in feeds:
            # Run the lookup once and reuse the rows for both checks.
            existing = list(Feed.select().where(Feed.name == name))
            if not existing:
                yield {
                    'basename': self.name,
                    'name': name.encode('utf8'),
                    'actions': ((add_feed, (name, feed)), ),
                    'file_dep': ['feeds'],
                }
            elif existing[0].url != feed:
                yield {
                    'basename': self.name,
                    'name': ('updating_' + name).encode('utf8'),
                    'actions': ((update_feed_url, (existing[0], feed)), ),
                }

    def task_update_feeds(self):
        """Download feed contents, add entries to the database.

        Yields one task per stored feed.  Each task's ``uptodate`` check is a
        doit ``timeout`` of ``PLANETOID_REFRESH`` minutes (default 60) read
        from the site configuration.
        """

        def update_feed(feed):
            """Fetch *feed* and create or refresh an Entry per item.

            Returns None when the download fails or the HTTP status is an
            error, and False when an item carries no usable date.
            """
            modified = feed.last_modified.timetuple()
            etag = feed.etag
            try:
                parsed = feedparser.parse(
                    feed.url,
                    etag=etag,
                    modified=modified
                )
                feed.last_status = str(parsed.status)
            except Exception:
                # Probably a timeout.  Also reached when the result has no
                # ``status`` attribute.  Catching Exception rather than
                # using a bare except lets KeyboardInterrupt / SystemExit
                # propagate.
                # TODO: log failure
                return
            if parsed.feed.get('title'):
                print(parsed.feed.title)
            else:
                print(feed.url)
            feed.etag = parsed.get('etag', 'foo')
            modified = tuple(parsed.get('date_parsed', (1970, 1, 1)))[:6]
            print("==========>", modified)
            modified = datetime.datetime(*modified)
            feed.last_modified = modified
            feed.save()
            # No point in adding items from missing feeds.  Every status
            # from 400 up is an HTTP error, 400 itself included.
            if parsed.status >= 400:
                # TODO log failure
                return
            for entry_data in parsed.entries:
                print("=========================================")
                date = entry_data.get('published_parsed', None)
                if date is None:
                    date = entry_data.get('updated_parsed', None)
                if date is None:
                    print("Can't parse date from:")
                    print(entry_data)
                    # Stops processing the remaining items of this feed.
                    # NOTE(review): presumably doit treats a False return
                    # as a failed action -- confirm this is intended.
                    return False
                print("DATE:===>", date)
                date = datetime.datetime(*(date[:6]))
                title = "%s: %s" % (feed.name,
                                    entry_data.get('title', 'Sin título'))
                # Prefer full content, then description, then summary.
                content = entry_data.get('content', None)
                if content:
                    content = content[0].value
                if not content:
                    content = entry_data.get('description', None)
                if not content:
                    content = entry_data.get('summary', 'Sin contenido')
                guid = str(entry_data.get('guid', entry_data.link))
                link = entry_data.link
                print(repr([date, title]))
                e = list(Entry.select().where(Entry.guid == guid))
                print(
                    repr(dict(
                        date=date,
                        title=title,
                        content=content,
                        guid=guid,
                        feed=feed,
                        link=link,
                    ))
                )
                if not e:
                    entry = Entry.create(
                        date=date,
                        title=title,
                        content=content,
                        guid=guid,
                        feed=feed,
                        link=link,
                    )
                else:
                    # Already stored: refresh every field except guid/feed.
                    entry = e[0]
                    entry.date = date
                    entry.title = title
                    entry.content = content
                    entry.link = link
                entry.save()

        for feed in Feed.select():
            task = {
                'basename': self.name,
                'name': str(feed.url),
                'actions': [(update_feed, (feed, ))],
                'uptodate': [timeout(datetime.timedelta(
                    minutes=self.site.config.get('PLANETOID_REFRESH', 60)))],
            }
            yield task

    def task_generate_posts(self):
        """Generate post files for the blog entries.

        Yields one task per stored entry, newest first.  Each task writes
        ``posts/<id>.meta`` and ``posts/<id>.txt``, where ``<id>`` is the
        MD5 hex digest of the feed name followed by the entry guid.
        """

        def gen_id(entry):
            """Return the MD5 hex digest of feed name + guid for *entry*."""
            h = hashlib.md5()
            h.update(entry.feed.name.encode('utf8'))
            guid = entry.guid
            # hashlib only accepts bytes; encode text guids explicitly.
            # ASCII guids hash exactly as they did before.
            if not isinstance(guid, bytes):
                guid = guid.encode('utf8')
            h.update(guid)
            return h.hexdigest()

        def generate_post(entry):
            """Write the .meta and .txt files for *entry* under posts/."""
            unique_id = gen_id(entry)
            meta_path = os.path.join('posts', unique_id + '.meta')
            post_path = os.path.join('posts', unique_id + '.txt')
            with codecs.open(meta_path, 'wb+', 'utf8') as fd:
                # Title (forced onto one line), id, date, blank line, link.
                fd.write('%s\n' % entry.title.replace('\n', ' '))
                fd.write('%s\n' % unique_id)
                fd.write('%s\n' % entry.date.strftime('%Y/%m/%d %H:%M'))
                fd.write('\n')
                fd.write('%s\n' % entry.link)
            with codecs.open(post_path, 'wb+', 'utf8') as fd:
                fd.write('.. raw:: html\n\n')
                content = entry.content
                if not content:
                    content = 'Sin contenido'
                # Each content line is written indented below the directive.
                for line in content.splitlines():
                    fd.write(' %s\n' % line)

        for entry in Entry.select().order_by(Entry.date.desc()):
            entry_id = gen_id(entry)
            yield {
                'basename': self.name,
                'targets': [os.path.join('posts', entry_id + '.meta'),
                            os.path.join('posts', entry_id + '.txt')],
                'name': entry_id,
                'actions': [(generate_post, (entry,))],
                # NOTE(review): the model instance itself is handed to
                # config_changed -- confirm its serialisation is stable
                # between runs.
                'uptodate': [config_changed({1: entry})]
            }