diff options
Diffstat (limited to 'nikola/PyRSS2Gen.py')
| -rw-r--r-- | nikola/PyRSS2Gen.py | 444 |
1 files changed, 444 insertions, 0 deletions
diff --git a/nikola/PyRSS2Gen.py b/nikola/PyRSS2Gen.py new file mode 100644 index 0000000..6c4bda3 --- /dev/null +++ b/nikola/PyRSS2Gen.py @@ -0,0 +1,444 @@ +"""PyRSS2Gen - A Python library for generating RSS 2.0 feeds.""" + +__name__ = "PyRSS2Gen" +__version__ = (1, 0, 0) +__author__ = "Andrew Dalke <dalke@dalkescientific.com>" + +_generator_name = __name__ + "-" + ".".join(map(str, __version__)) + +import datetime +try: + import cStringIO + StringIO = cStringIO +except ImportError: + import StringIO + +# Could make this the base class; will need to add 'publish' +class WriteXmlMixin: + def write_xml(self, outfile, encoding = "iso-8859-1"): + from xml.sax import saxutils + handler = saxutils.XMLGenerator(outfile, encoding) + handler.startDocument() + self.publish(handler) + handler.endDocument() + + def to_xml(self, encoding = "iso-8859-1"): + f = StringIO.StringIO() + self.write_xml(f, encoding) + return f.getvalue() + + +def _element(handler, name, obj, d = {}): + if isinstance(obj, basestring) or obj is None: + # special-case handling to make the API easier + # to use for the common case. + handler.startElement(name, d) + if obj is not None: + handler.characters(obj) + handler.endElement(name) + else: + # It better know how to emit the correct XML. + obj.publish(handler) + +def _opt_element(handler, name, obj): + if obj is None: + return + _element(handler, name, obj) + + +def _format_date(dt): + """convert a datetime into an RFC 822 formatted date + + Input date must be in GMT. + """ + # Looks like: + # Sat, 07 Sep 2002 00:00:01 GMT + # Can't use strftime because that's locale dependent + # + # Isn't there a standard way to do this for Python? The + # rfc822 and email.Utils modules assume a timestamp. The + # following is based on the rfc822 module. + return "%s, %02d %s %04d %02d:%02d:%02d GMT" % ( + ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][dt.weekday()], + dt.day, + ["Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][dt.month-1], + dt.year, dt.hour, dt.minute, dt.second) + + +## +# A couple simple wrapper objects for the fields which +# take a simple value other than a string. +class IntElement: + """implements the 'publish' API for integers + + Takes the tag name and the integer value to publish. + + (Could be used for anything which uses str() to be published + to text for XML.) + """ + element_attrs = {} + def __init__(self, name, val): + self.name = name + self.val = val + def publish(self, handler): + handler.startElement(self.name, self.element_attrs) + handler.characters(str(self.val)) + handler.endElement(self.name) + +class DateElement: + """implements the 'publish' API for a datetime.datetime + + Takes the tag name and the datetime to publish. + + Converts the datetime to RFC 2822 timestamp (4-digit year). + """ + def __init__(self, name, dt): + self.name = name + self.dt = dt + def publish(self, handler): + _element(handler, self.name, _format_date(self.dt)) +#### + +class Category: + """Publish a category element""" + def __init__(self, category, domain = None): + self.category = category + self.domain = domain + def publish(self, handler): + d = {} + if self.domain is not None: + d["domain"] = self.domain + _element(handler, "category", self.category, d) + +class Cloud: + """Publish a cloud""" + def __init__(self, domain, port, path, + registerProcedure, protocol): + self.domain = domain + self.port = port + self.path = path + self.registerProcedure = registerProcedure + self.protocol = protocol + def publish(self, handler): + _element(handler, "cloud", None, { + "domain": self.domain, + "port": str(self.port), + "path": self.path, + "registerProcedure": self.registerProcedure, + "protocol": self.protocol}) + +class Image: + """Publish a channel Image""" + element_attrs = {} + def __init__(self, url, title, link, + width = None, height = None, description = None): + self.url = url + self.title = title + self.link = link + self.width = width + self.height = height + self.description = description + + def publish(self, handler): + handler.startElement("image", self.element_attrs) + + _element(handler, "url", self.url) + _element(handler, "title", self.title) + _element(handler, "link", self.link) + + width = self.width + if isinstance(width, int): + width = IntElement("width", width) + _opt_element(handler, "width", width) + + height = self.height + if isinstance(height, int): + height = IntElement("height", height) + _opt_element(handler, "height", height) + + _opt_element(handler, "description", self.description) + + handler.endElement("image") + +class Guid: + """Publish a guid + + Defaults to being a permalink, which is the assumption if it's + omitted. Hence strings are always permalinks. + """ + def __init__(self, guid, isPermaLink = 1): + self.guid = guid + self.isPermaLink = isPermaLink + def publish(self, handler): + d = {} + if self.isPermaLink: + d["isPermaLink"] = "true" + else: + d["isPermaLink"] = "false" + _element(handler, "guid", self.guid, d) + +class TextInput: + """Publish a textInput + + Apparently this is rarely used. + """ + element_attrs = {} + def __init__(self, title, description, name, link): + self.title = title + self.description = description + self.name = name + self.link = link + + def publish(self, handler): + handler.startElement("textInput", self.element_attrs) + _element(handler, "title", self.title) + _element(handler, "description", self.description) + _element(handler, "name", self.name) + _element(handler, "link", self.link) + handler.endElement("textInput") + + +class Enclosure: + """Publish an enclosure""" + def __init__(self, url, length, type): + self.url = url + self.length = length + self.type = type + def publish(self, handler): + _element(handler, "enclosure", None, + {"url": self.url, + "length": str(self.length), + "type": self.type, + }) + +class Source: + """Publish the item's original source, used by aggregators""" + def __init__(self, name, url): + self.name = name + self.url = url + def publish(self, handler): + _element(handler, "source", self.name, {"url": self.url}) + +class SkipHours: + """Publish the skipHours + + This takes a list of hours, as integers. + """ + element_attrs = {} + def __init__(self, hours): + self.hours = hours + def publish(self, handler): + if self.hours: + handler.startElement("skipHours", self.element_attrs) + for hour in self.hours: + _element(handler, "hour", str(hour)) + handler.endElement("skipHours") + +class SkipDays: + """Publish the skipDays + + This takes a list of days as strings. + """ + element_attrs = {} + def __init__(self, days): + self.days = days + def publish(self, handler): + if self.days: + handler.startElement("skipDays", self.element_attrs) + for day in self.days: + _element(handler, "day", day) + handler.endElement("skipDays") + +class RSS2(WriteXmlMixin): + """The main RSS class. + + Stores the channel attributes, with the "category" elements under + ".categories" and the RSS items under ".items". + """ + + rss_attrs = {"version": "2.0"} + element_attrs = {} + def __init__(self, + title, + link, + description, + + language = None, + copyright = None, + managingEditor = None, + webMaster = None, + pubDate = None, # a datetime, *in* *GMT* + lastBuildDate = None, # a datetime + + categories = None, # list of strings or Category + generator = _generator_name, + docs = "http://blogs.law.harvard.edu/tech/rss", + cloud = None, # a Cloud + ttl = None, # integer number of minutes + + image = None, # an Image + rating = None, # a string; I don't know how it's used + textInput = None, # a TextInput + skipHours = None, # a SkipHours with a list of integers + skipDays = None, # a SkipDays with a list of strings + + items = None, # list of RSSItems + ): + self.title = title + self.link = link + self.description = description + self.language = language + self.copyright = copyright + self.managingEditor = managingEditor + + self.webMaster = webMaster + self.pubDate = pubDate + self.lastBuildDate = lastBuildDate + + if categories is None: + categories = [] + self.categories = categories + self.generator = generator + self.docs = docs + self.cloud = cloud + self.ttl = ttl + self.image = image + self.rating = rating + self.textInput = textInput + self.skipHours = skipHours + self.skipDays = skipDays + + if items is None: + items = [] + self.items = items + + def publish(self, handler): + handler.startElement("rss", self.rss_attrs) + handler.startElement("channel", self.element_attrs) + _element(handler, "title", self.title) + _element(handler, "link", self.link) + _element(handler, "description", self.description) + + self.publish_extensions(handler) + + _opt_element(handler, "language", self.language) + _opt_element(handler, "copyright", self.copyright) + _opt_element(handler, "managingEditor", self.managingEditor) + _opt_element(handler, "webMaster", self.webMaster) + + pubDate = self.pubDate + if isinstance(pubDate, datetime.datetime): + pubDate = DateElement("pubDate", pubDate) + _opt_element(handler, "pubDate", pubDate) + + lastBuildDate = self.lastBuildDate + if isinstance(lastBuildDate, datetime.datetime): + lastBuildDate = DateElement("lastBuildDate", lastBuildDate) + _opt_element(handler, "lastBuildDate", lastBuildDate) + + for category in self.categories: + if isinstance(category, basestring): + category = Category(category) + category.publish(handler) + + _opt_element(handler, "generator", self.generator) + _opt_element(handler, "docs", self.docs) + + if self.cloud is not None: + self.cloud.publish(handler) + + ttl = self.ttl + if isinstance(self.ttl, int): + ttl = IntElement("ttl", ttl) + _opt_element(handler, "tt", ttl) + + if self.image is not None: + self.image.publish(handler) + + _opt_element(handler, "rating", self.rating) + if self.textInput is not None: + self.textInput.publish(handler) + if self.skipHours is not None: + self.skipHours.publish(handler) + if self.skipDays is not None: + self.skipDays.publish(handler) + + for item in self.items: + item.publish(handler) + + handler.endElement("channel") + handler.endElement("rss") + + def publish_extensions(self, handler): + # Derived classes can hook into this to insert + # output after the three required fields. + pass + + + +class RSSItem(WriteXmlMixin): + """Publish an RSS Item""" + element_attrs = {} + def __init__(self, + title = None, # string + link = None, # url as string + description = None, # string + author = None, # email address as string + categories = None, # list of string or Category + comments = None, # url as string + enclosure = None, # an Enclosure + guid = None, # a unique string + pubDate = None, # a datetime + source = None, # a Source + ): + + if title is None and description is None: + raise TypeError( + "must define at least one of 'title' or 'description'") + self.title = title + self.link = link + self.description = description + self.author = author + if categories is None: + categories = [] + self.categories = categories + self.comments = comments + self.enclosure = enclosure + self.guid = guid + self.pubDate = pubDate + self.source = source + # It sure does get tedious typing these names three times... + + def publish(self, handler): + handler.startElement("item", self.element_attrs) + _opt_element(handler, "title", self.title) + _opt_element(handler, "link", self.link) + self.publish_extensions(handler) + _opt_element(handler, "description", self.description) + _opt_element(handler, "author", self.author) + + for category in self.categories: + if isinstance(category, basestring): + category = Category(category) + category.publish(handler) + + _opt_element(handler, "comments", self.comments) + if self.enclosure is not None: + self.enclosure.publish(handler) + _opt_element(handler, "guid", self.guid) + + pubDate = self.pubDate + if isinstance(pubDate, datetime.datetime): + pubDate = DateElement("pubDate", pubDate) + _opt_element(handler, "pubDate", pubDate) + + if self.source is not None: + self.source.publish(handler) + + handler.endElement("item") + + def publish_extensions(self, handler): + # Derived classes can hook into this to insert + # output after the title and link elements + pass |
