From 9c5708cc92af894e414bc76ee35ec2230de5d288 Mon Sep 17 00:00:00 2001 From: Agustin Henze Date: Wed, 2 Jan 2013 08:35:03 -0300 Subject: Imported Upstream version 5.1 --- tests/context.py | 9 + tests/rss-2_0.xsd | 500 +++++++++++++++++++++++++++++++++ tests/test_command_import_wordpress.py | 92 ++++++ tests/test_rss_feeds.py | 104 +++++++ tests/wordpress_export_example.xml | 174 ++++++++++++ tests/wordpress_unicode_export.xml | 114 ++++++++ 6 files changed, 993 insertions(+) create mode 100644 tests/context.py create mode 100644 tests/rss-2_0.xsd create mode 100644 tests/test_command_import_wordpress.py create mode 100644 tests/test_rss_feeds.py create mode 100644 tests/wordpress_export_example.xml create mode 100644 tests/wordpress_unicode_export.xml (limited to 'tests') diff --git a/tests/context.py b/tests/context.py new file mode 100644 index 0000000..f292b79 --- /dev/null +++ b/tests/context.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + +# Path hack as shown by Kenneth Reitz at http://kennethreitz.com/repository-structure-and-python.html + +import os +import sys +sys.path.insert(0, os.path.abspath('..')) + +import nikola diff --git a/tests/rss-2_0.xsd b/tests/rss-2_0.xsd new file mode 100644 index 0000000..d7ddaee --- /dev/null +++ b/tests/rss-2_0.xsd @@ -0,0 +1,500 @@ + + + + + XML Schema for RSS v2.0 feed files. + Project home: http://www.codeplex.com/rss2schema/ + Based on the RSS 2.0 specification document at http://cyber.law.harvard.edu/rss/rss.html + Author: Jorgen Thelin + Revision: 16 + Date: 01-Nov-2008 + Feedback to: http://www.codeplex.com/rss2schema/WorkItem/List.aspx + + + + + + + + + + + + + + An item may represent a "story" -- much like a story in a newspaper or magazine; if so its description is a synopsis of the story, and the link points to the full story. An item may also be complete in itself, if so, the description contains the text (entity-encoded HTML is allowed), and the link and title may be omitted. + + + + + + The title of the item. + + + + + The item synopsis. + + + + + The URL of the item. + + + + + Email address of the author of the item. + + + + + Includes the item in one or more categories. + + + + + URL of a page for comments relating to the item. + + + + + Describes a media object that is attached to the item. + + + + + guid or permalink URL for this entry + + + + + Indicates when the item was published. + + + + + The RSS channel that the item came from. + + + + + Extensibility element. + + + + + + + + + + + + The name of the channel. It's how people refer to your service. If you have an HTML website that contains the same information as your RSS file, the title of your channel should be the same as the title of your website. + + + + + The URL to the HTML website corresponding to the channel. + + + + + Phrase or sentence describing the channel. + + + + + The language the channel is written in. This allows aggregators to group all Italian language sites, for example, on a single page. A list of allowable values for this element, as provided by Netscape, is here. You may also use values defined by the W3C. + + + + + Copyright notice for content in the channel. + + + + + Email address for person responsible for editorial content. + + + + + Email address for person responsible for technical issues relating to channel. + + + + + The publication date for the content in the channel. All date-times in RSS conform to the Date and Time Specification of RFC 822, with the exception that the year may be expressed with two characters or four characters (four preferred). + + + + + The last time the content of the channel changed. + + + + + Specify one or more categories that the channel belongs to. + + + + + A string indicating the program used to generate the channel. + + + + + A URL that points to the documentation for the format used in the RSS file. It's probably a pointer to this page. It's for people who might stumble across an RSS file on a Web server 25 years from now and wonder what it is. + + + + + Allows processes to register with a cloud to be notified of updates to the channel, implementing a lightweight publish-subscribe protocol for RSS feeds. + + + + + ttl stands for time to live. It's a number of minutes that indicates how long a channel can be cached before refreshing from the source. + + + + + Specifies a GIF, JPEG or PNG image that can be displayed with the channel. + + + + + The PICS rating for the channel. + + + + + Specifies a text input box that can be displayed with the channel. + + + + + A hint for aggregators telling them which hours they can skip. + + + + + A hint for aggregators telling them which days they can skip. + + + + + Extensibility element. + + + + + + + + + Extensibility element. + + + + + + + + A time in GMT when aggregators should not request the channel data. The hour beginning at midnight is hour zero. + + + + + + + + + + + + + + A day when aggregators should not request the channel data. + + + + + + + + + + + + + + + + A time in GMT, when aggregators should not request the channel data. The hour beginning at midnight is hour zero. + + + + + + + + + + + + + + + + The URL of the image file. + + + + + Describes the image, it's used in the ALT attribute of the HTML <img> tag when the channel is rendered in HTML. + + + + + The URL of the site, when the channel is rendered, the image is a link to the site. (Note, in practice the image <title> and <link> should have the same value as the channel's <title> and <link>. + + + + + The width of the image in pixels. + + + + + The height of the image in pixels. + + + + + Text that is included in the TITLE attribute of the link formed around the image in the HTML rendering. + + + + + + + The height of the image in pixels. + + + + + + + + The width of the image in pixels. + + + + + + + + Specifies a web service that supports the rssCloud interface which can be implemented in HTTP-POST, XML-RPC or SOAP 1.1. Its purpose is to allow processes to register with a cloud to be notified of updates to the channel, implementing a lightweight publish-subscribe protocol for RSS feeds. + + + + + + + + + + + + + + + + + The purpose of this element is something of a mystery! You can use it to specify a search engine box. Or to allow a reader to provide feedback. Most aggregators ignore it. + + + + + The label of the Submit button in the text input area. + + + + + Explains the text input area. + + + + + The name of the text object in the text input area. + + + + + The URL of the CGI script that processes text input requests. + + + + + + + Using the regexp definiton of E-Mail Address by Lucadean from the .NET RegExp Pattern Repository at http://www.3leaf.com/default/NetRegExpRepository.aspx + + + + + + + + A date-time displayed in RFC-822 format. + Using the regexp definiton of rfc-822 date by Sam Ruby at http://www.intertwingly.net/blog/1360.html + + + + + + + + + + + + + + + + + + URL where the enclosure is located + + + + + Size in bytes + + + + + MIME media-type of the enclosure + + + + + + + + + + + + + + + + + + diff --git a/tests/test_command_import_wordpress.py b/tests/test_command_import_wordpress.py new file mode 100644 index 0000000..4a30dba --- /dev/null +++ b/tests/test_command_import_wordpress.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from context import nikola +import os +import unittest +import mock + + +class CommandImportWordpressTest(unittest.TestCase): + def setUp(self): + self.import_command = nikola.plugins.command_import_wordpress.CommandImportWordpress() + self.import_filename = os.path.abspath( + os.path.join(os.path.dirname(__file__), + 'wordpress_export_example.xml')) + + def tearDown(self): + del self.import_command + del self.import_filename + + def test_create_import_work_without_argument(self): + # Running this without an argument must not fail. + # It should show the proper usage of the command. + self.import_command.run() + + def test_create_import(self): + data_import = mock.MagicMock() + site_generation = mock.MagicMock() + write_urlmap = mock.MagicMock() + write_configuration = mock.MagicMock() + + with mock.patch('nikola.plugins.command_import_wordpress.CommandImportWordpress.generate_base_site', site_generation): + with mock.patch('nikola.plugins.command_import_wordpress.CommandImportWordpress.import_posts', data_import): + with mock.patch('nikola.plugins.command_import_wordpress.CommandImportWordpress.write_urlmap_csv', write_urlmap): + with mock.patch('nikola.plugins.command_import_wordpress.CommandImportWordpress.write_configuration', write_configuration): + self.import_command.run(self.import_filename) + + self.assertTrue(site_generation.called) + self.assertTrue(data_import.called) + + def test_populate_context(self): + channel = self.import_command.get_channel_from_file( + self.import_filename) + context = self.import_command.populate_context(channel) + + for required_key in ('POST_PAGES', 'POST_COMPILERS'): + self.assertTrue(required_key in context) + + self.assertEqual('de', context['DEFAULT_LANG']) + self.assertEqual('Wordpress blog title', context['BLOG_TITLE']) + self.assertEqual('Nikola test blog ;) - with moré Ümläüts', context['BLOG_DESCRIPTION']) + self.assertEqual('http://some.blog', context['BLOG_URL']) + self.assertEqual('mail@some.blog', context['BLOG_EMAIL']) + self.assertEqual('Niko', context['BLOG_AUTHOR']) + + def test_importing_posts_and_attachments(self): + channel = self.import_command.get_channel_from_file( + self.import_filename) + self.import_command.context = self.import_command.populate_context( + channel) + self.import_command.url_map = {} # For testing we use an empty one. + + write_metadata = mock.MagicMock() + write_content = mock.MagicMock() + download_mock = mock.MagicMock() + + with mock.patch('nikola.plugins.command_import_wordpress.CommandImportWordpress.write_content', write_content): + with mock.patch('nikola.plugins.command_import_wordpress.CommandImportWordpress.write_metadata', write_metadata): + with mock.patch('nikola.plugins.command_import_wordpress.CommandImportWordpress.download_url_content_to_file', download_mock): + with mock.patch('nikola.plugins.command_import_wordpress.os.makedirs'): + self.import_command.import_posts(channel) + + self.assertTrue(download_mock.called) + download_mock.assert_any_call(u'http://some.blog/wp-content/uploads/2008/07/arzt_und_pfusch-sick-cover.png', u'new_site/files/wp-content/uploads/2008/07/arzt_und_pfusch-sick-cover.png') + + self.assertTrue(write_metadata.called) + write_metadata.assert_any_call(u'new_site/stories/kontakt.meta', 'Kontakt', u'kontakt', '2009-07-16 20:20:32', None, []) + + self.assertTrue(write_content.called) + write_content.assert_any_call(u'new_site/posts/200704hoert.wp', '...!\n\n\n\n[caption id="attachment_16" align="alignnone" width="739" caption="caption test"]caption test[/caption]\n\n\n\nNicht, dass daran jemals Zweifel bestanden.') + write_content.assert_any_call(u'new_site/posts/200807arzt-und-pfusch-s-i-c-k.wp', u'Arzt+Pfusch - S.I.C.K.Arzt+Pfusch - S.I.C.K.Gerade bin ich \xfcber das Album S.I.C.K von Arzt+Pfusch gestolpert, welches Arzt+Pfusch zum Download f\xfcr lau anbieten. Das Album steht unter einer Creative Commons BY-NC-ND-Lizenz.\n\nDie Ladung noisebmstupidevildustrial gibts als MP3s mit 64kbps und VBR, als Ogg Vorbis und als FLAC (letztere hier). Artwork und Lyrics gibts nochmal einzeln zum Download.') + write_content.assert_any_call(u'new_site/stories/kontakt.wp', u'

Datenschutz

\n\nIch erhebe und speichere automatisch in meine Server Log Files Informationen, die dein Browser an mich \xfcbermittelt. Dies sind:\n\n\n\nDiese Daten sind f\xfcr mich nicht bestimmten Personen zuordenbar. Eine Zusammenf\xfchrung dieser Daten mit anderen Datenquellen wird nicht vorgenommen, die Daten werden einzig zu statistischen Zwecken erhoben.') + + self.assertTrue(len(self.import_command.url_map) > 0) + + self.assertEqual(self.import_command.url_map['http://some.blog/2007/04/hoert/'], u'http://some.blog/posts/200704hoert.html') + self.assertEqual(self.import_command.url_map['http://some.blog/2008/07/arzt-und-pfusch-s-i-c-k/'], u'http://some.blog/posts/200807arzt-und-pfusch-s-i-c-k.html') + self.assertEqual(self.import_command.url_map['http://some.blog/kontakt/'], u'http://some.blog/stories/kontakt.html') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_rss_feeds.py b/tests/test_rss_feeds.py new file mode 100644 index 0000000..2b48f36 --- /dev/null +++ b/tests/test_rss_feeds.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- + +import unittest +import os +import re +from StringIO import StringIO + +import mock + +from context import nikola +from lxml import etree + + +class RSSFeedTest(unittest.TestCase): + def setUp(self): + self.blog_url = "http://some.blog" + + with mock.patch('nikola.nikola.utils.get_meta', + mock.Mock(return_value=('post title', + 'awesome_article', + '2012-10-01 22:41', 'tags', + 'link', 'description'))): + with mock.patch('nikola.nikola.utils.os.path.isdir', + mock.Mock(return_value=True)): + with mock.patch('nikola.nikola.Post.text', + mock.Mock(return_value='some long text')): + + example_post = nikola.nikola.Post('source.file', + 'cache', + 'blog_folder', + True, + {'en': ''}, + 'en', + self.blog_url, + 'unused message.') + + opener_mock = mock.mock_open() + + with mock.patch('nikola.nikola.utils.open', opener_mock, create=True): + nikola.nikola.utils.generic_rss_renderer('en', + "blog_title", + self.blog_url, + "blog_description", + [example_post, + ], + 'testfeed.rss') + + self.file_content = ''.join( + [call[1][0] for call in opener_mock.mock_calls[2:-1]]) + + def tearDown(self): + pass + + def test_feed_items_have_valid_URLs(self): + '''The items in the feed need to have valid urls in link and guid.''' + # This validation regex is taken from django.core.validators + url_validation_regex = re.compile(r'^(?:http|ftp)s?://' # http:// or https:// + r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain... + r'localhost|' # localhost... + r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|' # ...or ipv4 + r'\[?[A-F0-9]*:[A-F0-9:]+\]?)' # ...or ipv6 + r'(?::\d+)?' # optional port + r'(?:/?|[/?]\S+)$', re.IGNORECASE) + + def is_valid_URL(url): + return url_validation_regex.match(url) is not None + + et = etree.parse(StringIO(self.file_content)) + channel = et.find('channel') + item = channel.find('item') + guid = item.find('guid') + link = item.find('link') + + # As stated by W3 FEED Validator: "link must be a full and valid URL" + self.assertTrue(is_valid_URL(link.text), + 'The following URL is not valid: %s' % link.text) + self.assertTrue(self.blog_url in link.text) + + # "guid must be a full URL, unless isPermaLink attribute + # is false: /weblog/posts/the-minimal-server.html " + self.assertTrue(is_valid_URL(guid.text), + 'The following URL is not valid: %s' % + guid.text) + self.assertTrue(self.blog_url in guid.text) + + def test_feed_is_valid(self): + ''' + A testcase to check if the generated feed is valid. + + Validation can be tested with W3 FEED Validator that can be found + at http://feedvalidator.org + ''' + rss_schema_filename = os.path.join(os.path.dirname(__file__), + 'rss-2_0.xsd') + with open(rss_schema_filename, 'r') as rss_schema_file: + xmlschema_doc = etree.parse(rss_schema_file) + + xmlschema = etree.XMLSchema(xmlschema_doc) + document = etree.parse(StringIO(self.file_content)) + + self.assertTrue(xmlschema.validate(document)) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/wordpress_export_example.xml b/tests/wordpress_export_example.xml new file mode 100644 index 0000000..7517193 --- /dev/null +++ b/tests/wordpress_export_example.xml @@ -0,0 +1,174 @@ + + + + + + Wordpress blog title + http://some.blog + Nikola test blog ;) - with moré Ümläüts + Wed, 25 Jul 2012 22:31:24 +0000 + de-DE + 1.2 + http://some.blog + http://some.blog + + 2Nikomail@some.blog + + 11programmierung + 501dotnet + + http://wordpress.org/?v=3.4.1 + + + Arzt+Pfusch - S.I.C.K. + http://some.blog/2008/07/arzt-und-pfusch-s-i-c-k/arzt_und_pfusch-sick-cover/ + Thu, 16 Jul 2009 19:40:37 +0000 + Niko + http://some.blog/wp-content/uploads/2008/07/arzt_und_pfusch-sick-cover.png + + + + 10 + 2009-07-16 21:40:37 + 2009-07-16 19:40:37 + open + open + arzt_und_pfusch-sick-cover + inherit + 6 + 0 + attachment + + 0 + http://some.blog/wp-content/uploads/2008/07/arzt_und_pfusch-sick-cover.png + + _wp_attached_file + + + + _wp_attachment_metadata + + + + + + Caption test + http://some.blog/2007/04/hoert/ + Fri, 27 Apr 2007 13:02:35 +0000 + Niko + http://some.blog/?p=17 + + [/caption] + +Nicht, dass daran jemals Zweifel bestanden.]]> + + 17 + 2007-04-27 15:02:35 + 2007-04-27 13:02:35 + open + open + hoert + publish + 0 + 0 + post + + 0 + + + + + + _edit_last + + + + + + Arzt+Pfusch - S.I.C.K. + http://some.blog/2008/07/arzt-und-pfusch-s-i-c-k/ + Sat, 12 Jul 2008 19:22:06 +0000 + Niko + http://some.blog/?p=6 + + Arzt+Pfusch - S.I.C.K.Gerade bin ich über das Album S.I.C.K von Arzt+Pfusch gestolpert, welches Arzt+Pfusch zum Download für lau anbieten. Das Album steht unter einer Creative Commons BY-NC-ND-Lizenz. +Die Ladung noisebmstupidevildustrial gibts als MP3s mit 64kbps und VBR, als Ogg Vorbis und als FLAC (letztere hier). Artwork und Lyrics gibts nochmal einzeln zum Download.]]> + + 6 + 2008-07-12 21:22:06 + 2008-07-12 19:22:06 + open + open + arzt-und-pfusch-s-i-c-k + publish + 0 + 0 + post + + 0 + + + + + + + + + + + + _edit_last + + + + + + Kontakt + http://some.blog/kontakt/ + Thu, 16 Jul 2009 18:20:32 +0000 + Niko + http://some.blog/?page_id=3 + + Datenschutz +Ich erhebe und speichere automatisch in meine Server Log Files Informationen, die dein Browser an mich übermittelt. Dies sind: +
    +
  • Browsertyp und -version
  • +
  • verwendetes Betriebssystem
  • +
  • Referrer URL (die zuvor besuchte Seite)
  • +
  • IP Adresse des zugreifenden Rechners
  • +
  • Uhrzeit der Serveranfrage.
  • +
+Diese Daten sind für mich nicht bestimmten Personen zuordenbar. Eine Zusammenführung dieser Daten mit anderen Datenquellen wird nicht vorgenommen, die Daten werden einzig zu statistischen Zwecken erhoben.]]>
+ + 3 + 2009-07-16 20:20:32 + 2009-07-16 18:20:32 + closed + closed + kontakt + publish + 0 + 0 + page + + 0 + + _edit_last + + + + _wp_page_template + + +
+ +
+
diff --git a/tests/wordpress_unicode_export.xml b/tests/wordpress_unicode_export.xml new file mode 100644 index 0000000..b2204fc --- /dev/null +++ b/tests/wordpress_unicode_export.xml @@ -0,0 +1,114 @@ + + + + + + + + + + + + + + + + + + + + + + + Nikola Unicode Test ͵pó®t + http://nikolaunicode.wordpress.com + The greatest WordPress.com site in all the land! + Tue, 25 Dec 2012 21:39:30 +0000 + en + 1.2 + http://wordpress.com/ + http://nikolaunicode.wordpress.com + + 3804924ralsinaroberto.alsina@gmail.com + + 1uncategorized + 132937998thag1 + 132937999thag%c2%b2 + + http://wordpress.com/ + + + https://s2.wp.com/i/buttonw-com.png + Nikola Unicode Test ͵pó®t + http://nikolaunicode.wordpress.com + + + -- cgit v1.2.3