aboutsummaryrefslogtreecommitdiffstats
path: root/tests/data/wordpress_import
diff options
context:
space:
mode:
Diffstat (limited to 'tests/data/wordpress_import')
-rw-r--r--tests/data/wordpress_import/wordpress_export_example.xml322
-rw-r--r--tests/data/wordpress_import/wordpress_qtranslate_item_modernized.xml30
-rw-r--r--tests/data/wordpress_import/wordpress_qtranslate_item_raw_export.xml30
-rw-r--r--tests/data/wordpress_import/wordpress_unicode_export.xml114
4 files changed, 496 insertions, 0 deletions
diff --git a/tests/data/wordpress_import/wordpress_export_example.xml b/tests/data/wordpress_import/wordpress_export_example.xml
new file mode 100644
index 0000000..e2401f7
--- /dev/null
+++ b/tests/data/wordpress_import/wordpress_export_example.xml
@@ -0,0 +1,322 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- This is a WordPress eXtended RSS file generated by WordPress as an export of your site. -->
+<rss version="2.0"
+ xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
+ xmlns:content="http://purl.org/rss/1.0/modules/content/"
+ xmlns:wfw="http://wellformedweb.org/CommentAPI/"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:wp="http://wordpress.org/export/1.2/"
+>
+
+<channel>
+ <title>Wordpress blog title</title>
+ <link>http://some.blog</link>
+ <description>Nikola test blog ;) - with moré Ümläüts</description>
+ <pubDate>Wed, 25 Jul 2012 22:31:24 +0000</pubDate>
+ <language>de-DE</language>
+ <wp:wxr_version>1.2</wp:wxr_version>
+ <wp:base_site_url>http://some.blog</wp:base_site_url>
+ <wp:base_blog_url>http://some.blog</wp:base_blog_url>
+
+ <wp:author><wp:author_id>2</wp:author_id><wp:author_login>Niko</wp:author_login><wp:author_email>mail@some.blog</wp:author_email><wp:author_display_name><![CDATA[Niko]]></wp:author_display_name><wp:author_first_name><![CDATA[Niko]]></wp:author_first_name><wp:author_last_name><![CDATA[]]></wp:author_last_name></wp:author>
+
+ <wp:category><wp:term_id>11</wp:term_id><wp:category_nicename>programmierung</wp:category_nicename><wp:category_parent></wp:category_parent><wp:cat_name><![CDATA[c0ding]]></wp:cat_name></wp:category>
+ <wp:tag><wp:term_id>501</wp:term_id><wp:tag_slug>dotnet</wp:tag_slug><wp:tag_name><![CDATA[.Net]]></wp:tag_name></wp:tag>
+
+ <generator>http://wordpress.org/?v=3.4.1</generator>
+
+ <item>
+ <title>Arzt+Pfusch - S.I.C.K.</title>
+ <link>http://some.blog/2008/07/arzt-und-pfusch-s-i-c-k/arzt_und_pfusch-sick-cover/</link>
+ <pubDate>Thu, 16 Jul 2009 19:40:37 +0000</pubDate>
+ <dc:creator>Niko</dc:creator>
+ <guid isPermaLink="false">http://some.blog/wp-content/uploads/2008/07/arzt_und_pfusch-sick-cover.png</guid>
+ <description></description>
+ <content:encoded><![CDATA[Das Cover von Arzt+Pfusch - S.I.C.K.]]></content:encoded>
+ <excerpt:encoded><![CDATA[Arzt+Pfusch - S.I.C.K.]]></excerpt:encoded>
+ <wp:post_id>10</wp:post_id>
+ <wp:post_date>2009-07-16 21:40:37</wp:post_date>
+ <wp:post_date_gmt>2009-07-16 19:40:37</wp:post_date_gmt>
+ <wp:comment_status>open</wp:comment_status>
+ <wp:ping_status>open</wp:ping_status>
+ <wp:post_name>arzt_und_pfusch-sick-cover</wp:post_name>
+ <wp:status>inherit</wp:status>
+ <wp:post_parent>6</wp:post_parent>
+ <wp:menu_order>0</wp:menu_order>
+ <wp:post_type>attachment</wp:post_type>
+ <wp:post_password></wp:post_password>
+ <wp:is_sticky>0</wp:is_sticky>
+ <wp:attachment_url>http://some.blog/wp-content/uploads/2008/07/arzt_und_pfusch-sick-cover.png</wp:attachment_url>
+ <wp:postmeta>
+ <wp:meta_key>_wp_attached_file</wp:meta_key>
+ <wp:meta_value><![CDATA[2008/07/arzt_und_pfusch-sick-cover.png]]></wp:meta_value>
+ </wp:postmeta>
+ <wp:postmeta>
+ <wp:meta_key>_wp_attachment_metadata</wp:meta_key>
+ <wp:meta_value><![CDATA[a:6:{s:5:"width";s:3:"300";s:6:"height";s:3:"299";s:14:"hwstring_small";s:22:"height='96' width='96'";s:4:"file";s:38:"2008/07/arzt_und_pfusch-sick-cover.png";s:5:"sizes";a:1:{s:9:"thumbnail";a:3:{s:4:"file";s:38:"arzt_und_pfusch-sick-cover-150x150.png";s:5:"width";s:3:"150";s:6:"height";s:3:"150";}}s:10:"image_meta";a:10:{s:8:"aperture";s:1:"0";s:6:"credit";s:0:"";s:6:"camera";s:0:"";s:7:"caption";s:0:"";s:17:"created_timestamp";s:1:"0";s:9:"copyright";s:0:"";s:12:"focal_length";s:1:"0";s:3:"iso";s:1:"0";s:13:"shutter_speed";s:1:"0";s:5:"title";s:0:"";}}]]></wp:meta_value>
+ </wp:postmeta>
+ </item>
+
+ <item>
+ <title>Transformation test</title>
+ <link>http://some.blog/2007/04/hoert/</link>
+ <pubDate>Fri, 27 Apr 2007 13:02:35 +0000</pubDate>
+ <dc:creator>Niko</dc:creator>
+ <guid isPermaLink="false">http://some.blog/?p=17</guid>
+ <description></description>
+ <content:encoded><![CDATA[An image.
+
+[caption id="attachment_16" align="alignnone" width="739" caption="caption test"]<img class="size-full wp-image-16" title="caption test" src="http://some.blog/wp-content/uploads/2009/07/caption_test.jpg" alt="caption test" width="739" height="517" />[/caption]
+
+Some source code.
+
+[sourcecode language="Python"]
+import sys
+print sys.version
+[/sourcecode]
+
+The end.
+]]></content:encoded>
+ <excerpt:encoded><![CDATA[]]></excerpt:encoded>
+ <wp:post_id>17</wp:post_id>
+ <wp:post_date>2007-04-27 15:02:35</wp:post_date>
+ <wp:post_date_gmt>2007-04-27 13:02:35</wp:post_date_gmt>
+ <wp:comment_status>open</wp:comment_status>
+ <wp:ping_status>open</wp:ping_status>
+ <wp:post_name>hoert</wp:post_name>
+ <wp:status>publish</wp:status>
+ <wp:post_parent>0</wp:post_parent>
+ <wp:menu_order>0</wp:menu_order>
+ <wp:post_type>post</wp:post_type>
+ <wp:post_password></wp:post_password>
+ <wp:is_sticky>0</wp:is_sticky>
+ <category domain="post_tag" nicename="bild"><![CDATA[Bild]]></category>
+ <category domain="category" nicename="musik"><![CDATA[Musik]]></category>
+ <category domain="category" nicename="unterhaltung"><![CDATA[Unterhaltung]]></category>
+ <category domain="post_tag" nicename="werbung"><![CDATA[Werbung]]></category>
+ <wp:postmeta>
+ <wp:meta_key>_edit_last</wp:meta_key>
+ <wp:meta_value><![CDATA[2]]></wp:meta_value>
+ </wp:postmeta>
+ </item>
+
+ <item>
+ <title>Arzt+Pfusch - S.I.C.K.</title>
+ <link>http://some.blog/2008/07/arzt-und-pfusch-s-i-c-k/</link>
+ <pubDate>Sat, 12 Jul 2008 19:22:06 +0000</pubDate>
+ <dc:creator>Niko</dc:creator>
+ <guid isPermaLink="false">http://some.blog/?p=6</guid>
+ <description></description>
+ <content:encoded><![CDATA[<img class="size-full wp-image-10 alignright" title="Arzt+Pfusch - S.I.C.K." src="http://some.blog/wp-content/uploads/2008/07/arzt_und_pfusch-sick-cover.png" alt="Arzt+Pfusch - S.I.C.K." width="210" height="209" />Arzt+Pfusch - S.I.C.K.Gerade bin ich über das Album <em>S.I.C.K</em> von <a title="Arzt+Pfusch" href="http://www.arztpfusch.com/" target="_blank">Arzt+Pfusch</a> gestolpert, welches Arzt+Pfusch zum Download für lau anbieten. Das Album steht unter einer Creative Commons <a href="http://creativecommons.org/licenses/by-nc-nd/3.0/de/">BY-NC-ND</a>-Lizenz.
+Die Ladung <em>noisebmstupidevildustrial</em> gibts als MP3s mit <a href="http://www.archive.org/download/dmp005/dmp005_64kb_mp3.zip">64kbps</a> und <a href="http://www.archive.org/download/dmp005/dmp005_vbr_mp3.zip">VBR</a>, als Ogg Vorbis und als FLAC (letztere <a href="http://www.archive.org/details/dmp005">hier</a>). <a href="http://www.archive.org/download/dmp005/dmp005-artwork.zip">Artwork</a> und <a href="http://www.archive.org/download/dmp005/dmp005-lyrics.txt">Lyrics</a> gibts nochmal einzeln zum Download.]]></content:encoded>
+ <excerpt:encoded><![CDATA[]]></excerpt:encoded>
+ <wp:post_id>6</wp:post_id>
+ <wp:post_date>2008-07-12 21:22:06</wp:post_date>
+ <wp:post_date_gmt>2008-07-12 19:22:06</wp:post_date_gmt>
+ <wp:comment_status>open</wp:comment_status>
+ <wp:ping_status>open</wp:ping_status>
+ <wp:post_name>arzt-und-pfusch-s-i-c-k</wp:post_name>
+ <wp:status>publish</wp:status>
+ <wp:post_parent>0</wp:post_parent>
+ <wp:menu_order>0</wp:menu_order>
+ <wp:post_type>post</wp:post_type>
+ <wp:post_password></wp:post_password>
+ <wp:is_sticky>0</wp:is_sticky>
+ <category domain="post_tag" nicename="arzt-und-pfusch"><![CDATA[Arzt+Pfusch]]></category>
+ <category domain="post_tag" nicename="creative-commons"><![CDATA[Creative Commons]]></category>
+ <category domain="post_tag" nicename="download"><![CDATA[Download]]></category>
+ <category domain="post_tag" nicename="electronic-body-music"><![CDATA[EBM]]></category>
+ <category domain="post_tag" nicename="flac"><![CDATA[Free Lossless Audio Codec]]></category>
+ <category domain="post_tag" nicename="gratis"><![CDATA[Gratis]]></category>
+ <category domain="post_tag" nicename="industrial"><![CDATA[Industrial]]></category>
+ <category domain="post_tag" nicename="mp3"><![CDATA[MP3]]></category>
+ <category domain="category" nicename="musik"><![CDATA[Musik]]></category>
+ <category domain="post_tag" nicename="ogg"><![CDATA[Ogg]]></category>
+ <wp:postmeta>
+ <wp:meta_key>_edit_last</wp:meta_key>
+ <wp:meta_value><![CDATA[2]]></wp:meta_value>
+ </wp:postmeta>
+ </item>
+
+ <item>
+ <title>Kontakt</title>
+ <link>http://some.blog/kontakt/</link>
+ <pubDate>Thu, 16 Jul 2009 18:20:32 +0000</pubDate>
+ <dc:creator>Niko</dc:creator>
+ <guid isPermaLink="false">http://some.blog/?page_id=3</guid>
+ <description></description>
+ <content:encoded><![CDATA[<h1>Datenschutz</h1>
+Ich erhebe und speichere automatisch in meine Server Log Files Informationen, die dein Browser an mich übermittelt. Dies sind:
+<ul>
+ <li>Browsertyp und -version</li>
+ <li>verwendetes Betriebssystem</li>
+ <li>Referrer URL (die zuvor besuchte Seite)</li>
+ <li>IP Adresse des zugreifenden Rechners</li>
+ <li>Uhrzeit der Serveranfrage.</li>
+</ul>
+Diese Daten sind für mich nicht bestimmten Personen zuordenbar. Eine Zusammenführung dieser Daten mit anderen Datenquellen wird nicht vorgenommen, die Daten werden einzig zu statistischen Zwecken erhoben.]]></content:encoded>
+ <excerpt:encoded><![CDATA[]]></excerpt:encoded>
+ <wp:post_id>3</wp:post_id>
+ <wp:post_date>2009-07-16 20:20:32</wp:post_date>
+ <wp:post_date_gmt>2009-07-16 18:20:32</wp:post_date_gmt>
+ <wp:comment_status>closed</wp:comment_status>
+ <wp:ping_status>closed</wp:ping_status>
+ <wp:post_name>kontakt</wp:post_name>
+ <wp:status>publish</wp:status>
+ <wp:post_parent>0</wp:post_parent>
+ <wp:menu_order>0</wp:menu_order>
+ <wp:post_type>page</wp:post_type>
+ <wp:post_password></wp:post_password>
+ <wp:is_sticky>0</wp:is_sticky>
+ <wp:postmeta>
+ <wp:meta_key>_edit_last</wp:meta_key>
+ <wp:meta_value><![CDATA[2]]></wp:meta_value>
+ </wp:postmeta>
+ <wp:postmeta>
+ <wp:meta_key>_wp_page_template</wp:meta_key>
+ <wp:meta_value><![CDATA[default]]></wp:meta_value>
+ </wp:postmeta>
+ </item>
+ <item>
+ <title>Indentation Test</title>
+ <link>http://some.blog/2012/04/indentation_test/</link>
+ <pubDate>Sun, 15 Apr 2012 11:44:59 +0000</pubDate>
+ <dc:creator>Niko</dc:creator>
+ <guid isPermaLink="false">http://some.blog/?p=2077</guid>
+ <description></description>
+ <content:encoded><![CDATA[Some examples for indented code that should not be broken.
+
+You should see some Python code hereafter. The code should be one block.
+<pre>class Borg:
+ _state = {}
+ def __init__(self):
+ self.__dict__ = self._state</pre>
+&nbsp;
+
+Here is a listing made with HTML that should display without the HTML being visible to the visitor.
+<ul>
+ <li>to post: <strong>groupname@googlegroups.com</strong></li>
+ <li>to <em>subscribe</em>: <strong>groupname+subscribe@googlegroups.com</strong></li>
+ <li>to <em>unsubscribe</em>: <strong>groupname+unsubscribe@googlegroups.com</strong></li>
+</ul>
+
+A listing with another listing inside.
+<ul>
+<li> foo
+ <ul>
+ <li> bar
+ </ul>
+</ul>
+]]></content:encoded>
+ <excerpt:encoded><![CDATA[]]></excerpt:encoded>
+ <wp:post_id>2077</wp:post_id>
+ <wp:post_date>2012-04-15 12:44:59</wp:post_date>
+ <wp:post_date_gmt>2012-04-15 11:44:59</wp:post_date_gmt>
+ <wp:comment_status>open</wp:comment_status>
+ <wp:ping_status>open</wp:ping_status>
+ <wp:post_name>python-borg-pattern</wp:post_name>
+ <wp:status>publish</wp:status>
+ <wp:post_parent>0</wp:post_parent>
+ <wp:menu_order>0</wp:menu_order>
+ <wp:post_type>post</wp:post_type>
+ <wp:post_password></wp:post_password>
+ <wp:is_sticky>0</wp:is_sticky>
+ <category domain="category" nicename="programming"><![CDATA[programming]]></category>
+ <category domain="post_tag" nicename="design-patterns"><![CDATA[Design Patterns]]></category>
+ <category domain="post_tag" nicename="python"><![CDATA[Python]]></category>
+ <wp:postmeta>
+ <wp:meta_key>_edit_last</wp:meta_key>
+ <wp:meta_value><![CDATA[2]]></wp:meta_value>
+ </wp:postmeta>
+ </item>
+
+ <item>
+ <title>Screenshot - 2012-12-19</title>
+ <link>http://some.blog/2012/12/wintermodus/2012-12-19-1355925145_1024x600_scrot/</link>
+ <pubDate>Wed, 19 Dec 2012 13:53:19 +0000</pubDate>
+ <dc:creator>Niko</dc:creator>
+ <guid isPermaLink="false">http://some.blog/wp-content/uploads/2012/12/2012-12-19-355925145_1024x600_scrot.png</guid>
+ <description></description>
+ <content:encoded><![CDATA[]]></content:encoded>
+ <excerpt:encoded><![CDATA[]]></excerpt:encoded>
+ <wp:post_id>2271</wp:post_id>
+ <wp:post_date>2012-12-19 14:53:19</wp:post_date>
+ <wp:post_date_gmt>2012-12-19 13:53:19</wp:post_date_gmt>
+ <wp:comment_status>open</wp:comment_status>
+ <wp:ping_status>open</wp:ping_status>
+ <wp:post_name>2012-12-19-1355925145_1024x600_scrot</wp:post_name>
+ <wp:status>inherit</wp:status>
+ <wp:post_parent>2270</wp:post_parent>
+ <wp:menu_order>0</wp:menu_order>
+ <wp:post_type>attachment</wp:post_type>
+ <wp:post_password></wp:post_password>
+ <wp:is_sticky>0</wp:is_sticky>
+ <wp:attachment_url>http://some.blog/wp-content/uploads/2012/12/2012-12-19-355925145_1024x600_scrot.png</wp:attachment_url>
+ <wp:postmeta>
+ <wp:meta_key>_wp_attached_file</wp:meta_key>
+ <wp:meta_value><![CDATA[2012/12/2012-12-19-1355925145_1024x600_scrot.png]]></wp:meta_value>
+ </wp:postmeta>
+ <wp:postmeta>
+ <wp:meta_key>_wp_attachment_metadata</wp:meta_key>
+ <wp:meta_value><![CDATA[a:5:{s:5:"width";i:1024;s:6:"height";i:600;s:4:"file";s:48:"2012/12/2012-12-19-1355925145_1024x600_scrot.png";s:5:"sizes";a:9:{s:9:"thumbnail";a:4:{s:4:"file";s:48:"2012-12-19-1355925145_1024x600_scrot-150x150.png";s:5:"width";i:150;s:6:"height";i:150;s:9:"mime-type";s:9:"image/png";}s:6:"medium";a:4:{s:4:"file";s:48:"2012-12-19-1355925145_1024x600_scrot-300x175.png";s:5:"width";i:300;s:6:"height";i:175;s:9:"mime-type";s:9:"image/png";}s:12:"mosaic-thumb";a:4:{s:4:"file";s:46:"2012-12-19-1355925145_1024x600_scrot-96x96.png";s:5:"width";i:96;s:6:"height";i:96;s:9:"mime-type";s:9:"image/png";}s:13:"gallery-thumb";a:4:{s:4:"file";s:46:"2012-12-19-1355925145_1024x600_scrot-96x96.png";s:5:"width";i:96;s:6:"height";i:96;s:9:"mime-type";s:9:"image/png";}s:9:"widget-24";a:4:{s:4:"file";s:46:"2012-12-19-1355925145_1024x600_scrot-24x24.png";s:5:"width";i:24;s:6:"height";i:24;s:9:"mime-type";s:9:"image/png";}s:9:"widget-32";a:4:{s:4:"file";s:46:"2012-12-19-1355925145_1024x600_scrot-36x36.png";s:5:"width";i:36;s:6:"height";i:36;s:9:"mime-type";s:9:"image/png";}s:9:"widget-48";a:4:{s:4:"file";s:46:"2012-12-19-1355925145_1024x600_scrot-48x48.png";s:5:"width";i:48;s:6:"height";i:48;s:9:"mime-type";s:9:"image/png";}s:9:"widget-64";a:4:{s:4:"file";s:46:"2012-12-19-1355925145_1024x600_scrot-64x64.png";s:5:"width";i:64;s:6:"height";i:64;s:9:"mime-type";s:9:"image/png";}s:9:"widget-96";a:4:{s:4:"file";s:46:"2012-12-19-1355925145_1024x600_scrot-96x96.png";s:5:"width";i:96;s:6:"height";i:96;s:9:"mime-type";s:9:"image/png";}}s:10:"image_meta";a:10:{s:8:"aperture";i:0;s:6:"credit";s:0:"";s:6:"camera";s:0:"";s:7:"caption";s:0:"";s:17:"created_timestamp";i:0;s:9:"copyright";s:0:"";s:12:"focal_length";i:0;s:3:"iso";i:0;s:13:"shutter_speed";i:0;s:5:"title";s:0:"";}}]]></wp:meta_value>
+ </wp:postmeta>
+ </item>
+
+ <item>
+ <title>NoirsEtPleinsDeLumière</title>
+ <link>http://some.blog/2011/04/noirs-et-pourtant-pleins-de-lumiere/noirsetpleinsdelumiere/#main</link>
+ <pubDate>Tue, 12 Apr 2011 21:56:05 +0000</pubDate>
+ <dc:creator><![CDATA[tibonihoo]]></dc:creator>
+ <guid isPermaLink="false">http://some.blog/wp-content/uploads/2011/04/NoirsEtPleinsDeLumière.jpg</guid>
+ <description></description>
+ <content:encoded><![CDATA[]]></content:encoded>
+ <excerpt:encoded><![CDATA[]]></excerpt:encoded>
+ <wp:post_id>724</wp:post_id>
+ <wp:post_date>2011-04-12 23:56:05</wp:post_date>
+ <wp:post_date_gmt>2011-04-12 21:56:05</wp:post_date_gmt>
+ <wp:comment_status>open</wp:comment_status>
+ <wp:ping_status>closed</wp:ping_status>
+ <wp:post_name>noirsetpleinsdelumiere</wp:post_name>
+ <wp:status>inherit</wp:status>
+ <wp:post_parent>723</wp:post_parent>
+ <wp:menu_order>0</wp:menu_order>
+ <wp:post_type>attachment</wp:post_type>
+ <wp:post_password></wp:post_password>
+ <wp:is_sticky>0</wp:is_sticky>
+ <wp:attachment_url>http://some.blog/wp-content/uploads/2011/04/NoirsEtPleinsDeLumière.jpg</wp:attachment_url>
+ <wp:postmeta>
+ <wp:meta_key>_wp_attachment_metadata</wp:meta_key>
+ <wp:meta_value><![CDATA[a:6:{s:5:"width";s:3:"533";s:6:"height";s:3:"800";s:14:"hwstring_small";s:22:"height='96' width='63'";s:4:"file";s:35:"2011/04/NoirsEtPleinsDeLumière.jpg";s:5:"sizes";a:2:{s:9:"thumbnail";a:3:{s:4:"file";s:35:"NoirsEtPleinsDeLumière-150x150.jpg";s:5:"width";s:3:"150";s:6:"height";s:3:"150";}s:6:"medium";a:3:{s:4:"file";s:35:"NoirsEtPleinsDeLumière-199x300.jpg";s:5:"width";s:3:"199";s:6:"height";s:3:"300";}}s:10:"image_meta";a:10:{s:8:"aperture";s:1:"5";s:6:"credit";s:0:"";s:6:"camera";s:13:"Canon EOS 40D";s:7:"caption";s:0:"";s:17:"created_timestamp";s:1:"0";s:9:"copyright";s:0:"";s:12:"focal_length";s:3:"100";s:3:"iso";s:3:"200";s:13:"shutter_speed";s:9:"-42949672";s:5:"title";s:0:"";}}]]></wp:meta_value>
+ </wp:postmeta>
+ <wp:postmeta>
+ <wp:meta_key>_wp_attached_file</wp:meta_key>
+ <wp:meta_value><![CDATA[2011/04/NoirsEtPleinsDeLumière.jpg]]></wp:meta_value>
+ </wp:postmeta>
+ </item>
+ <item>
+ <title>Image Link Rewriting</title>
+ <link>http://some.blog/2012/12/wintermodus/</link>
+ <pubDate>Wed, 19 Dec 2012 13:55:10 +0000</pubDate>
+ <dc:creator>Niko</dc:creator>
+ <guid isPermaLink="false">http://some.blog/?p=2270</guid>
+ <description></description>
+ <content:encoded><![CDATA[Some image upload. The links to this and the src of the img-tag should be rewritten correctly.
+
+ <a href="http://some.blog/wp-content/uploads/2012/12/2012-12-19-1355925145_1024x600_scrot.png"><img class="aligncenter size-medium wp-image-2271" alt="Netbook Screenshot - 2012-12-19" src="http://some.blog/wp-content/uploads/2012/12/2012-12-19-1355925145_1024x600_scrot-300x175.ng" width="300" height="175" /></a>]]></content:encoded>
+ <excerpt:encoded><![CDATA[]]></excerpt:encoded>
+ <wp:post_id>2270</wp:post_id>
+ <wp:post_date>2012-12-19 14:55:10</wp:post_date>
+ <wp:post_date_gmt>2012-12-19 13:55:10</wp:post_date_gmt>
+ <wp:comment_status>open</wp:comment_status>
+ <wp:ping_status>open</wp:ping_status>
+ <wp:post_name>image-link-rewriting</wp:post_name>
+ <wp:status>publish</wp:status>
+ <wp:post_parent>0</wp:post_parent>
+ <wp:menu_order>0</wp:menu_order>
+ <wp:post_type>post</wp:post_type>
+ <wp:post_password></wp:post_password>
+ <wp:is_sticky>0</wp:is_sticky>
+ <category domain="category" nicename="linux"><![CDATA[Linux]]></category>
+ </item>
+ </channel>
+</rss>
diff --git a/tests/data/wordpress_import/wordpress_qtranslate_item_modernized.xml b/tests/data/wordpress_import/wordpress_qtranslate_item_modernized.xml
new file mode 100644
index 0000000..2622bfd
--- /dev/null
+++ b/tests/data/wordpress_import/wordpress_qtranslate_item_modernized.xml
@@ -0,0 +1,30 @@
+ <item>
+ <title>[:fr]Sous le ciel[:][:en]Under heaven[:]</title>
+ <link>http://www.tibonihoo.net/blog/2014/05/sous-le-ciel/</link>
+ <pubDate>Sat, 03 May 2014 13:20:32 +0000</pubDate>
+ <dc:creator><![CDATA[tibonihoo_admin]]></dc:creator>
+ <guid isPermaLink="false">http://www.tibonihoo.net/blog/?p=1585</guid>
+ <description></description>
+ <content:encoded><![CDATA[[:fr]<a href="http://www.tibonihoo.net/blog/wp-content/uploads/2014/05/IMG_68511.jpg"><img class="alignnone size-medium wp-image-1587" src="http://www.tibonihoo.net/blog/wp-content/uploads/2014/05/IMG_68511-300x199.jpg" alt="IMG_6851" width="300" height="199" /></a>[:][:en]<img class="alignnone size-medium wp-image-1587" src="http://www.tibonihoo.net/blog/wp-content/uploads/2014/05/IMG_68511-300x199.jpg" alt="IMG_6851" width="300" height="199" />[:]]]></content:encoded>
+ <excerpt:encoded><![CDATA[]]></excerpt:encoded>
+ <wp:post_id>1585</wp:post_id>
+ <wp:post_date><![CDATA[2014-05-03 14:20:32]]></wp:post_date>
+ <wp:post_date_gmt><![CDATA[2014-05-03 13:20:32]]></wp:post_date_gmt>
+ <wp:comment_status><![CDATA[open]]></wp:comment_status>
+ <wp:ping_status><![CDATA[closed]]></wp:ping_status>
+ <wp:post_name><![CDATA[sous-le-ciel]]></wp:post_name>
+ <wp:status><![CDATA[publish]]></wp:status>
+ <wp:post_parent>0</wp:post_parent>
+ <wp:menu_order>0</wp:menu_order>
+ <wp:post_type><![CDATA[post]]></wp:post_type>
+ <wp:post_password><![CDATA[]]></wp:post_password>
+ <wp:is_sticky>0</wp:is_sticky>
+ <category domain="post_tag" nicename="chine"><![CDATA[Chine]]></category>
+ <category domain="category" nicename="creations"><![CDATA[créations]]></category>
+ <category domain="post_tag" nicename="photos"><![CDATA[photos]]></category>
+ <category domain="post_tag" nicename="roof"><![CDATA[roof]]></category>
+ <wp:postmeta>
+ <wp:meta_key><![CDATA[_edit_last]]></wp:meta_key>
+ <wp:meta_value><![CDATA[2]]></wp:meta_value>
+ </wp:postmeta>
+ </item>
diff --git a/tests/data/wordpress_import/wordpress_qtranslate_item_raw_export.xml b/tests/data/wordpress_import/wordpress_qtranslate_item_raw_export.xml
new file mode 100644
index 0000000..50bac7b
--- /dev/null
+++ b/tests/data/wordpress_import/wordpress_qtranslate_item_raw_export.xml
@@ -0,0 +1,30 @@
+ <item>
+ <title><!--:fr-->Sous le ciel<!--:--><!--:en-->Under heaven<!--:--></title>
+ <link>http://www.tibonihoo.net/blog/2014/05/sous-le-ciel/</link>
+ <pubDate>Sat, 03 May 2014 13:20:32 +0000</pubDate>
+ <dc:creator><![CDATA[tibonihoo_admin]]></dc:creator>
+ <guid isPermaLink="false">http://www.tibonihoo.net/blog/?p=1585</guid>
+ <description></description>
+ <content:encoded><![CDATA[<!--:fr--><a href="http://www.tibonihoo.net/blog/wp-content/uploads/2014/05/IMG_68511.jpg"><img class="alignnone size-medium wp-image-1587" src="http://www.tibonihoo.net/blog/wp-content/uploads/2014/05/IMG_68511-300x199.jpg" alt="IMG_6851" width="300" height="199" /></a><!--:--><!--:en--><img class="alignnone size-medium wp-image-1587" src="http://www.tibonihoo.net/blog/wp-content/uploads/2014/05/IMG_68511-300x199.jpg" alt="IMG_6851" width="300" height="199" /><!--:-->]]></content:encoded>
+ <excerpt:encoded><![CDATA[]]></excerpt:encoded>
+ <wp:post_id>1585</wp:post_id>
+ <wp:post_date><![CDATA[2014-05-03 14:20:32]]></wp:post_date>
+ <wp:post_date_gmt><![CDATA[2014-05-03 13:20:32]]></wp:post_date_gmt>
+ <wp:comment_status><![CDATA[open]]></wp:comment_status>
+ <wp:ping_status><![CDATA[closed]]></wp:ping_status>
+ <wp:post_name><![CDATA[sous-le-ciel]]></wp:post_name>
+ <wp:status><![CDATA[publish]]></wp:status>
+ <wp:post_parent>0</wp:post_parent>
+ <wp:menu_order>0</wp:menu_order>
+ <wp:post_type><![CDATA[post]]></wp:post_type>
+ <wp:post_password><![CDATA[]]></wp:post_password>
+ <wp:is_sticky>0</wp:is_sticky>
+ <category domain="post_tag" nicename="chine"><![CDATA[Chine]]></category>
+ <category domain="category" nicename="creations"><![CDATA[créations]]></category>
+ <category domain="post_tag" nicename="photos"><![CDATA[photos]]></category>
+ <category domain="post_tag" nicename="roof"><![CDATA[roof]]></category>
+ <wp:postmeta>
+ <wp:meta_key><![CDATA[_edit_last]]></wp:meta_key>
+ <wp:meta_value><![CDATA[2]]></wp:meta_value>
+ </wp:postmeta>
+ </item>
diff --git a/tests/data/wordpress_import/wordpress_unicode_export.xml b/tests/data/wordpress_import/wordpress_unicode_export.xml
new file mode 100644
index 0000000..b2204fc
--- /dev/null
+++ b/tests/data/wordpress_import/wordpress_unicode_export.xml
@@ -0,0 +1,114 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- This is a WordPress eXtended RSS file generated by WordPress as an export of your site. -->
+<!-- It contains information about your site's posts, pages, comments, categories, and other content. -->
+<!-- You may use this file to transfer that content from one site to another. -->
+<!-- This file is not intended to serve as a complete backup of your site. -->
+
+<!-- To import this information into a WordPress site follow these steps: -->
+<!-- 1. Log in to that site as an administrator. -->
+<!-- 2. Go to Tools: Import in the WordPress admin panel. -->
+<!-- 3. Install the "WordPress" importer from the list. -->
+<!-- 4. Activate & Run Importer. -->
+<!-- 5. Upload this file using the form provided on that page. -->
+<!-- 6. You will first be asked to map the authors in this export file to users -->
+<!-- on the site. For each author, you may choose to map to an -->
+<!-- existing user on the site or to create a new user. -->
+<!-- 7. WordPress will then import each of the posts, pages, comments, categories, etc. -->
+<!-- contained in this file into your site. -->
+
+<!-- generator="WordPress.com" created="2012-12-25 21:39"-->
+<rss version="2.0"
+ xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
+ xmlns:content="http://purl.org/rss/1.0/modules/content/"
+ xmlns:wfw="http://wellformedweb.org/CommentAPI/"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:wp="http://wordpress.org/export/1.2/"
+>
+
+<channel>
+ <title>Nikola Unicode Test ͵pó®t</title>
+ <link>http://nikolaunicode.wordpress.com</link>
+ <description>The greatest WordPress.com site in all the land!</description>
+ <pubDate>Tue, 25 Dec 2012 21:39:30 +0000</pubDate>
+ <language>en</language>
+ <wp:wxr_version>1.2</wp:wxr_version>
+ <wp:base_site_url>http://wordpress.com/</wp:base_site_url>
+ <wp:base_blog_url>http://nikolaunicode.wordpress.com</wp:base_blog_url>
+
+ <wp:author><wp:author_id>3804924</wp:author_id><wp:author_login>ralsina</wp:author_login><wp:author_email>roberto.alsina@gmail.com</wp:author_email><wp:author_display_name><![CDATA[ralsina]]></wp:author_display_name><wp:author_first_name><![CDATA[]]></wp:author_first_name><wp:author_last_name><![CDATA[]]></wp:author_last_name></wp:author>
+
+ <wp:category><wp:term_id>1</wp:term_id><wp:category_nicename>uncategorized</wp:category_nicename><wp:category_parent></wp:category_parent><wp:cat_name><![CDATA[Uncategorized]]></wp:cat_name></wp:category>
+ <wp:tag><wp:term_id>132937998</wp:term_id><wp:tag_slug>thag1</wp:tag_slug><wp:tag_name><![CDATA[þág1]]></wp:tag_name></wp:tag>
+ <wp:tag><wp:term_id>132937999</wp:term_id><wp:tag_slug>thag%c2%b2</wp:tag_slug><wp:tag_name><![CDATA[þág²]]></wp:tag_name></wp:tag>
+
+ <generator>http://wordpress.com/</generator>
+<cloud domain='nikolaunicode.wordpress.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
+<image>
+ <url>https://s2.wp.com/i/buttonw-com.png</url>
+ <title>Nikola Unicode Test ͵pó®t</title>
+ <link>http://nikolaunicode.wordpress.com</link>
+ </image>
+ <atom:link rel="search" type="application/opensearchdescription+xml" href="http://nikolaunicode.wordpress.com/osd.xml" title="Nikola Unicode Test ͵pó®t" />
+ <atom:link rel='hub' href='http://nikolaunicode.wordpress.com/?pushpress=hub'/>
+
+ <item>
+ <title>About</title>
+ <link>http://nikolaunicode.wordpress.com/about/</link>
+ <pubDate>Tue, 25 Dec 2012 21:36:15 +0000</pubDate>
+ <dc:creator>ralsina</dc:creator>
+ <guid isPermaLink="false">http://nikolaunicode.wordpress.com/?page_id=1</guid>
+ <description></description>
+ <content:encoded><![CDATA[This is an example of a page. Unlike posts, which are displayed on your blog’s front page in the order they’re published, pages are better suited for more timeless content that you want to be easily accessible, like your About or Contact information. Click the Edit link to make changes to this page or <a title="Direct link to Add New in the Admin Dashboard" href="/wp-admin/post-new.php?post_type=page">add another page</a>.]]></content:encoded>
+ <excerpt:encoded><![CDATA[]]></excerpt:encoded>
+ <wp:post_id>1</wp:post_id>
+ <wp:post_date>2012-12-25 21:36:15</wp:post_date>
+ <wp:post_date_gmt>2012-12-25 21:36:15</wp:post_date_gmt>
+ <wp:comment_status>open</wp:comment_status>
+ <wp:ping_status>open</wp:ping_status>
+ <wp:post_name>about</wp:post_name>
+ <wp:status>publish</wp:status>
+ <wp:post_parent>0</wp:post_parent>
+ <wp:menu_order>0</wp:menu_order>
+ <wp:post_type>page</wp:post_type>
+ <wp:post_password></wp:post_password>
+ <wp:is_sticky>0</wp:is_sticky>
+ </item>
+ <item>
+ <title>Úñî©òðé title</title>
+ <link>http://nikolaunicode.wordpress.com/2012/12/25/uniode-title/</link>
+ <pubDate>Tue, 25 Dec 2012 21:38:37 +0000</pubDate>
+ <dc:creator>ralsina</dc:creator>
+ <guid isPermaLink="false">http://nikolaunicode.wordpress.com/?p=3</guid>
+ <description></description>
+ <content:encoded><![CDATA[Mó®é úñí©óðé]]></content:encoded>
+ <excerpt:encoded><![CDATA[]]></excerpt:encoded>
+ <wp:post_id>3</wp:post_id>
+ <wp:post_date>2012-12-25 21:38:37</wp:post_date>
+ <wp:post_date_gmt>2012-12-25 21:38:37</wp:post_date_gmt>
+ <wp:comment_status>open</wp:comment_status>
+ <wp:ping_status>open</wp:ping_status>
+ <wp:post_name>uniode-title</wp:post_name>
+ <wp:status>publish</wp:status>
+ <wp:post_parent>0</wp:post_parent>
+ <wp:menu_order>0</wp:menu_order>
+ <wp:post_type>post</wp:post_type>
+ <wp:post_password></wp:post_password>
+ <wp:is_sticky>0</wp:is_sticky>
+ <category domain="post_tag" nicename="thag1"><![CDATA[þág1]]></category>
+ <category domain="post_tag" nicename="thag%c2%b2"><![CDATA[þág²]]></category>
+ <category domain="category" nicename="uncategorized"><![CDATA[Uncategorized]]></category>
+ <wp:postmeta>
+ <wp:meta_key>_edit_last</wp:meta_key>
+ <wp:meta_value><![CDATA[3804924]]></wp:meta_value>
+ </wp:postmeta>
+ <wp:postmeta>
+ <wp:meta_key>_publicize_pending</wp:meta_key>
+ <wp:meta_value><![CDATA[1]]></wp:meta_value>
+ </wp:postmeta>
+ <wp:postmeta>
+ <wp:meta_key>jabber_published</wp:meta_key>
+ <wp:meta_value><![CDATA[1356471518]]></wp:meta_value>
+ </wp:postmeta>
+ </item>
+</channel>
+</rss>