1 files changed, 202 insertions, 81 deletions
diff --git a/nikola/post.py b/nikola/post.py
index 5cf7236..3e3b608 100644
--- a/nikola/post.py
+++ b/nikola/post.py
@@ -37,12 +37,15 @@ try:
 except ImportError:
     from urllib.parse import urljoin  # NOQA
 
+import dateutil.tz
 import lxml.html
+import natsort
 try:
     import pyphen
 except ImportError:
     pyphen = None
-import pytz
+
+from math import ceil
 
 # for tearDown with _reload we cannot use 'from import' to get forLocaleBorg
 import nikola.utils
@@ -51,18 +54,19 @@ from .utils import (
     current_time,
     Functionary,
     LOGGER,
+    LocaleBorg,
     slugify,
     to_datetime,
     unicode_str,
     demote_headers,
     get_translation_candidate,
+    unslugify,
 )
 from .rc4 import rc4
 
 __all__ = ['Post']
 
 TEASER_REGEXP = re.compile('<!--\s*TEASER_END(:(.+))?\s*-->', re.IGNORECASE)
-READ_MORE_LINK = '<p class="more"><a href="{link}">{read_more}…</a></p>'
 
 
 class Post(object):
@@ -89,17 +93,17 @@ class Post(object):
         self.compiler = compiler
         self.compile_html = self.compiler.compile_html
         self.demote_headers = self.compiler.demote_headers and self.config['DEMOTE_HEADERS']
-        tzinfo = pytz.timezone(self.config['TIMEZONE'])
+        tzinfo = self.config['__tzinfo__']
         if self.config['FUTURE_IS_NOW']:
             self.current_time = None
         else:
-            self.current_time = current_time(tzinfo)
+            self.current_time = current_time()
         self.translated_to = set([])
         self._prev_post = None
         self._next_post = None
         self.base_url = self.config['BASE_URL']
         self.is_draft = False
-        self.is_retired = False
+        self.is_private = False
         self.is_mathjax = False
         self.strip_indexes = self.config['STRIP_INDEXES']
         self.index_file = self.config['INDEX_FILE']
@@ -115,29 +119,29 @@ class Post(object):
         self.translations = self.config['TRANSLATIONS']
         self.default_lang = self.config['DEFAULT_LANG']
         self.messages = messages
-        self.skip_untranslated = self.config['HIDE_UNTRANSLATED_POSTS']
+        self.skip_untranslated = not self.config['SHOW_UNTRANSLATED_POSTS']
         self._template_name = template_name
         self.is_two_file = True
         self.hyphenate = self.config['HYPHENATE']
         self._reading_time = None
+        self._remaining_reading_time = None
+        self._paragraph_count = None
+        self._remaining_paragraph_count = None
 
-        default_metadata = get_meta(self, self.config['FILE_METADATA_REGEXP'])
+        default_metadata = get_meta(self, self.config['FILE_METADATA_REGEXP'], self.config['UNSLUGIFY_TITLES'])
 
         self.meta = Functionary(lambda: None, self.default_lang)
         self.meta[self.default_lang] = default_metadata
 
         # Load internationalized metadata
         for lang in self.translations:
+            if os.path.isfile(get_translation_candidate(self.config, self.source_path, lang)):
+                self.translated_to.add(lang)
             if lang != self.default_lang:
-                if os.path.isfile(get_translation_candidate(self.config, self.source_path, lang)):
-                    self.translated_to.add(lang)
-
                 meta = defaultdict(lambda: '')
                 meta.update(default_metadata)
-                meta.update(get_meta(self, self.config['FILE_METADATA_REGEXP'], lang))
+                meta.update(get_meta(self, self.config['FILE_METADATA_REGEXP'], self.config['UNSLUGIFY_TITLES'], lang))
                 self.meta[lang] = meta
-            elif os.path.isfile(self.source_path):
-                self.translated_to.add(self.default_lang)
 
         if not self.is_translation_available(self.default_lang):
             # Special case! (Issue #373)
@@ -147,8 +151,11 @@ class Post(object):
 
         if 'date' not in default_metadata and not use_in_feeds:
             # For stories we don't *really* need a date
-            default_metadata['date'] = datetime.datetime.utcfromtimestamp(
-                os.stat(self.source_path).st_ctime).replace(tzinfo=pytz.UTC).astimezone(tzinfo)
+            if self.config['__invariant__']:
+                default_metadata['date'] = datetime.datetime(2013, 12, 31, 23, 59, 59, tzinfo=tzinfo)
+            else:
+                default_metadata['date'] = datetime.datetime.utcfromtimestamp(
+                    os.stat(self.source_path).st_ctime).replace(tzinfo=dateutil.tz.tzutc()).astimezone(tzinfo)
 
         if 'title' not in default_metadata or 'slug' not in default_metadata \
                 or 'date' not in default_metadata:
@@ -169,26 +176,34 @@ class Post(object):
         self.publish_later = False if self.current_time is None else self.date >= self.current_time
 
         is_draft = False
-        is_retired = False
+        is_private = False
         self._tags = {}
         for lang in self.translated_to:
-            self._tags[lang] = [x.strip() for x in self.meta[lang]['tags'].split(',')]
+            self._tags[lang] = natsort.natsorted(
+                list(set([x.strip() for x in self.meta[lang]['tags'].split(',')])))
             self._tags[lang] = [t for t in self._tags[lang] if t]
             if 'draft' in self._tags[lang]:
                 is_draft = True
+                LOGGER.debug('The post "{0}" is a draft.'.format(self.source_path))
                 self._tags[lang].remove('draft')
+
+            # TODO: remove in v8
             if 'retired' in self._tags[lang]:
-                is_retired = True
+                is_private = True
+                LOGGER.warning('The "retired" tag in post "{0}" is now deprecated and will be removed in v8.  Use "private" instead.'.format(self.source_path))
                 self._tags[lang].remove('retired')
+            # end remove in v8
+
             if 'private' in self._tags[lang]:
-                is_retired = True
+                is_private = True
+                LOGGER.debug('The post "{0}" is private.'.format(self.source_path))
                 self._tags[lang].remove('private')
 
         # While draft comes from the tags, it's not really a tag
         self.is_draft = is_draft
-        self.is_retired = is_retired
+        self.is_private = is_private
         self.is_post = use_in_feeds
-        self.use_in_feeds = use_in_feeds and not is_draft and not is_retired \
+        self.use_in_feeds = use_in_feeds and not is_draft and not is_private \
             and not self.publish_later
 
         # If mathjax is a tag, then enable mathjax rendering support
@@ -277,6 +292,21 @@ class Post(object):
             lang = nikola.utils.LocaleBorg().current_lang
         return self.meta[lang]['title']
 
+    def author(self, lang=None):
+        """Return localized author or BLOG_AUTHOR if unspecified.
+
+        If lang is not specified, it defaults to the current language from
+        templates, as set in LocaleBorg.
+        """
+        if lang is None:
+            lang = nikola.utils.LocaleBorg().current_lang
+        if self.meta[lang]['author']:
+            author = self.meta[lang]['author']
+        else:
+            author = self.config['BLOG_AUTHOR'](lang)
+
+        return author
+
     def description(self, lang=None):
         """Return localized description."""
         if lang is None:
@@ -290,7 +320,6 @@ class Post(object):
             deps.append(self.base_path)
         if lang != self.default_lang:
             deps += [get_translation_candidate(self.config, self.base_path, lang)]
-        deps += self.fragment_deps(lang)
         return deps
 
     def compile(self, lang):
@@ -304,21 +333,31 @@ class Post(object):
             with codecs.open(path, 'wb+', 'utf8') as outf:
                 outf.write(data)
 
-        self.READ_MORE_LINK = self.config['READ_MORE_LINK']
         dest = self.translated_base_path(lang)
-        if not self.is_translation_available(lang) and self.config['HIDE_UNTRANSLATED_POSTS']:
+        if not self.is_translation_available(lang) and not self.config['SHOW_UNTRANSLATED_POSTS']:
             return
-        else:
-            self.compile_html(
-                self.translated_source_path(lang),
-                dest,
-                self.is_two_file),
+        # Set the language to the right thing
+        LocaleBorg().set_locale(lang)
+        self.compile_html(
+            self.translated_source_path(lang),
+            dest,
+            self.is_two_file),
         if self.meta('password'):
             wrap_encrypt(dest, self.meta('password'))
         if self.publish_later:
             LOGGER.notice('{0} is scheduled to be published in the future ({1})'.format(
                 self.source_path, self.date))
 
+    def extra_deps(self):
+        """get extra depepencies from .dep files
+        This file is created by ReST
+        """
+        dep_path = self.base_path + '.dep'
+        if os.path.isfile(dep_path):
+            with codecs.open(dep_path, 'rb+', 'utf8') as depf:
+                return [l.strip() for l in depf.readlines()]
+        return []
+
     def fragment_deps(self, lang):
         """Return a list of dependencies to build this post's fragment."""
         deps = []
@@ -326,10 +365,7 @@ class Post(object):
             deps.append(self.source_path)
         if os.path.isfile(self.metadata_path):
             deps.append(self.metadata_path)
-        dep_path = self.base_path + '.dep'
-        if os.path.isfile(dep_path):
-            with codecs.open(dep_path, 'rb+', 'utf8') as depf:
-                deps.extend([l.strip() for l in depf.readlines()])
+        deps.extend(self.extra_deps())
         lang_deps = []
         if lang != self.default_lang:
             lang_deps = [get_translation_candidate(self.config, d, lang) for d in deps]
@@ -354,10 +390,7 @@ class Post(object):
 
     def translated_base_path(self, lang):
         """Return path to the translation's base_path file."""
-        if lang == self.default_lang:
-            return self.base_path
-        else:
-            return get_translation_candidate(self.config, self.base_path, lang)
+        return get_translation_candidate(self.config, self.base_path, lang)
 
     def _translated_file_path(self, lang):
         """Return path to the translation's file, or to the original."""
@@ -371,16 +404,30 @@ class Post(object):
         else:
             return get_translation_candidate(self.config, self.base_path, sorted(self.translated_to)[0])
 
-    def text(self, lang=None, teaser_only=False, strip_html=False, really_absolute=False):
+    def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_link=True, rss_read_more_link=False):
         """Read the post file for that language and return its contents.
 
         teaser_only=True breaks at the teaser marker and returns only the teaser.
         strip_html=True removes HTML tags
+        show_read_more_link=False does not add the Read more... link
+        rss_read_more_link=True uses RSS_READ_MORE_LINK instead of INDEX_READ_MORE_LINK
         lang=None uses the last used to set locale
 
         All links in the returned HTML will be relative.
         The HTML returned is a bare fragment, not a full document.
         """
+        def strip_root_element(el):
+            ''' Strips root tag from an Element.
+
+            Required because lxml has an tendency to add <div>, <body>
+            root tags to strings which are generated by using
+            lxml.html.tostring()
+
+            :param Element el: the root element to strip
+            '''
+            return (el.text or '') + ''.join(
+                [lxml.html.tostring(child, encoding='unicode')
+                    for child in el.iterchildren()])
 
         if lang is None:
             lang = nikola.utils.LocaleBorg().current_lang
@@ -395,7 +442,7 @@ class Post(object):
                 return ""
             # let other errors raise
             raise(e)
-        base_url = self.permalink(lang=lang, absolute=really_absolute)
+        base_url = self.permalink(lang=lang)
         document.make_links_absolute(base_url)
 
         if self.hyphenate:
@@ -405,28 +452,34 @@ class Post(object):
         # data here is a full HTML doc, including HTML and BODY tags
         # which is not ideal (Issue #464)
         try:
-            body = document.body
-            data = (body.text or '') + ''.join(
-                [lxml.html.tostring(child, encoding='unicode')
-                    for child in body.iterchildren()])
+            data = strip_root_element(document.body)
         except IndexError:  # No body there, it happens sometimes
             pass
 
         if teaser_only:
             teaser = TEASER_REGEXP.split(data)[0]
             if teaser != data:
-                if not strip_html:
+                if not strip_html and show_read_more_link:
                     if TEASER_REGEXP.search(data).groups()[-1]:
                         teaser += '<p class="more"><a href="{0}">{1}</a></p>'.format(
-                            self.permalink(lang, absolute=really_absolute),
+                            self.permalink(lang),
                             TEASER_REGEXP.search(data).groups()[-1])
                     else:
-                        teaser += READ_MORE_LINK.format(
-                            link=self.permalink(lang, absolute=really_absolute),
-                            read_more=self.messages[lang]["Read more"])
+                        l = self.config['RSS_READ_MORE_LINK'](lang) if rss_read_more_link else self.config['INDEX_READ_MORE_LINK'](lang)
+                        teaser += l.format(
+                            link=self.permalink(lang),
+                            read_more=self.messages[lang]["Read more"],
+                            min_remaining_read=self.messages[lang]["%d min remaining to read"] % (self.remaining_reading_time),
+                            reading_time=self.reading_time,
+                            remaining_reading_time=self.remaining_reading_time,
+                            paragraph_count=self.paragraph_count,
+                            remaining_paragraph_count=self.remaining_paragraph_count)
                 # This closes all open tags and sanitizes the broken HTML
                 document = lxml.html.fromstring(teaser)
-                data = lxml.html.tostring(document, encoding='unicode')
+                try:
+                    data = strip_root_element(document)
+                except IndexError:
+                    data = lxml.html.tostring(document, encoding='unicode')
 
         if data and strip_html:
             try:
@@ -441,23 +494,71 @@ class Post(object):
                 try:
                     document = lxml.html.fromstring(data)
                     demote_headers(document, self.demote_headers)
+                    data = strip_root_element(document)
+                except (lxml.etree.ParserError, IndexError):
                     data = lxml.html.tostring(document, encoding='unicode')
-                except lxml.etree.ParserError:
-                    pass
 
         return data
 
     @property
     def reading_time(self):
-        """Reading time based on length of text.
-        """
+        """Reading time based on length of text."""
         if self._reading_time is None:
             text = self.text(strip_html=True)
-            words_per_minute = 180
+            words_per_minute = 220
             words = len(text.split())
-            self._reading_time = int(round(words / words_per_minute)) or 1
+            self._reading_time = int(ceil(words / words_per_minute)) or 1
         return self._reading_time
 
+    @property
+    def remaining_reading_time(self):
+        """Remaining reading time based on length of text (does not include teaser)."""
+        if self._remaining_reading_time is None:
+            text = self.text(teaser_only=True, strip_html=True)
+            words_per_minute = 220
+            words = len(text.split())
+            self._remaining_reading_time = self.reading_time - int(ceil(words / words_per_minute)) or 1
+        return self._remaining_reading_time
+
+    @property
+    def paragraph_count(self):
+        """Return the paragraph count for this post."""
+        if self._paragraph_count is None:
+            # duplicated with Post.text()
+            lang = nikola.utils.LocaleBorg().current_lang
+            file_name = self._translated_file_path(lang)
+            with codecs.open(file_name, "r", "utf8") as post_file:
+                data = post_file.read().strip()
+            try:
+                document = lxml.html.fragment_fromstring(data, "body")
+            except lxml.etree.ParserError as e:
+                # if we don't catch this, it breaks later (Issue #374)
+                if str(e) == "Document is empty":
+                    return ""
+                # let other errors raise
+                raise(e)
+
+            # output is a float, for no real reason at all
+            self._paragraph_count = int(document.xpath('count(//p)'))
+        return self._paragraph_count
+
+    @property
+    def remaining_paragraph_count(self):
+        """Return the remaining paragraph count for this post (does not include teaser)."""
+        if self._remaining_paragraph_count is None:
+            try:
+                # Just asking self.text() is easier here.
+                document = lxml.html.fragment_fromstring(self.text(teaser_only=True, show_read_more_link=False), "body")
+            except lxml.etree.ParserError as e:
+                # if we don't catch this, it breaks later (Issue #374)
+                if str(e) == "Document is empty":
+                    return ""
+                # let other errors raise
+                raise(e)
+
+            self._remaining_paragraph_count = self.paragraph_count - int(document.xpath('count(//p)'))
+        return self._remaining_paragraph_count
+
     def source_link(self, lang=None):
         """Return absolute link to the post's source."""
         return "/" + self.destination_path(
@@ -524,7 +625,7 @@ def re_meta(line, match=None):
         return (None,)
 
 
-def _get_metadata_from_filename_by_regex(filename, metadata_regexp):
+def _get_metadata_from_filename_by_regex(filename, metadata_regexp, unslugify_titles):
     """
     Tries to ried the metadata from the filename based on the given re.
     This requires to use symbolic group names in the pattern.
@@ -538,7 +639,11 @@ def _get_metadata_from_filename_by_regex(filename, metadata_regexp):
     if match:
         # .items() for py3k compat.
         for key, value in match.groupdict().items():
-            meta[key.lower()] = value  # metadata must be lowercase
+            k = key.lower().strip()  # metadata must be lowercase
+            if k == 'title' and unslugify_titles:
+                meta[k] = unslugify(value, discard_numbers=False)
+            else:
+                meta[k] = value
 
     return meta
 
@@ -620,29 +725,43 @@ def get_metadata_from_meta_file(path, config=None, lang=None):
     if os.path.isfile(meta_path):
         with codecs.open(meta_path, "r", "utf8") as meta_file:
             meta_data = meta_file.readlines()
-        while len(meta_data) < 7:
-            meta_data.append("")
-        (title, slug, date, tags, link, description, _type) = [
-            x.strip() for x in meta_data][:7]
-
-        meta = {}
-
-        if title:
-            meta['title'] = title
-        if slug:
-            meta['slug'] = slug
-        if date:
-            meta['date'] = date
-        if tags:
-            meta['tags'] = tags
-        if link:
-            meta['link'] = link
-        if description:
-            meta['description'] = description
-        if _type:
-            meta['type'] = _type
 
-        return meta
+        # Detect new-style metadata.
+        newstyleregexp = re.compile(r'\.\. .*?: .*')
+        newstylemeta = False
+        for l in meta_data:
+            if l.strip():
+                if re.match(newstyleregexp, l):
+                    newstylemeta = True
+
+        if newstylemeta:
+            # New-style metadata is basically the same as reading metadata from
+            # a 1-file post.
+            return get_metadata_from_file(path, config, lang)
+        else:
+            while len(meta_data) < 7:
+                meta_data.append("")
+            (title, slug, date, tags, link, description, _type) = [
+                x.strip() for x in meta_data][:7]
+
+            meta = {}
+
+            if title:
+                meta['title'] = title
+            if slug:
+                meta['slug'] = slug
+            if date:
+                meta['date'] = date
+            if tags:
+                meta['tags'] = tags
+            if link:
+                meta['link'] = link
+            if description:
+                meta['description'] = description
+            if _type:
+                meta['type'] = _type
+
+            return meta
 
     elif lang:
         # Metadata file doesn't exist, but not default language,
@@ -653,11 +772,12 @@ def get_metadata_from_meta_file(path, config=None, lang=None):
         return {}
 
 
-def get_meta(post, file_metadata_regexp=None, lang=None):
+def get_meta(post, file_metadata_regexp=None, unslugify_titles=False, lang=None):
     """Get post's meta from source.
 
     If ``file_metadata_regexp`` is given it will be tried to read
     metadata from the filename.
+    If ``unslugify_titles`` is True, the extracted title (if any) will be unslugified, as is done in galleries.
     If any metadata is then found inside the file the metadata from the
     file will override previous findings.
     """
@@ -676,7 +796,8 @@ def get_meta(post, file_metadata_regexp=None, lang=None):
 
     if file_metadata_regexp is not None:
         meta.update(_get_metadata_from_filename_by_regex(post.source_path,
-                                                         file_metadata_regexp))
+                                                         file_metadata_regexp,
+                                                         unslugify_titles))
 
     meta.update(get_metadata_from_file(post.source_path, config, lang))