diff options
| author | 2014-06-13 21:51:02 -0300 | |
|---|---|---|
| committer | 2014-06-13 21:51:02 -0300 | |
| commit | 58c4878526dec5510f23c812274686787d8724ba (patch) | |
| tree | 5f2374bc17adb10e15f7e5b4576595d9cc2ef17e /nikola/post.py | |
| parent | fa50632a9d87c3989566fed3e49c160a132e0d14 (diff) | |
Imported Upstream version 7.0.1upstream/7.0.1
Diffstat (limited to 'nikola/post.py')
| -rw-r--r-- | nikola/post.py | 283 |
1 files changed, 202 insertions, 81 deletions
diff --git a/nikola/post.py b/nikola/post.py index 5cf7236..3e3b608 100644 --- a/nikola/post.py +++ b/nikola/post.py @@ -37,12 +37,15 @@ try: except ImportError: from urllib.parse import urljoin # NOQA +import dateutil.tz import lxml.html +import natsort try: import pyphen except ImportError: pyphen = None -import pytz + +from math import ceil # for tearDown with _reload we cannot use 'from import' to get forLocaleBorg import nikola.utils @@ -51,18 +54,19 @@ from .utils import ( current_time, Functionary, LOGGER, + LocaleBorg, slugify, to_datetime, unicode_str, demote_headers, get_translation_candidate, + unslugify, ) from .rc4 import rc4 __all__ = ['Post'] TEASER_REGEXP = re.compile('<!--\s*TEASER_END(:(.+))?\s*-->', re.IGNORECASE) -READ_MORE_LINK = '<p class="more"><a href="{link}">{read_more}…</a></p>' class Post(object): @@ -89,17 +93,17 @@ class Post(object): self.compiler = compiler self.compile_html = self.compiler.compile_html self.demote_headers = self.compiler.demote_headers and self.config['DEMOTE_HEADERS'] - tzinfo = pytz.timezone(self.config['TIMEZONE']) + tzinfo = self.config['__tzinfo__'] if self.config['FUTURE_IS_NOW']: self.current_time = None else: - self.current_time = current_time(tzinfo) + self.current_time = current_time() self.translated_to = set([]) self._prev_post = None self._next_post = None self.base_url = self.config['BASE_URL'] self.is_draft = False - self.is_retired = False + self.is_private = False self.is_mathjax = False self.strip_indexes = self.config['STRIP_INDEXES'] self.index_file = self.config['INDEX_FILE'] @@ -115,29 +119,29 @@ class Post(object): self.translations = self.config['TRANSLATIONS'] self.default_lang = self.config['DEFAULT_LANG'] self.messages = messages - self.skip_untranslated = self.config['HIDE_UNTRANSLATED_POSTS'] + self.skip_untranslated = not self.config['SHOW_UNTRANSLATED_POSTS'] self._template_name = template_name self.is_two_file = True self.hyphenate = self.config['HYPHENATE'] self._reading_time = None + self._remaining_reading_time = None + self._paragraph_count = None + self._remaining_paragraph_count = None - default_metadata = get_meta(self, self.config['FILE_METADATA_REGEXP']) + default_metadata = get_meta(self, self.config['FILE_METADATA_REGEXP'], self.config['UNSLUGIFY_TITLES']) self.meta = Functionary(lambda: None, self.default_lang) self.meta[self.default_lang] = default_metadata # Load internationalized metadata for lang in self.translations: + if os.path.isfile(get_translation_candidate(self.config, self.source_path, lang)): + self.translated_to.add(lang) if lang != self.default_lang: - if os.path.isfile(get_translation_candidate(self.config, self.source_path, lang)): - self.translated_to.add(lang) - meta = defaultdict(lambda: '') meta.update(default_metadata) - meta.update(get_meta(self, self.config['FILE_METADATA_REGEXP'], lang)) + meta.update(get_meta(self, self.config['FILE_METADATA_REGEXP'], self.config['UNSLUGIFY_TITLES'], lang)) self.meta[lang] = meta - elif os.path.isfile(self.source_path): - self.translated_to.add(self.default_lang) if not self.is_translation_available(self.default_lang): # Special case! (Issue #373) @@ -147,8 +151,11 @@ class Post(object): if 'date' not in default_metadata and not use_in_feeds: # For stories we don't *really* need a date - default_metadata['date'] = datetime.datetime.utcfromtimestamp( - os.stat(self.source_path).st_ctime).replace(tzinfo=pytz.UTC).astimezone(tzinfo) + if self.config['__invariant__']: + default_metadata['date'] = datetime.datetime(2013, 12, 31, 23, 59, 59, tzinfo=tzinfo) + else: + default_metadata['date'] = datetime.datetime.utcfromtimestamp( + os.stat(self.source_path).st_ctime).replace(tzinfo=dateutil.tz.tzutc()).astimezone(tzinfo) if 'title' not in default_metadata or 'slug' not in default_metadata \ or 'date' not in default_metadata: @@ -169,26 +176,34 @@ class Post(object): self.publish_later = False if self.current_time is None else self.date >= self.current_time is_draft = False - is_retired = False + is_private = False self._tags = {} for lang in self.translated_to: - self._tags[lang] = [x.strip() for x in self.meta[lang]['tags'].split(',')] + self._tags[lang] = natsort.natsorted( + list(set([x.strip() for x in self.meta[lang]['tags'].split(',')]))) self._tags[lang] = [t for t in self._tags[lang] if t] if 'draft' in self._tags[lang]: is_draft = True + LOGGER.debug('The post "{0}" is a draft.'.format(self.source_path)) self._tags[lang].remove('draft') + + # TODO: remove in v8 if 'retired' in self._tags[lang]: - is_retired = True + is_private = True + LOGGER.warning('The "retired" tag in post "{0}" is now deprecated and will be removed in v8. Use "private" instead.'.format(self.source_path)) self._tags[lang].remove('retired') + # end remove in v8 + if 'private' in self._tags[lang]: - is_retired = True + is_private = True + LOGGER.debug('The post "{0}" is private.'.format(self.source_path)) self._tags[lang].remove('private') # While draft comes from the tags, it's not really a tag self.is_draft = is_draft - self.is_retired = is_retired + self.is_private = is_private self.is_post = use_in_feeds - self.use_in_feeds = use_in_feeds and not is_draft and not is_retired \ + self.use_in_feeds = use_in_feeds and not is_draft and not is_private \ and not self.publish_later # If mathjax is a tag, then enable mathjax rendering support @@ -277,6 +292,21 @@ class Post(object): lang = nikola.utils.LocaleBorg().current_lang return self.meta[lang]['title'] + def author(self, lang=None): + """Return localized author or BLOG_AUTHOR if unspecified. + + If lang is not specified, it defaults to the current language from + templates, as set in LocaleBorg. + """ + if lang is None: + lang = nikola.utils.LocaleBorg().current_lang + if self.meta[lang]['author']: + author = self.meta[lang]['author'] + else: + author = self.config['BLOG_AUTHOR'](lang) + + return author + def description(self, lang=None): """Return localized description.""" if lang is None: @@ -290,7 +320,6 @@ class Post(object): deps.append(self.base_path) if lang != self.default_lang: deps += [get_translation_candidate(self.config, self.base_path, lang)] - deps += self.fragment_deps(lang) return deps def compile(self, lang): @@ -304,21 +333,31 @@ class Post(object): with codecs.open(path, 'wb+', 'utf8') as outf: outf.write(data) - self.READ_MORE_LINK = self.config['READ_MORE_LINK'] dest = self.translated_base_path(lang) - if not self.is_translation_available(lang) and self.config['HIDE_UNTRANSLATED_POSTS']: + if not self.is_translation_available(lang) and not self.config['SHOW_UNTRANSLATED_POSTS']: return - else: - self.compile_html( - self.translated_source_path(lang), - dest, - self.is_two_file), + # Set the language to the right thing + LocaleBorg().set_locale(lang) + self.compile_html( + self.translated_source_path(lang), + dest, + self.is_two_file), if self.meta('password'): wrap_encrypt(dest, self.meta('password')) if self.publish_later: LOGGER.notice('{0} is scheduled to be published in the future ({1})'.format( self.source_path, self.date)) + def extra_deps(self): + """get extra depepencies from .dep files + This file is created by ReST + """ + dep_path = self.base_path + '.dep' + if os.path.isfile(dep_path): + with codecs.open(dep_path, 'rb+', 'utf8') as depf: + return [l.strip() for l in depf.readlines()] + return [] + def fragment_deps(self, lang): """Return a list of dependencies to build this post's fragment.""" deps = [] @@ -326,10 +365,7 @@ class Post(object): deps.append(self.source_path) if os.path.isfile(self.metadata_path): deps.append(self.metadata_path) - dep_path = self.base_path + '.dep' - if os.path.isfile(dep_path): - with codecs.open(dep_path, 'rb+', 'utf8') as depf: - deps.extend([l.strip() for l in depf.readlines()]) + deps.extend(self.extra_deps()) lang_deps = [] if lang != self.default_lang: lang_deps = [get_translation_candidate(self.config, d, lang) for d in deps] @@ -354,10 +390,7 @@ class Post(object): def translated_base_path(self, lang): """Return path to the translation's base_path file.""" - if lang == self.default_lang: - return self.base_path - else: - return get_translation_candidate(self.config, self.base_path, lang) + return get_translation_candidate(self.config, self.base_path, lang) def _translated_file_path(self, lang): """Return path to the translation's file, or to the original.""" @@ -371,16 +404,30 @@ class Post(object): else: return get_translation_candidate(self.config, self.base_path, sorted(self.translated_to)[0]) - def text(self, lang=None, teaser_only=False, strip_html=False, really_absolute=False): + def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_link=True, rss_read_more_link=False): """Read the post file for that language and return its contents. teaser_only=True breaks at the teaser marker and returns only the teaser. strip_html=True removes HTML tags + show_read_more_link=False does not add the Read more... link + rss_read_more_link=True uses RSS_READ_MORE_LINK instead of INDEX_READ_MORE_LINK lang=None uses the last used to set locale All links in the returned HTML will be relative. The HTML returned is a bare fragment, not a full document. """ + def strip_root_element(el): + ''' Strips root tag from an Element. + + Required because lxml has an tendency to add <div>, <body> + root tags to strings which are generated by using + lxml.html.tostring() + + :param Element el: the root element to strip + ''' + return (el.text or '') + ''.join( + [lxml.html.tostring(child, encoding='unicode') + for child in el.iterchildren()]) if lang is None: lang = nikola.utils.LocaleBorg().current_lang @@ -395,7 +442,7 @@ class Post(object): return "" # let other errors raise raise(e) - base_url = self.permalink(lang=lang, absolute=really_absolute) + base_url = self.permalink(lang=lang) document.make_links_absolute(base_url) if self.hyphenate: @@ -405,28 +452,34 @@ class Post(object): # data here is a full HTML doc, including HTML and BODY tags # which is not ideal (Issue #464) try: - body = document.body - data = (body.text or '') + ''.join( - [lxml.html.tostring(child, encoding='unicode') - for child in body.iterchildren()]) + data = strip_root_element(document.body) except IndexError: # No body there, it happens sometimes pass if teaser_only: teaser = TEASER_REGEXP.split(data)[0] if teaser != data: - if not strip_html: + if not strip_html and show_read_more_link: if TEASER_REGEXP.search(data).groups()[-1]: teaser += '<p class="more"><a href="{0}">{1}</a></p>'.format( - self.permalink(lang, absolute=really_absolute), + self.permalink(lang), TEASER_REGEXP.search(data).groups()[-1]) else: - teaser += READ_MORE_LINK.format( - link=self.permalink(lang, absolute=really_absolute), - read_more=self.messages[lang]["Read more"]) + l = self.config['RSS_READ_MORE_LINK'](lang) if rss_read_more_link else self.config['INDEX_READ_MORE_LINK'](lang) + teaser += l.format( + link=self.permalink(lang), + read_more=self.messages[lang]["Read more"], + min_remaining_read=self.messages[lang]["%d min remaining to read"] % (self.remaining_reading_time), + reading_time=self.reading_time, + remaining_reading_time=self.remaining_reading_time, + paragraph_count=self.paragraph_count, + remaining_paragraph_count=self.remaining_paragraph_count) # This closes all open tags and sanitizes the broken HTML document = lxml.html.fromstring(teaser) - data = lxml.html.tostring(document, encoding='unicode') + try: + data = strip_root_element(document) + except IndexError: + data = lxml.html.tostring(document, encoding='unicode') if data and strip_html: try: @@ -441,23 +494,71 @@ class Post(object): try: document = lxml.html.fromstring(data) demote_headers(document, self.demote_headers) + data = strip_root_element(document) + except (lxml.etree.ParserError, IndexError): data = lxml.html.tostring(document, encoding='unicode') - except lxml.etree.ParserError: - pass return data @property def reading_time(self): - """Reading time based on length of text. - """ + """Reading time based on length of text.""" if self._reading_time is None: text = self.text(strip_html=True) - words_per_minute = 180 + words_per_minute = 220 words = len(text.split()) - self._reading_time = int(round(words / words_per_minute)) or 1 + self._reading_time = int(ceil(words / words_per_minute)) or 1 return self._reading_time + @property + def remaining_reading_time(self): + """Remaining reading time based on length of text (does not include teaser).""" + if self._remaining_reading_time is None: + text = self.text(teaser_only=True, strip_html=True) + words_per_minute = 220 + words = len(text.split()) + self._remaining_reading_time = self.reading_time - int(ceil(words / words_per_minute)) or 1 + return self._remaining_reading_time + + @property + def paragraph_count(self): + """Return the paragraph count for this post.""" + if self._paragraph_count is None: + # duplicated with Post.text() + lang = nikola.utils.LocaleBorg().current_lang + file_name = self._translated_file_path(lang) + with codecs.open(file_name, "r", "utf8") as post_file: + data = post_file.read().strip() + try: + document = lxml.html.fragment_fromstring(data, "body") + except lxml.etree.ParserError as e: + # if we don't catch this, it breaks later (Issue #374) + if str(e) == "Document is empty": + return "" + # let other errors raise + raise(e) + + # output is a float, for no real reason at all + self._paragraph_count = int(document.xpath('count(//p)')) + return self._paragraph_count + + @property + def remaining_paragraph_count(self): + """Return the remaining paragraph count for this post (does not include teaser).""" + if self._remaining_paragraph_count is None: + try: + # Just asking self.text() is easier here. + document = lxml.html.fragment_fromstring(self.text(teaser_only=True, show_read_more_link=False), "body") + except lxml.etree.ParserError as e: + # if we don't catch this, it breaks later (Issue #374) + if str(e) == "Document is empty": + return "" + # let other errors raise + raise(e) + + self._remaining_paragraph_count = self.paragraph_count - int(document.xpath('count(//p)')) + return self._remaining_paragraph_count + def source_link(self, lang=None): """Return absolute link to the post's source.""" return "/" + self.destination_path( @@ -524,7 +625,7 @@ def re_meta(line, match=None): return (None,) -def _get_metadata_from_filename_by_regex(filename, metadata_regexp): +def _get_metadata_from_filename_by_regex(filename, metadata_regexp, unslugify_titles): """ Tries to ried the metadata from the filename based on the given re. This requires to use symbolic group names in the pattern. @@ -538,7 +639,11 @@ def _get_metadata_from_filename_by_regex(filename, metadata_regexp): if match: # .items() for py3k compat. for key, value in match.groupdict().items(): - meta[key.lower()] = value # metadata must be lowercase + k = key.lower().strip() # metadata must be lowercase + if k == 'title' and unslugify_titles: + meta[k] = unslugify(value, discard_numbers=False) + else: + meta[k] = value return meta @@ -620,29 +725,43 @@ def get_metadata_from_meta_file(path, config=None, lang=None): if os.path.isfile(meta_path): with codecs.open(meta_path, "r", "utf8") as meta_file: meta_data = meta_file.readlines() - while len(meta_data) < 7: - meta_data.append("") - (title, slug, date, tags, link, description, _type) = [ - x.strip() for x in meta_data][:7] - - meta = {} - - if title: - meta['title'] = title - if slug: - meta['slug'] = slug - if date: - meta['date'] = date - if tags: - meta['tags'] = tags - if link: - meta['link'] = link - if description: - meta['description'] = description - if _type: - meta['type'] = _type - return meta + # Detect new-style metadata. + newstyleregexp = re.compile(r'\.\. .*?: .*') + newstylemeta = False + for l in meta_data: + if l.strip(): + if re.match(newstyleregexp, l): + newstylemeta = True + + if newstylemeta: + # New-style metadata is basically the same as reading metadata from + # a 1-file post. + return get_metadata_from_file(path, config, lang) + else: + while len(meta_data) < 7: + meta_data.append("") + (title, slug, date, tags, link, description, _type) = [ + x.strip() for x in meta_data][:7] + + meta = {} + + if title: + meta['title'] = title + if slug: + meta['slug'] = slug + if date: + meta['date'] = date + if tags: + meta['tags'] = tags + if link: + meta['link'] = link + if description: + meta['description'] = description + if _type: + meta['type'] = _type + + return meta elif lang: # Metadata file doesn't exist, but not default language, @@ -653,11 +772,12 @@ def get_metadata_from_meta_file(path, config=None, lang=None): return {} -def get_meta(post, file_metadata_regexp=None, lang=None): +def get_meta(post, file_metadata_regexp=None, unslugify_titles=False, lang=None): """Get post's meta from source. If ``file_metadata_regexp`` is given it will be tried to read metadata from the filename. + If ``unslugify_titles`` is True, the extracted title (if any) will be unslugified, as is done in galleries. If any metadata is then found inside the file the metadata from the file will override previous findings. """ @@ -676,7 +796,8 @@ def get_meta(post, file_metadata_regexp=None, lang=None): if file_metadata_regexp is not None: meta.update(_get_metadata_from_filename_by_regex(post.source_path, - file_metadata_regexp)) + file_metadata_regexp, + unslugify_titles)) meta.update(get_metadata_from_file(post.source_path, config, lang)) |
