diff options
Diffstat (limited to 'nikola/post.py')
| -rw-r--r-- | nikola/post.py | 287 |
1 files changed, 240 insertions, 47 deletions
diff --git a/nikola/post.py b/nikola/post.py index 350014a..466d5e0 100644 --- a/nikola/post.py +++ b/nikola/post.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright © 2012-2014 Roberto Alsina and others. +# Copyright © 2012-2015 Roberto Alsina and others. # Permission is hereby granted, free of charge, to any # person obtaining a copy of this software and associated @@ -29,6 +29,8 @@ from __future__ import unicode_literals, print_function, absolute_import import io from collections import defaultdict import datetime +import hashlib +import json import os import re import string @@ -37,6 +39,8 @@ try: except ImportError: from urllib.parse import urljoin # NOQA +from . import utils + import dateutil.tz import lxml.html import natsort @@ -67,6 +71,7 @@ from .rc4 import rc4 __all__ = ['Post'] TEASER_REGEXP = re.compile('<!--\s*TEASER_END(:(.+))?\s*-->', re.IGNORECASE) +_UPGRADE_METADATA_ADVERTISED = False class Post(object): @@ -122,13 +127,18 @@ class Post(object): self.skip_untranslated = not self.config['SHOW_UNTRANSLATED_POSTS'] self._template_name = template_name self.is_two_file = True + self.newstylemeta = True self.hyphenate = self.config['HYPHENATE'] self._reading_time = None self._remaining_reading_time = None self._paragraph_count = None self._remaining_paragraph_count = None + self._dependency_file_fragment = defaultdict(list) + self._dependency_file_page = defaultdict(list) + self._dependency_uptodate_fragment = defaultdict(list) + self._dependency_uptodate_page = defaultdict(list) - default_metadata = get_meta(self, self.config['FILE_METADATA_REGEXP'], self.config['UNSLUGIFY_TITLES']) + default_metadata, self.newstylemeta = get_meta(self, self.config['FILE_METADATA_REGEXP'], self.config['UNSLUGIFY_TITLES']) self.meta = Functionary(lambda: None, self.default_lang) self.meta[self.default_lang] = default_metadata @@ -140,7 +150,9 @@ class Post(object): if lang != self.default_lang: meta = defaultdict(lambda: '') meta.update(default_metadata) - meta.update(get_meta(self, self.config['FILE_METADATA_REGEXP'], self.config['UNSLUGIFY_TITLES'], lang)) + _meta, _nsm = get_meta(self, self.config['FILE_METADATA_REGEXP'], self.config['UNSLUGIFY_TITLES'], lang) + self.newstylemeta = self.newstylemeta and _nsm + meta.update(_meta) self.meta[lang] = meta if not self.is_translation_available(self.default_lang): @@ -157,6 +169,14 @@ class Post(object): default_metadata['date'] = datetime.datetime.utcfromtimestamp( os.stat(self.source_path).st_ctime).replace(tzinfo=dateutil.tz.tzutc()).astimezone(tzinfo) + # If time zone is set, build localized datetime. + self.date = to_datetime(self.meta[self.default_lang]['date'], tzinfo) + + if 'updated' not in default_metadata: + default_metadata['updated'] = default_metadata.get('date', None) + + self.updated = to_datetime(default_metadata['updated']) + if 'title' not in default_metadata or 'slug' not in default_metadata \ or 'date' not in default_metadata: raise OSError("You must set a title (found '{0}'), a slug (found " @@ -170,9 +190,6 @@ class Post(object): # default value is 'text' default_metadata['type'] = 'text' - # If time zone is set, build localized datetime. - self.date = to_datetime(self.meta[self.default_lang]['date'], tzinfo) - self.publish_later = False if self.current_time is None else self.date >= self.current_time is_draft = False @@ -180,9 +197,10 @@ class Post(object): self._tags = {} for lang in self.translated_to: self._tags[lang] = natsort.natsorted( - list(set([x.strip() for x in self.meta[lang]['tags'].split(',')]))) + list(set([x.strip() for x in self.meta[lang]['tags'].split(',')])), + alg=natsort.ns.F | natsort.ns.IC) self._tags[lang] = [t for t in self._tags[lang] if t] - if 'draft' in self._tags[lang]: + if 'draft' in [_.lower() for _ in self._tags[lang]]: is_draft = True LOGGER.debug('The post "{0}" is a draft.'.format(self.source_path)) self._tags[lang].remove('draft') @@ -206,11 +224,26 @@ class Post(object): self.use_in_feeds = use_in_feeds and not is_draft and not is_private \ and not self.publish_later - # If mathjax is a tag, then enable mathjax rendering support - self.is_mathjax = 'mathjax' in self.tags + # If mathjax is a tag, or it's a ipynb post, then enable mathjax rendering support + self.is_mathjax = ('mathjax' in self.tags) or (self.compiler.name == 'ipynb') + + # Register potential extra dependencies + self.compiler.register_extra_dependencies(self) def __repr__(self): - return '<Post: {0}>'.format(self.source_path) + # Calculate a hash that represents most data about the post + m = hashlib.md5() + # source_path modification date (to avoid reading it) + m.update(utils.unicode_str(os.stat(self.source_path).st_mtime).encode('utf-8')) + clean_meta = {} + for k, v in self.meta.items(): + sub_meta = {} + clean_meta[k] = sub_meta + for kk, vv in v.items(): + if vv: + sub_meta[kk] = vv + m.update(utils.unicode_str(json.dumps(clean_meta, cls=utils.CustomEncoder, sort_keys=True)).encode('utf-8')) + return '<Post: {0!r} {1}>'.format(self.source_path, m.hexdigest()) def _has_pretty_url(self, lang): if self.pretty_urls and \ @@ -274,14 +307,20 @@ class Post(object): def template_name(self): return self.meta('template') or self._template_name - def formatted_date(self, date_format): + def formatted_date(self, date_format, date=None): """Return the formatted date, as unicode.""" - fmt_date = self.date.strftime(date_format) + if date: + fmt_date = date.strftime(date_format) + else: + fmt_date = self.date.strftime(date_format) # Issue #383, this changes from py2 to py3 if isinstance(fmt_date, bytes_str): fmt_date = fmt_date.decode('utf8') return fmt_date + def formatted_updated(self, date_format): + return self.formatted_date(date_format, self.updated) + def title(self, lang=None): """Return localized title. @@ -313,8 +352,76 @@ class Post(object): lang = nikola.utils.LocaleBorg().current_lang return self.meta[lang]['description'] + def add_dependency(self, dependency, add='both', lang=None): + """Adds a file dependency for tasks using that post. + + The ``dependency`` should be a string specifying a path, or a callable + which returns such a string or a list of strings. + + The ``add`` parameter can be 'both', 'fragment' or 'page', to indicate + that this dependency shall be used + * when rendering the fragment to HTML ('fragment' and 'both'), or + * when creating a page with parts of the ``Post`` embedded, which + includes the HTML resulting from compiling the fragment ('page' or + 'both'). + + If ``lang`` is not specified, this dependency is added for all languages.""" + if add not in {'fragment', 'page', 'both'}: + raise Exception("Add parameter is '{0}', but must be either 'fragment', 'page', or 'both'.".format(add)) + if add == 'fragment' or add == 'both': + self._dependency_file_fragment[lang].append((type(dependency) != str, dependency)) + if add == 'page' or add == 'both': + self._dependency_file_page[lang].append((type(dependency) != str, dependency)) + + def add_dependency_uptodate(self, dependency, is_callable=False, add='both', lang=None): + """Adds a dependency for task's ``uptodate`` for tasks using that post. + + This can be for example an ``utils.config_changed`` object, or a list of + such objects. + + The ``is_callable`` parameter specifies whether ``dependency`` is a + callable which generates an entry or a list of entries for the ``uptodate`` + list, or whether it is an entry which can directly be added (as a single + object or a list of objects). + + The ``add`` parameter can be 'both', 'fragment' or 'page', to indicate + that this dependency shall be used + * when rendering the fragment to HTML ('fragment' and 'both'), or + * when creating a page with parts of the ``Post`` embedded, which + includes the HTML resulting from compiling the fragment ('page' or + 'both'). + + If ``lang`` is not specified, this dependency is added for all languages. + + Example: + + post.add_dependency_uptodate( + utils.config_changed({1: some_data}, 'uniqueid'), False, 'page') + + """ + if add == 'fragment' or add == 'both': + self._dependency_uptodate_fragment[lang].append((is_callable, dependency)) + if add == 'page' or add == 'both': + self._dependency_uptodate_page[lang].append((is_callable, dependency)) + + def _get_dependencies(self, deps_list): + deps = [] + for dep in deps_list: + if dep[0]: + # callable + result = dep[1]() + else: + # can add directly + result = dep[1] + # if result is a list, add its contents + if type(result) == list: + deps.extend(result) + else: + deps.append(result) + return deps + def deps(self, lang): - """Return a list of dependencies to build this post's page.""" + """Return a list of file dependencies to build this post's page.""" deps = [] if self.default_lang in self.translated_to: deps.append(self.base_path) @@ -324,6 +431,19 @@ class Post(object): cand_2 = get_translation_candidate(self.config, self.base_path, lang) if os.path.exists(cand_1): deps.extend([cand_1, cand_2]) + deps += self._get_dependencies(self._dependency_file_page[lang]) + deps += self._get_dependencies(self._dependency_file_page[None]) + return deps + + def deps_uptodate(self, lang): + """Return a list of uptodate dependencies to build this post's page. + + These dependencies should be included in ``uptodate`` for the task + which generates the page.""" + deps = [] + deps += self._get_dependencies(self._dependency_uptodate_page[lang]) + deps += self._get_dependencies(self._dependency_uptodate_page[None]) + deps.append(utils.config_changed({1: sorted(self.compiler.config_dependencies)}, 'nikola.post.Post.deps_uptodate:compiler:' + self.source_path)) return deps def compile(self, lang): @@ -347,34 +467,41 @@ class Post(object): dest, self.is_two_file), if self.meta('password'): + # TODO: get rid of this feature one day (v8?; warning added in v7.3.0.) + LOGGER.warn("The post {0} is using the `password` attribute, which may stop working in the future.") + LOGGER.warn("Please consider switching to a more secure method of encryption.") + LOGGER.warn("More details: https://github.com/getnikola/nikola/issues/1547") wrap_encrypt(dest, self.meta('password')) if self.publish_later: LOGGER.notice('{0} is scheduled to be published in the future ({1})'.format( self.source_path, self.date)) - def extra_deps(self): - """get extra depepencies from .dep files - This file is created by ReST - """ - dep_path = self.base_path + '.dep' - if os.path.isfile(dep_path): - with io.open(dep_path, 'r+', encoding='utf8') as depf: - return [l.strip() for l in depf.readlines()] - return [] - def fragment_deps(self, lang): - """Return a list of dependencies to build this post's fragment.""" + """Return a list of uptodate dependencies to build this post's fragment. + + These dependencies should be included in ``uptodate`` for the task + which generates the fragment.""" deps = [] if self.default_lang in self.translated_to: deps.append(self.source_path) if os.path.isfile(self.metadata_path): deps.append(self.metadata_path) - deps.extend(self.extra_deps()) lang_deps = [] if lang != self.default_lang: lang_deps = [get_translation_candidate(self.config, d, lang) for d in deps] deps += lang_deps - return [d for d in deps if os.path.exists(d)] + deps = [d for d in deps if os.path.exists(d)] + deps += self._get_dependencies(self._dependency_file_fragment[lang]) + deps += self._get_dependencies(self._dependency_file_fragment[None]) + return deps + + def fragment_deps_uptodate(self, lang): + """Return a list of file dependencies to build this post's fragment.""" + deps = [] + deps += self._get_dependencies(self._dependency_uptodate_fragment[lang]) + deps += self._get_dependencies(self._dependency_uptodate_fragment[None]) + deps.append(utils.config_changed({1: sorted(self.compiler.config_dependencies)}, 'nikola.post.Post.deps_uptodate:compiler:' + self.source_path)) + return deps def is_translation_available(self, lang): """Return true if the translation actually exists.""" @@ -408,7 +535,8 @@ class Post(object): else: return get_translation_candidate(self.config, self.base_path, sorted(self.translated_to)[0]) - def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_link=True, rss_read_more_link=False): + def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_link=True, + rss_read_more_link=False, rss_links_append_query=None): """Read the post file for that language and return its contents. teaser_only=True breaks at the teaser marker and returns only the teaser. @@ -424,8 +552,16 @@ class Post(object): if lang is None: lang = nikola.utils.LocaleBorg().current_lang file_name = self._translated_file_path(lang) + + # Yes, we compile it and screw it. + # This may be controversial, but the user (or someone) is asking for the post text + # and the post should not just refuse to give it. + if not os.path.isfile(file_name): + self.compile(lang) + with io.open(file_name, "r", encoding="utf8") as post_file: data = post_file.read().strip() + if self.compiler.extension() == '.php': return data try: @@ -458,7 +594,7 @@ class Post(object): else: l = self.config['RSS_READ_MORE_LINK'](lang) if rss_read_more_link else self.config['INDEX_READ_MORE_LINK'](lang) teaser += l.format( - link=self.permalink(lang), + link=self.permalink(lang, query=rss_links_append_query), read_more=self.messages[lang]["Read more"], min_remaining_read=self.messages[lang]["%d min remaining to read"] % (self.remaining_reading_time), reading_time=self.reading_time, @@ -498,7 +634,12 @@ class Post(object): text = self.text(strip_html=True) words_per_minute = 220 words = len(text.split()) - self._reading_time = int(ceil(words / words_per_minute)) or 1 + markup = lxml.html.fromstring(self.text(strip_html=False)) + embeddables = [".//img", ".//picture", ".//video", ".//audio", ".//object", ".//iframe"] + media_time = 0 + for embedded in embeddables: + media_time += (len(markup.findall(embedded)) * 0.33) # +20 seconds + self._reading_time = int(ceil((words / words_per_minute) + media_time)) or 1 return self._reading_time @property @@ -574,9 +715,11 @@ class Post(object): self.folder, self.meta[lang]['slug'] + extension) if sep != os.sep: path = path.replace(os.sep, sep) + if path.startswith('./'): + path = path[2:] return path - def permalink(self, lang=None, absolute=False, extension='.html'): + def permalink(self, lang=None, absolute=False, extension='.html', query=None): if lang is None: lang = nikola.utils.LocaleBorg().current_lang @@ -596,9 +739,24 @@ class Post(object): link = urljoin(self.base_url, link[1:]) index_len = len(self.index_file) if self.strip_indexes and link[-(1 + index_len):] == '/' + self.index_file: - return link[:-index_len] - else: - return link + link = link[:-index_len] + if query: + link = link + "?" + query + return link + + @property + def previewimage(self, lang=None): + if lang is None: + lang = nikola.utils.LocaleBorg().current_lang + + image_path = self.meta[lang]['previewimage'] + + if not image_path: + return None + + # This is further parsed by the template, because we don’t have access + # to the URL replacer here. (Issue #1473) + return image_path def source_ext(self, prefix=False): """ @@ -679,6 +837,8 @@ def _get_metadata_from_file(meta_data): >>> g = _get_metadata_from_file >>> list(g([]).values()) [] + >>> str(g(["======","FooBar","======"])["title"]) + 'FooBar' >>> str(g(["FooBar","======"])["title"]) 'FooBar' >>> str(g(["#FooBar"])["title"]) @@ -714,6 +874,10 @@ def _get_metadata_from_file(meta_data): if re_rst_title.findall(line) and i > 0: meta['title'] = meta_data[i - 1].strip() if 'title' not in meta: + if (re_rst_title.findall(line) and i >= 0 and + re_rst_title.findall(meta_data[i + 2])): + meta['title'] = meta_data[i + 1].strip() + if 'title' not in meta: if re_md_title.findall(line): meta['title'] = re_md_title.findall(line)[0] @@ -726,6 +890,7 @@ def _get_metadata_from_file(meta_data): def get_metadata_from_meta_file(path, config=None, lang=None): """Takes a post path, and gets data from a matching .meta file.""" + global _UPGRADE_METADATA_ADVERTISED meta_path = os.path.splitext(path)[0] + '.meta' if lang and config: meta_path = get_translation_candidate(config, meta_path, lang) @@ -746,8 +911,12 @@ def get_metadata_from_meta_file(path, config=None, lang=None): if newstylemeta: # New-style metadata is basically the same as reading metadata from # a 1-file post. - return get_metadata_from_file(path, config, lang) + return get_metadata_from_file(path, config, lang), newstylemeta else: + if not _UPGRADE_METADATA_ADVERTISED: + LOGGER.warn("Some posts on your site have old-style metadata. You should upgrade them to the new format, with support for extra fields.") + LOGGER.warn("Install the 'upgrade_metadata' plugin (with 'nikola plugin -i upgrade_metadata') and run 'nikola upgrade_metadata'.") + _UPGRADE_METADATA_ADVERTISED = True while len(meta_data) < 7: meta_data.append("") (title, slug, date, tags, link, description, _type) = [ @@ -770,7 +939,7 @@ def get_metadata_from_meta_file(path, config=None, lang=None): if _type: meta['type'] = _type - return meta + return meta, newstylemeta elif lang: # Metadata file doesn't exist, but not default language, @@ -778,7 +947,7 @@ def get_metadata_from_meta_file(path, config=None, lang=None): # This makes the 2-file format detection more reliable (Issue #525) return get_metadata_from_meta_file(path, config, lang=None) else: - return {} + return {}, True def get_meta(post, file_metadata_regexp=None, unslugify_titles=False, lang=None): @@ -797,18 +966,24 @@ def get_meta(post, file_metadata_regexp=None, unslugify_titles=False, lang=None) except AttributeError: config = None - meta.update(get_metadata_from_meta_file(post.metadata_path, config, lang)) + _, newstylemeta = get_metadata_from_meta_file(post.metadata_path, config, lang) + meta.update(_) - if meta: - return meta - post.is_two_file = False + if not meta: + post.is_two_file = False if file_metadata_regexp is not None: meta.update(_get_metadata_from_filename_by_regex(post.source_path, file_metadata_regexp, unslugify_titles)) - meta.update(get_metadata_from_file(post.source_path, config, lang)) + if getattr(post, 'compiler', None): + compiler_meta = post.compiler.read_metadata(post, file_metadata_regexp, unslugify_titles, lang) + meta.update(compiler_meta) + + if not post.is_two_file: + # Meta file has precedence over file, which can contain garbage. + meta.update(get_metadata_from_file(post.source_path, config, lang)) if lang is None: # Only perform these checks for the default language @@ -823,15 +998,33 @@ def get_meta(post, file_metadata_regexp=None, unslugify_titles=False, lang=None) meta['title'] = os.path.splitext( os.path.basename(post.source_path))[0] - return meta + return meta, newstylemeta -def hyphenate(dom, lang): - if pyphen is not None: - hyphenator = pyphen.Pyphen(lang=lang) +def hyphenate(dom, _lang): + # circular import prevention + from .nikola import LEGAL_VALUES + lang = LEGAL_VALUES['PYPHEN_LOCALES'].get(_lang, pyphen.language_fallback(_lang)) + if pyphen is not None and lang is not None: + # If pyphen does exist, we tell the user when configuring the site. + # If it does not support a language, we ignore it quietly. + try: + hyphenator = pyphen.Pyphen(lang=lang) + except KeyError: + LOGGER.error("Cannot find hyphenation dictoniaries for {0} (from {1}).".format(lang, _lang)) + LOGGER.error("Pyphen cannot be installed to ~/.local (pip install --user).") for tag in ('p', 'li', 'span'): for node in dom.xpath("//%s[not(parent::pre)]" % tag): - insert_hyphens(node, hyphenator) + skip_node = False + skippable_nodes = ['kbd', 'code', 'samp', 'mark', 'math', 'data', 'ruby', 'svg'] + if node.getchildren(): + for child in node.getchildren(): + if child.tag in skippable_nodes or (child.tag == 'span' and 'math' in child.get('class', [])): + skip_node = True + elif 'math' in node.get('class', []): + skip_node = True + if not skip_node: + insert_hyphens(node, hyphenator) return dom |
