diff options
Diffstat (limited to 'nikola/post.py')
| -rw-r--r-- | nikola/post.py | 389 |
1 files changed, 319 insertions, 70 deletions
diff --git a/nikola/post.py b/nikola/post.py index ac97c73..a41901d 100644 --- a/nikola/post.py +++ b/nikola/post.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright (c) 2012 Roberto Alsina y otros. + +# Copyright © 2012-2013 Roberto Alsina and others. # Permission is hereby granted, free of charge, to any # person obtaining a copy of this software and associated @@ -23,21 +24,43 @@ # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -from __future__ import unicode_literals, print_function +from __future__ import unicode_literals, print_function, absolute_import import codecs from collections import defaultdict +import datetime import os import re import string +try: + from urlparse import urljoin +except ImportError: + from urllib.parse import urljoin # NOQA import lxml.html - -from .utils import to_datetime, slugify, bytes_str, Functionary, LocaleBorg +try: + import pyphen +except ImportError: + pyphen = None +import pytz + +# for tearDown with _reload we cannot use 'from import' to get forLocaleBorg +import nikola.utils +from .utils import ( + bytes_str, + current_time, + Functionary, + LOGGER, + slugify, + to_datetime, + unicode_str, +) +from .rc4 import rc4 __all__ = ['Post'] TEASER_REGEXP = re.compile('<!--\s*TEASER_END(:(.+))?\s*-->', re.IGNORECASE) +READ_MORE_LINK = '<p class="more"><a href="{link}">{read_more}…</a></p>' class Post(object): @@ -45,60 +68,90 @@ class Post(object): """Represents a blog post or web page.""" def __init__( - self, source_path, cache_folder, destination, use_in_feeds, - translations, default_lang, base_url, messages, template_name, - file_metadata_regexp=None, strip_index_html=False, tzinfo=None, - skip_untranslated=False, + self, + source_path, + config, + destination, + use_in_feeds, + messages, + template_name, + compiler ): """Initialize post. - The source path is the .txt post file. From it we calculate + The source path is the user created post file. From it we calculate the meta file, as well as any translations available, and the .html fragment file path. """ + self.compiler = compiler + self.config = config + tzinfo = None + if self.config['TIMEZONE'] is not None: + tzinfo = pytz.timezone(self.config['TIMEZONE']) + if self.config['FUTURE_IS_NOW']: + self.current_time = None + else: + self.current_time = current_time(tzinfo) self.translated_to = set([]) self._prev_post = None self._next_post = None - self.base_url = base_url + self.base_url = self.config['BASE_URL'] self.is_draft = False + self.is_retired = False self.is_mathjax = False - self.strip_index_html = strip_index_html + self.strip_indexes = self.config['STRIP_INDEXES'] + self.index_file = self.config['INDEX_FILE'] + self.pretty_urls = self.config['PRETTY_URLS'] self.source_path = source_path # posts/blah.txt self.post_name = os.path.splitext(source_path)[0] # posts/blah + # cache[\/]posts[\/]blah.html + self.base_path = os.path.join(self.config['CACHE_FOLDER'], self.post_name + ".html") # cache/posts/blah.html - self.base_path = os.path.join(cache_folder, self.post_name + ".html") + self._base_path = self.base_path.replace('\\', '/') self.metadata_path = self.post_name + ".meta" # posts/blah.meta self.folder = destination - self.translations = translations - self.default_lang = default_lang + self.translations = self.config['TRANSLATIONS'] + self.default_lang = self.config['DEFAULT_LANG'] self.messages = messages - self.skip_untranslated = skip_untranslated + self.skip_untranslated = self.config['HIDE_UNTRANSLATED_POSTS'] self._template_name = template_name + self.is_two_file = True + self.hyphenate = self.config['HYPHENATE'] + self._reading_time = None - default_metadata = get_meta(self, file_metadata_regexp) + default_metadata = get_meta(self, self.config['FILE_METADATA_REGEXP']) self.meta = Functionary(lambda: None, self.default_lang) - self.meta[default_lang] = default_metadata + self.meta[self.default_lang] = default_metadata # Load internationalized metadata - for lang in translations: - if lang != default_lang: + for lang in self.translations: + if lang != self.default_lang: if os.path.isfile(self.source_path + "." + lang): self.translated_to.add(lang) meta = defaultdict(lambda: '') meta.update(default_metadata) - meta.update(get_meta(self, file_metadata_regexp, lang)) + meta.update(get_meta(self, self.config['FILE_METADATA_REGEXP'], lang)) self.meta[lang] = meta elif os.path.isfile(self.source_path): - self.translated_to.add(default_lang) + self.translated_to.add(self.default_lang) - if not self.is_translation_available(default_lang): + if not self.is_translation_available(self.default_lang): # Special case! (Issue #373) # Fill default_metadata with stuff from the other languages for lang in sorted(self.translated_to): default_metadata.update(self.meta[lang]) + if 'date' not in default_metadata and not use_in_feeds: + # For stories we don't *really* need a date + default_metadata['date'] = datetime.datetime.utcfromtimestamp( + os.stat(self.source_path).st_ctime) + + if tzinfo: + default_metadata['date'] = default_metadata['date'].replace( + tzinfo=pytz.UTC).astimezone(tzinfo) + if 'title' not in default_metadata or 'slug' not in default_metadata \ or 'date' not in default_metadata: raise OSError("You must set a title (found '{0}'), a slug (found " @@ -109,7 +162,9 @@ class Post(object): source_path)) # If timezone is set, build localized datetime. - self.date = to_datetime(self.meta[default_lang]['date'], tzinfo) + self.date = to_datetime(self.meta[self.default_lang]['date'], tzinfo) + + self.publish_later = False if self.current_time is None else self.date >= self.current_time is_draft = False is_retired = False @@ -123,14 +178,27 @@ class Post(object): if 'retired' in self._tags[lang]: is_retired = True self._tags[lang].remove('retired') + if 'private' in self._tags[lang]: + is_retired = True + self._tags[lang].remove('private') # While draft comes from the tags, it's not really a tag self.is_draft = is_draft - self.use_in_feeds = use_in_feeds and not is_draft and not is_retired + self.is_retired = is_retired + self.use_in_feeds = use_in_feeds and not is_draft and not is_retired \ + and not self.publish_later # If mathjax is a tag, then enable mathjax rendering support self.is_mathjax = 'mathjax' in self.tags + def _has_pretty_url(self, lang): + if self.pretty_urls and \ + self.meta[lang].get('pretty_url', '') != 'False' and \ + self.meta[lang]['slug'] != 'index': + return True + else: + return False + @property def alltags(self): """This is ALL the tags for this post.""" @@ -141,7 +209,7 @@ class Post(object): @property def tags(self): - lang = self.current_lang() + lang = nikola.utils.LocaleBorg().current_lang if lang in self._tags: return self._tags[lang] elif self.default_lang in self._tags: @@ -151,7 +219,7 @@ class Post(object): @property def prev_post(self): - lang = self.current_lang() + lang = nikola.utils.LocaleBorg().current_lang rv = self._prev_post while self.skip_untranslated: if rv is None: @@ -167,7 +235,7 @@ class Post(object): @property def next_post(self): - lang = self.current_lang() + lang = nikola.utils.LocaleBorg().current_lang rv = self._next_post while self.skip_untranslated: if rv is None: @@ -202,33 +270,20 @@ class Post(object): fmt_date = fmt_date.decode('utf8') return fmt_date - def current_lang(self): - """Return the currently set locale, if it's one of the - available translations, or default_lang.""" - lang = LocaleBorg().current_lang - if lang: - if lang in self.translations: - return lang - lang = lang.split('_')[0] - if lang in self.translations: - return lang - # whatever - return self.default_lang - def title(self, lang=None): """Return localized title. - If lang is not specified, it will use the currently set locale, - because templates set it. + If lang is not specified, it defaults to the current language from + templates, as set in LocaleBorg. """ if lang is None: - lang = self.current_lang() + lang = nikola.utils.LocaleBorg().current_lang return self.meta[lang]['title'] def description(self, lang=None): """Return localized description.""" if lang is None: - lang = self.current_lang() + lang = nikola.utils.LocaleBorg().current_lang return self.meta[lang]['description'] def deps(self, lang): @@ -241,6 +296,32 @@ class Post(object): deps += self.fragment_deps(lang) return deps + def compile(self, lang): + """Generate the cache/ file with the compiled post.""" + + def wrap_encrypt(path, password): + """Wrap a post with encryption.""" + with codecs.open(path, 'rb+', 'utf8') as inf: + data = inf.read() + "<!--tail-->" + data = CRYPT.substitute(data=rc4(password, data)) + with codecs.open(path, 'wb+', 'utf8') as outf: + outf.write(data) + + self.READ_MORE_LINK = self.config['READ_MORE_LINK'] + dest = self.translated_base_path(lang) + if not self.is_translation_available(lang) and self.config['HIDE_UNTRANSLATED_POSTS']: + return + else: + self.compiler( + self.translated_source_path(lang), + dest, + self.is_two_file), + if self.meta('password'): + wrap_encrypt(dest, self.meta('password')) + if self.publish_later: + LOGGER.notice('{0} is scheduled to be published in the future ({1})'.format( + self.source_path, self.date)) + def fragment_deps(self, lang): """Return a list of dependencies to build this post's fragment.""" deps = [] @@ -274,6 +355,13 @@ class Post(object): else: return '.'.join((self.source_path, sorted(self.translated_to)[0])) + def translated_base_path(self, lang): + """Return path to the translation's base_path file.""" + if lang == self.default_lang: + return self.base_path + else: + return '.'.join((self.base_path, lang)) + def _translated_file_path(self, lang): """Return path to the translation's file, or to the original.""" if lang in self.translated_to: @@ -286,60 +374,125 @@ class Post(object): else: return '.'.join((self.base_path, sorted(self.translated_to)[0])) - def text(self, lang=None, teaser_only=False, strip_html=False): - """Read the post file for that language and return its contents.""" + def text(self, lang=None, teaser_only=False, strip_html=False, really_absolute=False): + """Read the post file for that language and return its contents. + + teaser_only=True breaks at the teaser marker and returns only the teaser. + strip_html=True removes HTML tags + lang=None uses the last used to set locale + + All links in the returned HTML will be relative. + The HTML returned is a bare fragment, not a full document. + """ if lang is None: - lang = self.current_lang() + lang = nikola.utils.LocaleBorg().current_lang file_name = self._translated_file_path(lang) with codecs.open(file_name, "r", "utf8") as post_file: data = post_file.read().strip() - try: - document = lxml.html.document_fromstring(data) + document = lxml.html.fragment_fromstring(data, "body") except lxml.etree.ParserError as e: # if we don't catch this, it breaks later (Issue #374) if str(e) == "Document is empty": return "" # let other errors raise raise(e) - document.make_links_absolute(self.permalink(lang=lang)) + base_url = self.permalink(lang=lang, absolute=really_absolute) + document.make_links_absolute(base_url) + + if self.hyphenate: + hyphenate(document, lang) + data = lxml.html.tostring(document, encoding='unicode') + # data here is a full HTML doc, including HTML and BODY tags + # which is not ideal (Issue #464) + try: + body = document.body + data = (body.text or '') + ''.join( + [lxml.html.tostring(child, encoding='unicode') + for child in body.iterchildren()]) + except IndexError: # No body there, it happens sometimes + pass + if teaser_only: teaser = TEASER_REGEXP.split(data)[0] if teaser != data: - teaser_str = self.messages[lang]["Read more"] + '...' - teaser += '<p><a href="{0}">{1}</a></p>'.format( - self.permalink(lang), teaser_str) + if not strip_html: + if TEASER_REGEXP.search(data).groups()[-1]: + teaser += '<p class="more"><a href="{0}">{1}</a></p>'.format( + self.permalink(lang, absolute=really_absolute), + TEASER_REGEXP.search(data).groups()[-1]) + else: + teaser += READ_MORE_LINK.format( + link=self.permalink(lang, absolute=really_absolute), + read_more=self.messages[lang]["Read more"]) # This closes all open tags and sanitizes the broken HTML document = lxml.html.fromstring(teaser) data = lxml.html.tostring(document, encoding='unicode') if data and strip_html: - content = lxml.html.fromstring(data) - data = content.text_content().strip() # No whitespace wanted. + try: + # Not all posts have a body. For example, you may have a page statically defined in the template that does not take content as input. + content = lxml.html.fromstring(data) + data = content.text_content().strip() # No whitespace wanted. + except lxml.etree.ParserError: + data = "" return data - def destination_path(self, lang, extension='.html'): - path = os.path.join(self.translations[lang], - self.folder, self.meta[lang]['slug'] + extension) + @property + def reading_time(self): + """Reading time based on length of text. + """ + if self._reading_time is None: + text = self.text(strip_html=True) + words_per_minute = 180 + words = len(text.split()) + self._reading_time = int(round(words / words_per_minute)) or 1 + return self._reading_time + + def source_link(self, lang=None): + """Return absolute link to the post's source.""" + return "/" + self.destination_path( + lang=lang, + extension=self.source_ext(), + sep='/') + + def destination_path(self, lang=None, extension='.html', sep=os.sep): + """Destination path for this post, relative to output/. + + If lang is not specified, it's the current language. + Extension is used in the path if specified. + """ + if lang is None: + lang = nikola.utils.LocaleBorg().current_lang + if self._has_pretty_url(lang): + path = os.path.join(self.translations[lang], + self.folder, self.meta[lang]['slug'], 'index' + extension) + else: + path = os.path.join(self.translations[lang], + self.folder, self.meta[lang]['slug'] + extension) + if sep != os.sep: + path = path.replace(os.sep, sep) return path def permalink(self, lang=None, absolute=False, extension='.html'): if lang is None: - lang = self.current_lang() + lang = nikola.utils.LocaleBorg().current_lang pieces = self.translations[lang].split(os.sep) pieces += self.folder.split(os.sep) - pieces += [self.meta[lang]['slug'] + extension] + if self._has_pretty_url(lang): + pieces += [self.meta[lang]['slug'], 'index' + extension] + else: + pieces += [self.meta[lang]['slug'] + extension] pieces = [_f for _f in pieces if _f and _f != '.'] + link = '/' + '/'.join(pieces) if absolute: - pieces = [self.base_url] + pieces - else: - pieces = [""] + pieces - link = "/".join(pieces) - if self.strip_index_html and link.endswith('/index.html'): - return link[:-10] + link = urljoin(self.base_url, link) + index_len = len(self.index_file) + if self.strip_indexes and link[-(1 + index_len):] == '/' + self.index_file: + return link[:-index_len] else: return link @@ -391,6 +544,8 @@ def get_metadata_from_file(source_path, lang=None): with codecs.open(source_path, "r", "utf8") as meta_file: meta_data = [x.strip() for x in meta_file.readlines()] return _get_metadata_from_file(meta_data) + except (UnicodeDecodeError, UnicodeEncodeError): + raise ValueError('Error reading {0}: Nikola only supports UTF-8 files'.format(source_path)) except Exception: # The file may not exist, for multilingual sites return {} @@ -407,8 +562,10 @@ def _get_metadata_from_file(meta_data): 'FooBar' >>> str(g([".. title: FooBar"])["title"]) 'FooBar' - >>> 'title' in g(["",".. title: FooBar"]) + >>> 'title' in g(["","",".. title: FooBar"]) False + >>> 'title' in g(["",".. title: FooBar"]) # for #520 + True """ meta = {} @@ -420,7 +577,11 @@ def _get_metadata_from_file(meta_data): string.punctuation))) for i, line in enumerate(meta_data): - if not line: + # txt2tags requires an empty line at the beginning + # and since we are here because it's a 1-file post + # let's be flexible on what we accept, so, skip empty + # first lines. + if not line and i > 0: break if 'title' not in meta: match = re_meta(line, 'title') @@ -469,6 +630,12 @@ def get_metadata_from_meta_file(path, lang=None): meta['description'] = description return meta + + elif lang: + # Metadata file doesn't exist, but not default language, + # So, if default language metadata exists, return that. + # This makes the 2-file format detection more reliable (Issue #525) + return get_metadata_from_meta_file(path, lang=None) else: return {} @@ -487,6 +654,7 @@ def get_meta(post, file_metadata_regexp=None, lang=None): if meta: return meta + post.is_two_file = False if file_metadata_regexp is not None: meta.update(_get_metadata_from_filename_by_regex(post.source_path, @@ -499,8 +667,8 @@ def get_meta(post, file_metadata_regexp=None, lang=None): if 'slug' not in meta: # If no slug is found in the metadata use the filename - meta['slug'] = slugify(os.path.splitext( - os.path.basename(post.source_path))[0]) + meta['slug'] = slugify(unicode_str(os.path.splitext( + os.path.basename(post.source_path))[0])) if 'title' not in meta: # If no title is found, use the filename without extension @@ -508,3 +676,84 @@ def get_meta(post, file_metadata_regexp=None, lang=None): os.path.basename(post.source_path))[0] return meta + + +def hyphenate(dom, lang): + if pyphen is not None: + hyphenator = pyphen.Pyphen(lang=lang) + for tag in ('p', 'li', 'span'): + for node in dom.xpath("//%s[not(parent::pre)]" % tag): + insert_hyphens(node, hyphenator) + return dom + + +def insert_hyphens(node, hyphenator): + textattrs = ('text', 'tail') + if isinstance(node, lxml.etree._Entity): + # HTML entities have no .text + textattrs = ('tail',) + for attr in textattrs: + text = getattr(node, attr) + if not text: + continue + new_data = ' '.join([hyphenator.inserted(w, hyphen='\u00AD') + for w in text.split(' ')]) + # Spaces are trimmed, we have to add them manually back + if text[0].isspace(): + new_data = ' ' + new_data + if text[-1].isspace(): + new_data += ' ' + setattr(node, attr, new_data) + + for child in node.iterchildren(): + insert_hyphens(child, hyphenator) + + +CRYPT = string.Template("""\ +<script> +function rc4(key, str) { + var s = [], j = 0, x, res = ''; + for (var i = 0; i < 256; i++) { + s[i] = i; + } + for (i = 0; i < 256; i++) { + j = (j + s[i] + key.charCodeAt(i % key.length)) % 256; + x = s[i]; + s[i] = s[j]; + s[j] = x; + } + i = 0; + j = 0; + for (var y = 0; y < str.length; y++) { + i = (i + 1) % 256; + j = (j + s[i]) % 256; + x = s[i]; + s[i] = s[j]; + s[j] = x; + res += String.fromCharCode(str.charCodeAt(y) ^ s[(s[i] + s[j]) % 256]); + } + return res; +} +function decrypt() { + key = $$("#key").val(); + crypt_div = $$("#encr") + crypted = crypt_div.html(); + decrypted = rc4(key, window.atob(crypted)); + if (decrypted.substr(decrypted.length - 11) == "<!--tail-->"){ + crypt_div.html(decrypted); + $$("#pwform").hide(); + crypt_div.show(); + } else { alert("Wrong password"); }; +} +</script> + +<div id="encr" style="display: none;">${data}</div> +<div id="pwform"> +<form onsubmit="javascript:decrypt(); return false;" class="form-inline"> +<fieldset> +<legend>This post is password-protected.</legend> +<input type="password" id="key" placeholder="Type password here"> +<button type="submit" class="btn">Show Content</button> +</fieldset> +</form> +</div>""") |
