aboutsummaryrefslogtreecommitdiffstats
path: root/nikola/post.py
diff options
context:
space:
mode:
Diffstat (limited to 'nikola/post.py')
-rw-r--r--nikola/post.py389
1 files changed, 319 insertions, 70 deletions
diff --git a/nikola/post.py b/nikola/post.py
index ac97c73..a41901d 100644
--- a/nikola/post.py
+++ b/nikola/post.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright (c) 2012 Roberto Alsina y otros.
+
+# Copyright © 2012-2013 Roberto Alsina and others.
# Permission is hereby granted, free of charge, to any
# person obtaining a copy of this software and associated
@@ -23,21 +24,43 @@
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-from __future__ import unicode_literals, print_function
+from __future__ import unicode_literals, print_function, absolute_import
import codecs
from collections import defaultdict
+import datetime
import os
import re
import string
+try:
+ from urlparse import urljoin
+except ImportError:
+ from urllib.parse import urljoin # NOQA
import lxml.html
-
-from .utils import to_datetime, slugify, bytes_str, Functionary, LocaleBorg
+try:
+ import pyphen
+except ImportError:
+ pyphen = None
+import pytz
+
+# for tearDown with _reload we cannot use 'from import' to get LocaleBorg
+import nikola.utils
+from .utils import (
+ bytes_str,
+ current_time,
+ Functionary,
+ LOGGER,
+ slugify,
+ to_datetime,
+ unicode_str,
+)
+from .rc4 import rc4
__all__ = ['Post']
TEASER_REGEXP = re.compile('<!--\s*TEASER_END(:(.+))?\s*-->', re.IGNORECASE)
+READ_MORE_LINK = '<p class="more"><a href="{link}">{read_more}…</a></p>'
class Post(object):
@@ -45,60 +68,90 @@ class Post(object):
"""Represents a blog post or web page."""
def __init__(
- self, source_path, cache_folder, destination, use_in_feeds,
- translations, default_lang, base_url, messages, template_name,
- file_metadata_regexp=None, strip_index_html=False, tzinfo=None,
- skip_untranslated=False,
+ self,
+ source_path,
+ config,
+ destination,
+ use_in_feeds,
+ messages,
+ template_name,
+ compiler
):
"""Initialize post.
- The source path is the .txt post file. From it we calculate
+ The source path is the user created post file. From it we calculate
the meta file, as well as any translations available, and
the .html fragment file path.
"""
+ self.compiler = compiler
+ self.config = config
+ tzinfo = None
+ if self.config['TIMEZONE'] is not None:
+ tzinfo = pytz.timezone(self.config['TIMEZONE'])
+ if self.config['FUTURE_IS_NOW']:
+ self.current_time = None
+ else:
+ self.current_time = current_time(tzinfo)
self.translated_to = set([])
self._prev_post = None
self._next_post = None
- self.base_url = base_url
+ self.base_url = self.config['BASE_URL']
self.is_draft = False
+ self.is_retired = False
self.is_mathjax = False
- self.strip_index_html = strip_index_html
+ self.strip_indexes = self.config['STRIP_INDEXES']
+ self.index_file = self.config['INDEX_FILE']
+ self.pretty_urls = self.config['PRETTY_URLS']
self.source_path = source_path # posts/blah.txt
self.post_name = os.path.splitext(source_path)[0] # posts/blah
+ # cache[\/]posts[\/]blah.html
+ self.base_path = os.path.join(self.config['CACHE_FOLDER'], self.post_name + ".html")
# cache/posts/blah.html
- self.base_path = os.path.join(cache_folder, self.post_name + ".html")
+ self._base_path = self.base_path.replace('\\', '/')
self.metadata_path = self.post_name + ".meta" # posts/blah.meta
self.folder = destination
- self.translations = translations
- self.default_lang = default_lang
+ self.translations = self.config['TRANSLATIONS']
+ self.default_lang = self.config['DEFAULT_LANG']
self.messages = messages
- self.skip_untranslated = skip_untranslated
+ self.skip_untranslated = self.config['HIDE_UNTRANSLATED_POSTS']
self._template_name = template_name
+ self.is_two_file = True
+ self.hyphenate = self.config['HYPHENATE']
+ self._reading_time = None
- default_metadata = get_meta(self, file_metadata_regexp)
+ default_metadata = get_meta(self, self.config['FILE_METADATA_REGEXP'])
self.meta = Functionary(lambda: None, self.default_lang)
- self.meta[default_lang] = default_metadata
+ self.meta[self.default_lang] = default_metadata
# Load internationalized metadata
- for lang in translations:
- if lang != default_lang:
+ for lang in self.translations:
+ if lang != self.default_lang:
if os.path.isfile(self.source_path + "." + lang):
self.translated_to.add(lang)
meta = defaultdict(lambda: '')
meta.update(default_metadata)
- meta.update(get_meta(self, file_metadata_regexp, lang))
+ meta.update(get_meta(self, self.config['FILE_METADATA_REGEXP'], lang))
self.meta[lang] = meta
elif os.path.isfile(self.source_path):
- self.translated_to.add(default_lang)
+ self.translated_to.add(self.default_lang)
- if not self.is_translation_available(default_lang):
+ if not self.is_translation_available(self.default_lang):
# Special case! (Issue #373)
# Fill default_metadata with stuff from the other languages
for lang in sorted(self.translated_to):
default_metadata.update(self.meta[lang])
+ if 'date' not in default_metadata and not use_in_feeds:
+ # For stories we don't *really* need a date
+ default_metadata['date'] = datetime.datetime.utcfromtimestamp(
+ os.stat(self.source_path).st_ctime)
+
+ if tzinfo:
+ default_metadata['date'] = default_metadata['date'].replace(
+ tzinfo=pytz.UTC).astimezone(tzinfo)
+
if 'title' not in default_metadata or 'slug' not in default_metadata \
or 'date' not in default_metadata:
raise OSError("You must set a title (found '{0}'), a slug (found "
@@ -109,7 +162,9 @@ class Post(object):
source_path))
# If timezone is set, build localized datetime.
- self.date = to_datetime(self.meta[default_lang]['date'], tzinfo)
+ self.date = to_datetime(self.meta[self.default_lang]['date'], tzinfo)
+
+ self.publish_later = False if self.current_time is None else self.date >= self.current_time
is_draft = False
is_retired = False
@@ -123,14 +178,27 @@ class Post(object):
if 'retired' in self._tags[lang]:
is_retired = True
self._tags[lang].remove('retired')
+ if 'private' in self._tags[lang]:
+ is_retired = True
+ self._tags[lang].remove('private')
# While draft comes from the tags, it's not really a tag
self.is_draft = is_draft
- self.use_in_feeds = use_in_feeds and not is_draft and not is_retired
+ self.is_retired = is_retired
+ self.use_in_feeds = use_in_feeds and not is_draft and not is_retired \
+ and not self.publish_later
# If mathjax is a tag, then enable mathjax rendering support
self.is_mathjax = 'mathjax' in self.tags
+    def _has_pretty_url(self, lang):
+        """Tell whether the post for ``lang`` lives at ``<slug>/index<ext>``.
+
+        True only when the PRETTY_URLS setting is on, the post's metadata
+        does not set ``pretty_url`` to the string ``'False'``, and the slug
+        is not ``index``.
+        """
+        if not self.pretty_urls:
+            return False
+        if self.meta[lang].get('pretty_url', '') == 'False':
+            return False
+        return self.meta[lang]['slug'] != 'index'
+
@property
def alltags(self):
"""This is ALL the tags for this post."""
@@ -141,7 +209,7 @@ class Post(object):
@property
def tags(self):
- lang = self.current_lang()
+ lang = nikola.utils.LocaleBorg().current_lang
if lang in self._tags:
return self._tags[lang]
elif self.default_lang in self._tags:
@@ -151,7 +219,7 @@ class Post(object):
@property
def prev_post(self):
- lang = self.current_lang()
+ lang = nikola.utils.LocaleBorg().current_lang
rv = self._prev_post
while self.skip_untranslated:
if rv is None:
@@ -167,7 +235,7 @@ class Post(object):
@property
def next_post(self):
- lang = self.current_lang()
+ lang = nikola.utils.LocaleBorg().current_lang
rv = self._next_post
while self.skip_untranslated:
if rv is None:
@@ -202,33 +270,20 @@ class Post(object):
fmt_date = fmt_date.decode('utf8')
return fmt_date
- def current_lang(self):
- """Return the currently set locale, if it's one of the
- available translations, or default_lang."""
- lang = LocaleBorg().current_lang
- if lang:
- if lang in self.translations:
- return lang
- lang = lang.split('_')[0]
- if lang in self.translations:
- return lang
- # whatever
- return self.default_lang
-
def title(self, lang=None):
"""Return localized title.
- If lang is not specified, it will use the currently set locale,
- because templates set it.
+ If lang is not specified, it defaults to the current language from
+ templates, as set in LocaleBorg.
"""
if lang is None:
- lang = self.current_lang()
+ lang = nikola.utils.LocaleBorg().current_lang
return self.meta[lang]['title']
def description(self, lang=None):
"""Return localized description."""
if lang is None:
- lang = self.current_lang()
+ lang = nikola.utils.LocaleBorg().current_lang
return self.meta[lang]['description']
def deps(self, lang):
@@ -241,6 +296,32 @@ class Post(object):
deps += self.fragment_deps(lang)
return deps
+    def compile(self, lang):
+        """Generate the cache/ file with the compiled post.
+
+        Nothing is done when the post has no translation for ``lang`` and
+        the HIDE_UNTRANSLATED_POSTS setting is on.  Otherwise the source for
+        ``lang`` is handed to the post's compiler, and if the post metadata
+        has a password the compiled file is wrapped in the CRYPT template.
+        """
+
+        def wrap_encrypt(path, password):
+            """Replace the file at ``path`` with its encrypted CRYPT wrapper."""
+            with codecs.open(path, 'rb+', 'utf8') as inf:
+                # The tail marker is what the browser-side script checks to
+                # recognise a successful decryption.
+                data = inf.read() + "<!--tail-->"
+            data = CRYPT.substitute(data=rc4(password, data))
+            with codecs.open(path, 'wb+', 'utf8') as outf:
+                outf.write(data)
+
+        # NOTE(review): text() formats the module-level READ_MORE_LINK, not
+        # this attribute -- confirm which of the two is meant to be used.
+        self.READ_MORE_LINK = self.config['READ_MORE_LINK']
+        dest = self.translated_base_path(lang)
+        if not self.is_translation_available(lang) and self.config['HIDE_UNTRANSLATED_POSTS']:
+            return
+        self.compiler(
+            self.translated_source_path(lang),
+            dest,
+            self.is_two_file)
+        password = self.meta('password')
+        if password:
+            wrap_encrypt(dest, password)
+        if self.publish_later:
+            LOGGER.notice('{0} is scheduled to be published in the future ({1})'.format(
+                self.source_path, self.date))
+
def fragment_deps(self, lang):
"""Return a list of dependencies to build this post's fragment."""
deps = []
@@ -274,6 +355,13 @@ class Post(object):
else:
return '.'.join((self.source_path, sorted(self.translated_to)[0]))
+    def translated_base_path(self, lang):
+        """Return the cache path of the compiled fragment for ``lang``.
+
+        The default language uses ``base_path`` unchanged; any other
+        language gets ``.<lang>`` appended to it.
+        """
+        if lang != self.default_lang:
+            return '.'.join((self.base_path, lang))
+        return self.base_path
+
def _translated_file_path(self, lang):
"""Return path to the translation's file, or to the original."""
if lang in self.translated_to:
@@ -286,60 +374,125 @@ class Post(object):
else:
return '.'.join((self.base_path, sorted(self.translated_to)[0]))
- def text(self, lang=None, teaser_only=False, strip_html=False):
- """Read the post file for that language and return its contents."""
+ def text(self, lang=None, teaser_only=False, strip_html=False, really_absolute=False):
+ """Read the post file for that language and return its contents.
+
+ teaser_only=True breaks at the teaser marker and returns only the teaser.
+ strip_html=True removes HTML tags
+ lang=None uses the last used to set locale
+
+ All links in the returned HTML will be relative.
+ The HTML returned is a bare fragment, not a full document.
+ """
if lang is None:
- lang = self.current_lang()
+ lang = nikola.utils.LocaleBorg().current_lang
file_name = self._translated_file_path(lang)
with codecs.open(file_name, "r", "utf8") as post_file:
data = post_file.read().strip()
-
try:
- document = lxml.html.document_fromstring(data)
+ document = lxml.html.fragment_fromstring(data, "body")
except lxml.etree.ParserError as e:
# if we don't catch this, it breaks later (Issue #374)
if str(e) == "Document is empty":
return ""
# let other errors raise
raise(e)
- document.make_links_absolute(self.permalink(lang=lang))
+ base_url = self.permalink(lang=lang, absolute=really_absolute)
+ document.make_links_absolute(base_url)
+
+ if self.hyphenate:
+ hyphenate(document, lang)
+
data = lxml.html.tostring(document, encoding='unicode')
+ # data here is a full HTML doc, including HTML and BODY tags
+ # which is not ideal (Issue #464)
+ try:
+ body = document.body
+ data = (body.text or '') + ''.join(
+ [lxml.html.tostring(child, encoding='unicode')
+ for child in body.iterchildren()])
+ except IndexError: # No body there, it happens sometimes
+ pass
+
if teaser_only:
teaser = TEASER_REGEXP.split(data)[0]
if teaser != data:
- teaser_str = self.messages[lang]["Read more"] + '...'
- teaser += '<p><a href="{0}">{1}</a></p>'.format(
- self.permalink(lang), teaser_str)
+ if not strip_html:
+ if TEASER_REGEXP.search(data).groups()[-1]:
+ teaser += '<p class="more"><a href="{0}">{1}</a></p>'.format(
+ self.permalink(lang, absolute=really_absolute),
+ TEASER_REGEXP.search(data).groups()[-1])
+ else:
+ teaser += READ_MORE_LINK.format(
+ link=self.permalink(lang, absolute=really_absolute),
+ read_more=self.messages[lang]["Read more"])
# This closes all open tags and sanitizes the broken HTML
document = lxml.html.fromstring(teaser)
data = lxml.html.tostring(document, encoding='unicode')
if data and strip_html:
- content = lxml.html.fromstring(data)
- data = content.text_content().strip() # No whitespace wanted.
+ try:
+ # Not all posts have a body. For example, you may have a page statically defined in the template that does not take content as input.
+ content = lxml.html.fromstring(data)
+ data = content.text_content().strip() # No whitespace wanted.
+ except lxml.etree.ParserError:
+ data = ""
return data
- def destination_path(self, lang, extension='.html'):
- path = os.path.join(self.translations[lang],
- self.folder, self.meta[lang]['slug'] + extension)
+    @property
+    def reading_time(self):
+        """Estimated reading time in minutes, based on the length of the text.
+
+        The HTML-stripped text of the post (current language) is counted in
+        whitespace-separated words and divided by a fixed rate of 180 words
+        per minute.  The result is rounded, is never lower than 1, and is
+        cached on the instance after the first access.
+        """
+        if self._reading_time is None:
+            text = self.text(strip_html=True)
+            words_per_minute = 180
+            words = len(text.split())
+            # Divide as floats: this module does not import ``division`` from
+            # __future__, so on Python 2 ``words / words_per_minute`` would
+            # floor before round() ever sees the fraction.
+            self._reading_time = int(round(words / float(words_per_minute))) or 1
+        return self._reading_time
+
+    def source_link(self, lang=None):
+        """Return the root-relative ("/"-prefixed) link to the post's source.
+
+        The link is the destination path for ``lang`` with the source file's
+        extension, always using "/" as separator.
+        """
+        source_path = self.destination_path(
+            lang=lang,
+            extension=self.source_ext(),
+            sep='/')
+        return "/" + source_path
+
+ def destination_path(self, lang=None, extension='.html', sep=os.sep):
+ """Destination path for this post, relative to output/.
+
+ If lang is not specified, it's the current language.
+ Extension is used in the path if specified.
+ """
+ if lang is None:
+ lang = nikola.utils.LocaleBorg().current_lang
+ if self._has_pretty_url(lang):
+ path = os.path.join(self.translations[lang],
+ self.folder, self.meta[lang]['slug'], 'index' + extension)
+ else:
+ path = os.path.join(self.translations[lang],
+ self.folder, self.meta[lang]['slug'] + extension)
+ if sep != os.sep:
+ path = path.replace(os.sep, sep)
return path
def permalink(self, lang=None, absolute=False, extension='.html'):
if lang is None:
- lang = self.current_lang()
+ lang = nikola.utils.LocaleBorg().current_lang
pieces = self.translations[lang].split(os.sep)
pieces += self.folder.split(os.sep)
- pieces += [self.meta[lang]['slug'] + extension]
+ if self._has_pretty_url(lang):
+ pieces += [self.meta[lang]['slug'], 'index' + extension]
+ else:
+ pieces += [self.meta[lang]['slug'] + extension]
pieces = [_f for _f in pieces if _f and _f != '.']
+ link = '/' + '/'.join(pieces)
if absolute:
- pieces = [self.base_url] + pieces
- else:
- pieces = [""] + pieces
- link = "/".join(pieces)
- if self.strip_index_html and link.endswith('/index.html'):
- return link[:-10]
+ link = urljoin(self.base_url, link)
+ index_len = len(self.index_file)
+ if self.strip_indexes and link[-(1 + index_len):] == '/' + self.index_file:
+ return link[:-index_len]
else:
return link
@@ -391,6 +544,8 @@ def get_metadata_from_file(source_path, lang=None):
with codecs.open(source_path, "r", "utf8") as meta_file:
meta_data = [x.strip() for x in meta_file.readlines()]
return _get_metadata_from_file(meta_data)
+ except (UnicodeDecodeError, UnicodeEncodeError):
+ raise ValueError('Error reading {0}: Nikola only supports UTF-8 files'.format(source_path))
except Exception: # The file may not exist, for multilingual sites
return {}
@@ -407,8 +562,10 @@ def _get_metadata_from_file(meta_data):
'FooBar'
>>> str(g([".. title: FooBar"])["title"])
'FooBar'
- >>> 'title' in g(["",".. title: FooBar"])
+ >>> 'title' in g(["","",".. title: FooBar"])
False
+ >>> 'title' in g(["",".. title: FooBar"]) # for #520
+ True
"""
meta = {}
@@ -420,7 +577,11 @@ def _get_metadata_from_file(meta_data):
string.punctuation)))
for i, line in enumerate(meta_data):
- if not line:
+ # txt2tags requires an empty line at the beginning
+ # and since we are here because it's a 1-file post
+ # let's be flexible on what we accept, so, skip empty
+ # first lines.
+ if not line and i > 0:
break
if 'title' not in meta:
match = re_meta(line, 'title')
@@ -469,6 +630,12 @@ def get_metadata_from_meta_file(path, lang=None):
meta['description'] = description
return meta
+
+ elif lang:
+ # Metadata file doesn't exist, but not default language,
+ # So, if default language metadata exists, return that.
+ # This makes the 2-file format detection more reliable (Issue #525)
+ return get_metadata_from_meta_file(path, lang=None)
else:
return {}
@@ -487,6 +654,7 @@ def get_meta(post, file_metadata_regexp=None, lang=None):
if meta:
return meta
+ post.is_two_file = False
if file_metadata_regexp is not None:
meta.update(_get_metadata_from_filename_by_regex(post.source_path,
@@ -499,8 +667,8 @@ def get_meta(post, file_metadata_regexp=None, lang=None):
if 'slug' not in meta:
# If no slug is found in the metadata use the filename
- meta['slug'] = slugify(os.path.splitext(
- os.path.basename(post.source_path))[0])
+ meta['slug'] = slugify(unicode_str(os.path.splitext(
+ os.path.basename(post.source_path))[0]))
if 'title' not in meta:
# If no title is found, use the filename without extension
@@ -508,3 +676,84 @@ def get_meta(post, file_metadata_regexp=None, lang=None):
os.path.basename(post.source_path))[0]
return meta
+
+
+def hyphenate(dom, lang):
+    """Insert soft hyphens into the text of ``dom`` and return ``dom``.
+
+    Every ``p``, ``li`` and ``span`` element whose parent is not a ``pre``
+    is passed to insert_hyphens().  The tree is returned unchanged when
+    pyphen is not installed or has no dictionary for ``lang``.
+    """
+    if pyphen is None:
+        return dom
+    try:
+        hyphenator = pyphen.Pyphen(lang=lang)
+    except KeyError:
+        # pyphen looks the language up in its dictionary table; a site
+        # language it does not know must not abort rendering of the post.
+        return dom
+    # NOTE(review): insert_hyphens() recurses into children, so an element
+    # nested inside another matched element (e.g. a span inside a p) looks
+    # like it is processed more than once -- confirm this is harmless.
+    for tag in ('p', 'li', 'span'):
+        for node in dom.xpath("//%s[not(parent::pre)]" % tag):
+            insert_hyphens(node, hyphenator)
+    return dom
+
+
+def insert_hyphens(node, hyphenator):
+    """Add soft hyphens (U+00AD) to the text of ``node`` and its descendants.
+
+    ``.text`` and ``.tail`` are rewritten in place; for entity nodes only
+    ``.tail`` is touched.  Each space-separated word is run through
+    ``hyphenator.inserted``.
+    """
+    if isinstance(node, lxml.etree._Entity):
+        # HTML entities have no .text
+        attr_names = ('tail',)
+    else:
+        attr_names = ('text', 'tail')
+    for attr_name in attr_names:
+        original = getattr(node, attr_name)
+        if not original:
+            continue
+        words = [hyphenator.inserted(word, hyphen='\u00AD')
+                 for word in original.split(' ')]
+        hyphenated = ' '.join(words)
+        # Put a space back at either end when the original text began or
+        # ended with whitespace.
+        if original[0].isspace():
+            hyphenated = ' ' + hyphenated
+        if original[-1].isspace():
+            hyphenated += ' '
+        setattr(node, attr_name, hyphenated)
+
+    for child in node.iterchildren():
+        insert_hyphens(child, hyphenator)
+
+
+# HTML/JavaScript wrapper for password-protected posts.  Post.compile() fills
+# ``${data}`` with the output of rc4(password, data).  The embedded script
+# base64-decodes that payload (window.atob), decrypts it with the typed key
+# and reveals it only if the result ends in the "<!--tail-->" marker.
+# ``$$`` is string.Template's escape for a literal ``$`` (the ``$("#...")``
+# calls, presumably jQuery -- TODO confirm the theme provides it).
+CRYPT = string.Template("""\
+<script>
+function rc4(key, str) {
+ var s = [], j = 0, x, res = '';
+ for (var i = 0; i < 256; i++) {
+ s[i] = i;
+ }
+ for (i = 0; i < 256; i++) {
+ j = (j + s[i] + key.charCodeAt(i % key.length)) % 256;
+ x = s[i];
+ s[i] = s[j];
+ s[j] = x;
+ }
+ i = 0;
+ j = 0;
+ for (var y = 0; y < str.length; y++) {
+ i = (i + 1) % 256;
+ j = (j + s[i]) % 256;
+ x = s[i];
+ s[i] = s[j];
+ s[j] = x;
+ res += String.fromCharCode(str.charCodeAt(y) ^ s[(s[i] + s[j]) % 256]);
+ }
+ return res;
+}
+function decrypt() {
+ key = $$("#key").val();
+ crypt_div = $$("#encr")
+ crypted = crypt_div.html();
+ decrypted = rc4(key, window.atob(crypted));
+ if (decrypted.substr(decrypted.length - 11) == "<!--tail-->"){
+ crypt_div.html(decrypted);
+ $$("#pwform").hide();
+ crypt_div.show();
+ } else { alert("Wrong password"); };
+}
+</script>
+
+<div id="encr" style="display: none;">${data}</div>
+<div id="pwform">
+<form onsubmit="javascript:decrypt(); return false;" class="form-inline">
+<fieldset>
+<legend>This post is password-protected.</legend>
+<input type="password" id="key" placeholder="Type password here">
+<button type="submit" class="btn">Show Content</button>
+</fieldset>
+</form>
+</div>""")