summaryrefslogtreecommitdiffstats
path: root/nikola/post.py
diff options
context:
space:
mode:
Diffstat (limited to 'nikola/post.py')
-rw-r--r--nikola/post.py288
1 files changed, 229 insertions, 59 deletions
diff --git a/nikola/post.py b/nikola/post.py
index 809e5b7..5060583 100644
--- a/nikola/post.py
+++ b/nikola/post.py
@@ -27,32 +27,43 @@ from __future__ import unicode_literals, print_function
import codecs
import os
+import re
+import sys
+import string
+import unidecode
import lxml.html
-from . import utils
+from .utils import to_datetime, slugify
__all__ = ['Post']
+TEASER_REGEXP = re.compile('<!--\s*TEASER_END(:(.+))?\s*-->', re.IGNORECASE)
+
class Post(object):
"""Represents a blog post or web page."""
- def __init__(self, source_path, cache_folder, destination, use_in_feeds,
- translations, default_lang, blog_url, messages, template_name,
- file_metadata_regexp=None):
+ def __init__(
+ self, source_path, cache_folder, destination, use_in_feeds,
+ translations, default_lang, base_url, messages, template_name,
+ file_metadata_regexp=None, tzinfo=None
+ ):
"""Initialize post.
- The base path is the .txt post file. From it we calculate
+ The source path is the .txt post file. From it we calculate
the meta file, as well as any translations available, and
the .html fragment file path.
"""
self.translated_to = set([default_lang])
+ self.tags = ''
+ self.date = None
self.prev_post = None
self.next_post = None
- self.blog_url = blog_url
+ self.base_url = base_url
self.is_draft = False
+ self.is_mathjax = False
self.source_path = source_path # posts/blah.txt
self.post_name = os.path.splitext(source_path)[0] # posts/blah
# cache/posts/blah.html
@@ -63,24 +74,27 @@ class Post(object):
self.default_lang = default_lang
self.messages = messages
self.template_name = template_name
- if os.path.isfile(self.metadata_path):
- with codecs.open(self.metadata_path, "r", "utf8") as meta_file:
- meta_data = meta_file.readlines()
- while len(meta_data) < 6:
- meta_data.append("")
- (default_title, default_pagename, self.date, self.tags,
- self.link, default_description) = [x.strip() for x in
- meta_data][:6]
- else:
- (default_title, default_pagename, self.date, self.tags,
- self.link, default_description) = utils.get_meta(
- self.source_path, file_metadata_regexp)
+ self.meta = get_meta(self, file_metadata_regexp)
+
+ default_title = self.meta.get('title', '')
+ default_pagename = self.meta.get('slug', '')
+ default_description = self.meta.get('description', '')
+
+ for k, v in self.meta.items():
+ if k not in ['title', 'slug', 'description']:
+ if sys.version_info[0] == 2:
+ setattr(self, unidecode.unidecode(unicode(k)), v) # NOQA
+ else:
+ setattr(self, k, v)
if not default_title or not default_pagename or not self.date:
- raise OSError("You must set a title and slug and date! [%s]" %
- source_path)
+ raise OSError("You must set a title (found '{0}'), a slug (found "
+ "'{1}') and a date (found '{2}')! [in file "
+ "{3}]".format(default_title, default_pagename,
+ self.date, source_path))
- self.date = utils.to_datetime(self.date)
+ # If timezone is set, build localized datetime.
+ self.date = to_datetime(self.date, tzinfo)
self.tags = [x.strip() for x in self.tags.split(',')]
self.tags = [_f for _f in self.tags if _f]
@@ -89,45 +103,30 @@ class Post(object):
self.is_draft = 'draft' in self.tags
self.tags = [t for t in self.tags if t != 'draft']
+ # If mathjax is a tag, then enable mathjax rendering support
+ self.is_mathjax = 'mathjax' in self.tags
+
self.pagenames = {}
self.titles = {}
self.descriptions = {}
- # Load internationalized titles
- # TODO: this has gotten much too complicated. Rethink.
+
+ # Load internationalized metadata
for lang in translations:
if lang == default_lang:
self.titles[lang] = default_title
self.pagenames[lang] = default_pagename
self.descriptions[lang] = default_description
else:
- metadata_path = self.metadata_path + "." + lang
- source_path = self.source_path + "." + lang
- if os.path.isfile(source_path):
+ if os.path.isfile(self.source_path + "." + lang):
self.translated_to.add(lang)
- try:
- if os.path.isfile(metadata_path):
- with codecs.open(
- metadata_path, "r", "utf8") as meta_file:
- meta_data = [x.strip() for x in
- meta_file.readlines()]
- while len(meta_data) < 6:
- meta_data.append("")
- self.titles[lang] = meta_data[0] or default_title
- self.pagenames[lang] = meta_data[1] or\
- default_pagename
- self.descriptions[lang] = meta_data[5] or\
- default_description
- else:
- ttitle, ppagename, tmp1, tmp2, tmp3, ddescription = \
- utils.get_meta(source_path, file_metadata_regexp)
- self.titles[lang] = ttitle or default_title
- self.pagenames[lang] = ppagename or default_pagename
- self.descriptions[lang] = ddescription or\
- default_description
- except:
- self.titles[lang] = default_title
- self.pagenames[lang] = default_pagename
- self.descriptions[lang] = default_description
+
+ meta = self.meta.copy()
+ meta.update(get_meta(self, file_metadata_regexp, lang))
+
+ # FIXME this only gets three pieces of metadata from the i18n files
+ self.titles[lang] = meta.get('title', default_title)
+ self.pagenames[lang] = meta.get('slug', default_pagename)
+ self.descriptions[lang] = meta.get('description', default_description)
def title(self, lang):
"""Return localized title."""
@@ -164,12 +163,12 @@ class Post(object):
"""Return path to the translation's file, or to the original."""
file_name = self.base_path
if lang != self.default_lang:
- file_name_lang = file_name + ".%s" % lang
+ file_name_lang = '.'.join((file_name, lang))
if os.path.exists(file_name_lang):
file_name = file_name_lang
return file_name
- def text(self, lang, teaser_only=False):
+ def text(self, lang, teaser_only=False, strip_html=False):
"""Read the post file for that language and return its contents"""
file_name = self._translated_file_path(lang)
@@ -177,22 +176,30 @@ class Post(object):
data = post_file.read()
if data:
- data = lxml.html.make_links_absolute(data, self.permalink())
+ data = lxml.html.make_links_absolute(data, self.permalink(lang=lang))
if data and teaser_only:
e = lxml.html.fromstring(data)
teaser = []
+ teaser_str = self.messages[lang]["Read more"] + '...'
flag = False
for elem in e:
elem_string = lxml.html.tostring(elem).decode('utf8')
- if '<!-- TEASER_END -->' in elem_string.upper():
+ match = TEASER_REGEXP.match(elem_string)
+ if match:
flag = True
+ if match.group(2):
+ teaser_str = match.group(2)
break
teaser.append(elem_string)
if flag:
- teaser.append('<p><a href="%s">%s...</a></p>' %
- (self.permalink(lang),
- self.messages[lang]["Read more"]))
+ teaser.append('<p><a href="{0}">{1}</a></p>'.format(
+ self.permalink(lang), teaser_str))
data = ''.join(teaser)
+
+ if data and strip_html:
+ content = lxml.html.fromstring(data)
+ data = content.text_content().strip() # No whitespace wanted.
+
return data
def destination_path(self, lang, extension='.html'):
@@ -206,9 +213,9 @@ class Post(object):
pieces = list(os.path.split(self.translations[lang]))
pieces += list(os.path.split(self.folder))
pieces += [self.pagenames[lang] + extension]
- pieces = [_f for _f in pieces if _f]
+ pieces = [_f for _f in pieces if _f and _f != '.']
if absolute:
- pieces = [self.blog_url] + pieces
+ pieces = [self.base_url] + pieces
else:
pieces = [""] + pieces
link = "/".join(pieces)
@@ -216,3 +223,166 @@ class Post(object):
def source_ext(self):
return os.path.splitext(self.source_path)[1]
+
+# Code that fetches metadata from different places
+
+
def re_meta(line, match=None):
    """Extract a ``.. key: value`` metadata pair from a single line.

    The line is stripped of surrounding whitespace before matching.

    :param line: one line of text from a post source file.
    :param match: optional key name.  When given, only a line of the form
        ``.. <match>: value`` is accepted; when omitted, any
        ``.. key: value`` line is accepted.
    :returns: a ``(key, value)`` tuple on success (the value is stripped of
        surrounding whitespace), or the 1-tuple ``(None,)`` when the line
        holds no matching metadata.  Callers test ``result[0]``.
    """
    # Raw strings: '\.' in a plain literal is an invalid escape sequence.
    if match:
        pattern = re.compile(r'^\.\. {0}: (.*)'.format(re.escape(match)))
    else:
        pattern = re.compile(r'^\.\. (.*?): (.*)')

    found = pattern.findall(line.strip())
    if not found:
        return (None,)
    if match:
        # One capture group: findall yields plain strings.  Strip the value
        # so both branches return it in the same normalized form.
        return (match, found[0].strip())
    # Two capture groups: findall yields (key, value) tuples.
    key, value = found[0]
    return (key, value.strip())
+
+
def _get_metadata_from_filename_by_regex(filename, metadata_regexp):
    """Try to read metadata from the filename using a regular expression.

    The pattern must use symbolic (named) groups; each group name becomes a
    lowercased metadata key mapped to the text that group captured.  When
    the pattern does not match at the start of ``filename`` an empty dict
    is returned.

    The idea of reading metadata from the filename with a regular
    expression is taken from Pelican - pelican/readers.py
    """
    found = re.match(metadata_regexp, filename)
    if not found:
        return {}
    # Metadata keys must be lowercase; .items() keeps py2/py3 compatibility.
    return dict(
        (name.lower(), captured)
        for name, captured in found.groupdict().items()
    )
+
+
def get_metadata_from_file(source_path, lang=None):
    """Extract metadata from the post file itself by parsing its contents.

    When ``lang`` is given, the file read is ``<source_path>.<lang>``.
    Any failure while reading or parsing yields an empty dict.
    """
    try:
        target = "{0}.{1}".format(source_path, lang) if lang else source_path
        with codecs.open(target, "r", "utf8") as handle:
            stripped_lines = [raw.strip() for raw in handle.readlines()]
        return _get_metadata_from_file(stripped_lines)
    except Exception:
        # Best effort: the file may not exist, for multilingual sites.
        return {}
+
+
def _get_metadata_from_file(meta_data):
    """Parse file contents and obtain metadata.

    ``meta_data`` is a list of lines.  Parsing stops at the first empty
    line, so only the leading block of the file is inspected.  Every
    ``.. key: value`` line in that block becomes an entry of the returned
    dict.  If no title has been found yet, it is also taken from a
    reST-style underlined heading or from a Markdown ``#`` heading.

    >>> g = _get_metadata_from_file
    >>> list(g([]).values())
    []
    >>> str(g(["FooBar","======"])["title"])
    'FooBar'
    >>> str(g(["#FooBar"])["title"])
    'FooBar'
    >>> str(g(["# FooBar"])["title"])
    'FooBar'
    >>> str(g([".. title: FooBar"])["title"])
    'FooBar'
    >>> 'title' in g(["",".. title: FooBar"])
    False

    """
    meta = {}

    # A single '#' followed by anything that is not another '#'.
    re_md_title = re.compile(r'^{0}([^{0}].*)'.format(re.escape('#')))
    # Assuming rst titles are going to be at least 4 chars long,
    # otherwise this detects things like ''' which breaks other markups.
    re_rst_title = re.compile(r'^([{0}]{{4,}})'.format(re.escape(
        string.punctuation)))

    for i, line in enumerate(meta_data):
        if not line:
            # The metadata block ends at the first empty line.
            break

        if 'title' not in meta:
            match = re_meta(line, 'title')
            if match[0]:
                meta['title'] = match[1]
        if 'title' not in meta:
            # An underline row means the previous line is the title.
            if i > 0 and re_rst_title.match(line):
                meta['title'] = meta_data[i - 1].strip()
        if 'title' not in meta:
            md_title = re_md_title.match(line)
            if md_title:
                # Strip so that "# Title" does not keep its leading space.
                meta['title'] = md_title.group(1).strip()

        # Generic ".. key: value" pairs; this may overwrite a title that
        # was guessed from a heading above.
        match = re_meta(line)
        if match[0]:
            meta[match[0]] = match[1]

    return meta
+
+
def get_metadata_from_meta_file(path, lang=None):
    """Take a post path and read metadata from the matching .meta file.

    The .meta file sits next to ``path`` with the extension replaced by
    ``.meta`` (plus ``.<lang>`` when ``lang`` is given).  Its first six
    lines are, in order: title, slug, date, tags, link, description.
    Only non-empty fields appear in the returned dict; a missing .meta
    file yields an empty dict.
    """
    meta_path = os.path.splitext(path)[0] + '.meta'
    if lang:
        meta_path += '.' + lang

    if not os.path.isfile(meta_path):
        return {}

    with codecs.open(meta_path, "r", "utf8") as meta_file:
        raw_lines = meta_file.readlines()

    field_names = ('title', 'slug', 'date', 'tags', 'link', 'description')
    # zip() stops at the shorter sequence, so a short file simply leaves
    # the trailing fields out, exactly like an empty field would.
    values = [entry.strip() for entry in raw_lines[:len(field_names)]]
    return dict(
        (name, value) for name, value in zip(field_names, values) if value
    )
+
+
def get_meta(post, file_metadata_regexp=None, lang=None):
    """Get a post's metadata from its sources.

    Lookup order:

    1. A ``.meta`` file matching ``post.metadata_path``; if it yields any
       metadata, that is returned as-is and nothing else is consulted.
    2. The filename, parsed with ``file_metadata_regexp`` when one is given.
    3. The contents of the post file, which override filename findings.

    For the default language only (``lang`` is None), a missing slug or
    title falls back to the source file name without its extension (the
    slug is passed through ``slugify``).
    """
    meta = dict(get_metadata_from_meta_file(post.metadata_path, lang))
    if meta:
        return meta

    if file_metadata_regexp is not None:
        from_filename = _get_metadata_from_filename_by_regex(
            post.source_path, file_metadata_regexp)
        meta.update(from_filename)

    meta.update(get_metadata_from_file(post.source_path, lang))

    if lang is not None:
        # Fallbacks below only apply to the default language.
        return meta

    stem = os.path.splitext(os.path.basename(post.source_path))[0]

    if 'slug' not in meta:
        # If no slug is found in the metadata use the filename
        meta['slug'] = slugify(stem)

    if 'title' not in meta:
        # If no title is found, use the filename without extension
        meta['title'] = stem

    return meta