diff options
Diffstat (limited to 'nikola/utils.py')
| -rw-r--r-- | nikola/utils.py | 155 |
1 file changed, 112 insertions, 43 deletions
diff --git a/nikola/utils.py b/nikola/utils.py index 18b4646..f682c33 100644 --- a/nikola/utils.py +++ b/nikola/utils.py @@ -42,15 +42,25 @@ try: except ImportError: pass + +if sys.version_info[0] == 3: + # Python 3 + bytes_str = bytes + unicode_str = str + unichr = chr +else: + bytes_str = str + unicode_str = unicode + from doit import tools from unidecode import unidecode -from . import PyRSS2Gen as rss +import PyRSS2Gen as rss __all__ = ['get_theme_path', 'get_theme_chain', 'load_messages', 'copy_tree', - 'generic_rss_renderer', - 'copy_file', 'slugify', 'unslugify', 'get_meta', 'to_datetime', - 'apply_filters', 'config_changed'] + 'generic_rss_renderer', + 'copy_file', 'slugify', 'unslugify', 'get_meta', 'to_datetime', + 'apply_filters', 'config_changed'] class CustomEncoder(json.JSONEncoder): @@ -94,7 +104,7 @@ def get_theme_path(theme): if os.path.isdir(dir_name): return dir_name dir_name = os.path.join(os.path.dirname(__file__), - 'data', 'themes', theme) + 'data', 'themes', theme) if os.path.isdir(dir_name): return dir_name raise Exception("Can't find theme '%s'" % theme) @@ -110,22 +120,54 @@ def re_meta(line, match): return '' -def get_meta(source_path): - """get post's meta from source""" - with codecs.open(source_path, "r", "utf8") as meta_file: - meta_data = meta_file.readlines(15) +def _get_metadata_from_filename_by_regex(filename, metadata_regexp): + """ + Tries to read the metadata from the filename based on the given re. + This requires the use of symbolic group names in the pattern. + + The part to read the metadata from the filename based on a regular + expression is taken from Pelican - pelican/readers.py + """ title = slug = date = tags = link = description = '' + match = re.match(metadata_regexp, filename) + if match: + # .items() for py3k compat. 
+ for key, value in match.groupdict().items(): + key = key.lower() # metadata must be lowercase + + if key == 'title': + title = value + if key == 'slug': + slug = value + if key == 'date': + date = value + if key == 'tags': + tags = value + if key == 'link': + link = value + if key == 'description': + description = value + return (title, slug, date, tags, link, description) + + +def _get_metadata_from_file(source_path, title='', slug='', date='', tags='', + link='', description=''): re_md_title = re.compile(r'^%s([^%s].*)' % - (re.escape('#'), re.escape('#'))) - re_rst_title = re.compile(r'^([^%s ].*)' % re.escape(string.punctuation)) + (re.escape('#'), re.escape('#'))) + # Assuming rst titles are going to be at least 4 chars long + # otherwise this detects things like ''' which breaks other markups. + re_rst_title = re.compile(r'^([%s]{4,})' % re.escape(string.punctuation)) + + with codecs.open(source_path, "r", "utf8") as meta_file: + meta_data = meta_file.readlines(15) - for meta in meta_data: + for i, meta in enumerate(meta_data): if not title: title = re_meta(meta, '.. title:') if not title: - if re_rst_title.findall(meta): - title = re_rst_title.findall(meta)[0] + if re_rst_title.findall(meta) and i > 0: + title = meta_data[i - 1].strip() if not title: if re_md_title.findall(meta): title = re_md_title.findall(meta)[0] @@ -140,11 +182,34 @@ def get_meta(source_path): if not description: description = re_meta(meta, '.. description:') - # TODO: either enable or delete - #if not date: - #from datetime import datetime - #date = datetime.fromtimestamp( - # os.path.getmtime(source_path)).strftime('%Y/%m/%d %H:%M') + return (title, slug, date, tags, link, description) + + +def get_meta(source_path, file_metadata_regexp=None): + """Get post's meta from source. + + If ``file_metadata_regexp`` is given, an attempt will be made to read + metadata from the filename. + If any metadata is then found inside the file the metadata from the + file will override previous findings. 
+ """ + title = slug = date = tags = link = description = '' + + if not (file_metadata_regexp is None): + (title, slug, date, tags, link, + description) = _get_metadata_from_filename_by_regex( + source_path, file_metadata_regexp) + + (title, slug, date, tags, link, description) = _get_metadata_from_file( + source_path, title, slug, date, tags, link, description) + + if not slug: + # If no slug is found in the metadata use the filename + slug = slugify(os.path.splitext(os.path.basename(source_path))[0]) + + if not title: + # If no title is found, use the filename without extension + title = os.path.splitext(os.path.basename(source_path))[0] return (title, slug, date, tags, link, description) @@ -194,13 +259,14 @@ def load_messages(themes, translations): english = __import__('messages_en') for lang in list(translations.keys()): # If we don't do the reload, the module is cached - translation = __import__('messages_'+lang) + translation = __import__('messages_' + lang) reload(translation) if sorted(translation.MESSAGES.keys()) !=\ sorted(english.MESSAGES.keys()) and \ - lang not in warned: + lang not in warned: # FIXME: get real logging in place - print("Warning: Incomplete translation for language '%s'." % lang) + print("Warning: Incomplete translation for language '%s'." 
% + lang) warned.append(lang) messages[lang].update(english.MESSAGES) messages[lang].update(translation.MESSAGES) @@ -247,15 +313,15 @@ def copy_tree(src, dst, link_cutoff=None): } -def generic_rss_renderer(lang, title, link, description, - timeline, output_path): +def generic_rss_renderer(lang, title, link, description, timeline, output_path, + rss_teasers): """Takes all necessary data, and renders a RSS feed in output_path.""" items = [] for post in timeline[:10]: args = { 'title': post.title(lang), 'link': post.permalink(lang, absolute=True), - 'description': post.text(lang, teaser_only=True), + 'description': post.text(lang, teaser_only=rss_teasers), 'guid': post.permalink(lang, absolute=True), 'pubDate': post.date, } @@ -271,8 +337,11 @@ def generic_rss_renderer(lang, title, link, description, dst_dir = os.path.dirname(output_path) if not os.path.isdir(dst_dir): os.makedirs(dst_dir) - with open(output_path, "wb+") as rss_file: - rss_obj.write_xml(rss_file) + with codecs.open(output_path, "wb+", "utf-8") as rss_file: + data = rss_obj.to_xml(encoding='utf-8') + if isinstance(data, bytes_str): + data = data.decode('utf-8') + rss_file.write(data) def copy_file(source, dest, cutoff=None): @@ -318,7 +387,7 @@ def slugify(value): """ value = unidecode(value) # WARNING: this may not be python2/3 equivalent - #value = unicode(_slugify_strip_re.sub('', value).strip().lower()) + # value = unicode(_slugify_strip_re.sub('', value).strip().lower()) value = str(_slugify_strip_re.sub('', value).strip().lower()) return _slugify_hyphenate_re.sub('-', value) @@ -343,21 +412,21 @@ class UnsafeZipException(Exception): def extract_all(zipfile): pwd = os.getcwd() os.chdir('themes') - z = list(zip(zipfile)) - namelist = z.namelist() - for f in namelist: - if f.endswith('/') and '..' in f: - raise UnsafeZipException( - 'The zip file contains ".." 
and is not safe to expand.') - for f in namelist: - if f.endswith('/'): - if not os.path.isdir(f): - try: - os.makedirs(f) - except: - raise OSError("mkdir '%s' error!" % f) - else: - z.extract(f) + with zip(zipfile) as z: + namelist = z.namelist() + for f in namelist: + if f.endswith('/') and '..' in f: + raise UnsafeZipException( + 'The zip file contains ".." and is not safe to expand.') + for f in namelist: + if f.endswith('/'): + if not os.path.isdir(f): + try: + os.makedirs(f) + except: + raise OSError("mkdir '%s' error!" % f) + else: + z.extract(f) os.chdir(pwd) |
