diff options
Diffstat (limited to 'nikola/utils.py')
| -rw-r--r-- | nikola/utils.py | 402 |
1 files changed, 326 insertions, 76 deletions
diff --git a/nikola/utils.py b/nikola/utils.py index 423ded8..0a615f8 100644 --- a/nikola/utils.py +++ b/nikola/utils.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright (c) 2012 Roberto Alsina y otros. + +# Copyright © 2012-2013 Roberto Alsina and others. # Permission is hereby granted, free of charge, to any # person obtaining a copy of this software and associated @@ -27,9 +28,11 @@ from __future__ import print_function, unicode_literals from collections import defaultdict, Callable +import calendar import datetime import hashlib import locale +import logging import os import re import codecs @@ -43,8 +46,65 @@ try: except ImportError: pass +import logbook +from logbook.more import ExceptionHandler import pytz + +class ApplicationWarning(Exception): + pass + + +def get_logger(name, handlers): + """Get a logger with handlers attached.""" + l = logbook.Logger(name) + for h in handlers: + if isinstance(h, list): + l.handlers += h + else: + l.handlers.append(h) + return l + + +STDERR_HANDLER = [logbook.StderrHandler( + level=logbook.NOTICE if not os.getenv('NIKOLA_DEBUG') else logbook.DEBUG, + format_string=u'[{record.time:%Y-%m-%dT%H:%M:%SZ}] {record.level_name}: {record.channel}: {record.message}' +)] +LOGGER = get_logger('Nikola', STDERR_HANDLER) +STRICT_HANDLER = ExceptionHandler(ApplicationWarning, level='WARNING') + +if os.getenv('NIKOLA_DEBUG'): + logging.basicConfig(level=logging.DEBUG) +else: + logging.basicConfig(level=logging.WARNING) + + +def req_missing(names, purpose, python=True, optional=False): + """Log that we are missing some requirements.""" + if not (isinstance(names, tuple) or isinstance(names, list) or isinstance(names, set)): + names = (names,) + if python: + whatarethey_s = 'Python package' + whatarethey_p = 'Python packages' + else: + whatarethey_s = whatarethey_p = 'software' + if len(names) == 1: + msg = 'In order to {0}, you must install the "{1}" {2}.'.format( + purpose, names[0], whatarethey_s) + else: + most = '", "'.join(names[:-1]) + pnames = most + '" and "' + names[-1] + msg = 'In order to {0}, you must install the "{1}" {2}.'.format( + purpose, pnames, whatarethey_p) + + if optional: + LOGGER.warn(msg) + else: + LOGGER.error(msg) + raise Exception('Missing dependencies: {0}'.format(', '.join(names))) + + return msg + if sys.version_info[0] == 3: # Python 3 bytes_str = bytes @@ -65,8 +125,35 @@ import PyRSS2Gen as rss __all__ = ['get_theme_path', 'get_theme_chain', 'load_messages', 'copy_tree', 'generic_rss_renderer', 'copy_file', 'slugify', 'unslugify', 'to_datetime', 'apply_filters', 'config_changed', 'get_crumbs', - 'get_asset_path', '_reload', 'unicode_str', 'bytes_str', - 'unichr', 'Functionary', 'LocaleBorg'] + 'get_tzname', 'get_asset_path', '_reload', 'unicode_str', 'bytes_str', + 'unichr', 'Functionary', 'LocaleBorg', 'sys_encode', 'sys_decode', + 'makedirs', 'get_parent_theme_name', 'ExtendedRSS2'] + + +ENCODING = sys.getfilesystemencoding() or sys.stdin.encoding + + +def sys_encode(thing): + """Return bytes encoded in the system's encoding.""" + if isinstance(thing, unicode_str): + return thing.encode(ENCODING) + return thing + + +def sys_decode(thing): + """Returns unicode.""" + if isinstance(thing, bytes_str): + return thing.decode(ENCODING) + return thing + + +def makedirs(path): + """Create a folder.""" + if not path or os.path.isdir(path): + return + if os.path.exists(path): + raise OSError('Path {0} already exists and is not a folder.') + os.makedirs(path) class Functionary(defaultdict): @@ -77,24 +164,12 @@ class Functionary(defaultdict): super(Functionary, self).__init__(default) self.default_lang = default_lang - def current_lang(self): - """Guess the current language from locale or default.""" - lang = locale.getlocale()[0] - if lang: - if lang in self.keys(): - return lang - lang = lang.split('_')[0] - if lang in self.keys(): - return lang - # whatever - return self.default_lang - def __call__(self, key, lang=None): """When called as a function, take an optional lang and return self[lang][key].""" if lang is None: - lang = self.current_lang() + lang = LocaleBorg().current_lang return self[lang][key] @@ -114,7 +189,7 @@ class config_changed(tools.config_changed): if isinstance(self.config, str): return self.config elif isinstance(self.config, dict): - data = json.dumps(self.config, cls=CustomEncoder) + data = json.dumps(self.config, cls=CustomEncoder, sort_keys=True) if isinstance(data, str): # pragma: no cover # python3 byte_data = data.encode("utf-8") else: @@ -155,19 +230,20 @@ def get_template_engine(themes): return 'mako' +def get_parent_theme_name(theme_name): + parent_path = os.path.join(get_theme_path(theme_name), 'parent') + if os.path.isfile(parent_path): + with open(parent_path) as fd: + return fd.readlines()[0].strip() + return None + + def get_theme_chain(theme): """Create the full theme inheritance chain.""" themes = [theme] - def get_parent(theme_name): - parent_path = os.path.join(get_theme_path(theme_name), 'parent') - if os.path.isfile(parent_path): - with open(parent_path) as fd: - return fd.readlines()[0].strip() - return None - while True: - parent = get_parent(themes[-1]) + parent = get_parent_theme_name(themes[-1]) # Avoid silly loops if parent is None or parent in themes: break @@ -175,6 +251,9 @@ def get_theme_chain(theme): return themes +warned = [] + + def load_messages(themes, translations, default_lang): """ Load theme's messages into context. @@ -182,11 +261,10 @@ def load_messages(themes, translations, default_lang): and "younger" themes have priority. """ messages = Functionary(dict, default_lang) - warned = [] oldpath = sys.path[:] for theme_name in themes[::-1]: msg_folder = os.path.join(get_theme_path(theme_name), 'messages') - default_folder = os.path.join(get_theme_path('default'), 'messages') + default_folder = os.path.join(get_theme_path('base'), 'messages') sys.path.insert(0, default_folder) sys.path.insert(0, msg_folder) english = __import__('messages_en') @@ -195,12 +273,11 @@ def load_messages(themes, translations, default_lang): translation = __import__('messages_' + lang) reload(translation) if sorted(translation.MESSAGES.keys()) !=\ - sorted(english.MESSAGES.keys()) and \ + sorted(english.MESSAGES.keys()) and \ lang not in warned: - # FIXME: get real logging in place - print("Warning: Incomplete translation for language " - "'{0}'.".format(lang)) warned.append(lang) + LOGGER.warn("Incomplete translation for language " + "'{0}'.".format(lang)) messages[lang].update(english.MESSAGES) messages[lang].update(translation.MESSAGES) del(translation) @@ -230,8 +307,7 @@ def copy_tree(src, dst, link_cutoff=None): if set(root_parts) & ignore: continue dst_dir = os.path.join(dst, *root_parts[base_len:]) - if not os.path.isdir(dst_dir): - os.makedirs(dst_dir) + makedirs(dst_dir) for src_name in files: if src_name == '.DS_Store': continue @@ -247,31 +323,37 @@ def copy_tree(src, dst, link_cutoff=None): def generic_rss_renderer(lang, title, link, description, timeline, output_path, - rss_teasers): + rss_teasers, feed_length=10, feed_url=None): """Takes all necessary data, and renders a RSS feed in output_path.""" items = [] - for post in timeline[:10]: + for post in timeline[:feed_length]: args = { 'title': post.title(lang), 'link': post.permalink(lang, absolute=True), - 'description': post.text(lang, teaser_only=rss_teasers), + 'description': post.text(lang, teaser_only=rss_teasers, really_absolute=True), 'guid': post.permalink(lang, absolute=True), # PyRSS2Gen's pubDate is GMT time. 'pubDate': (post.date if post.date.tzinfo is None else post.date.astimezone(pytz.timezone('UTC'))), + 'categories': post._tags.get(lang, []), + 'author': post.meta('author'), } - items.append(rss.RSSItem(**args)) - rss_obj = rss.RSS2( + + items.append(ExtendedItem(**args)) + rss_obj = ExtendedRSS2( title=title, link=link, description=description, lastBuildDate=datetime.datetime.now(), items=items, generator='nikola', + language=lang ) + rss_obj.self_url = feed_url + rss_obj.rss_attrs["xmlns:atom"] = "http://www.w3.org/2005/Atom" + rss_obj.rss_attrs["xmlns:dc"] = "http://purl.org/dc/elements/1.1/" dst_dir = os.path.dirname(output_path) - if not os.path.isdir(dst_dir): - os.makedirs(dst_dir) + makedirs(dst_dir) with codecs.open(output_path, "wb+", "utf-8") as rss_file: data = rss_obj.to_xml(encoding='utf-8') if isinstance(data, bytes_str): @@ -281,8 +363,7 @@ def generic_rss_renderer(lang, title, link, description, timeline, output_path, def copy_file(source, dest, cutoff=None): dst_dir = os.path.dirname(dest) - if not os.path.isdir(dst_dir): - os.makedirs(dst_dir) + makedirs(dst_dir) if os.path.islink(source): link_target = os.path.relpath( os.path.normpath(os.path.join(dst_dir, os.readlink(source)))) @@ -343,7 +424,7 @@ def unslugify(value): """ Given a slug string (as a filename), return a human readable string """ - value = re.sub('^[0-9]', '', value) + value = re.sub('^[0-9]+', '', value) value = re.sub('([_\-\.])', ' ', value) value = value.strip().capitalize() return value @@ -356,9 +437,10 @@ class UnsafeZipException(Exception): pass -def extract_all(zipfile): +def extract_all(zipfile, path='themes'): pwd = os.getcwd() - os.chdir('themes') + makedirs(path) + os.chdir(path) with zip(zipfile) as z: namelist = z.namelist() for f in namelist: @@ -367,12 +449,7 @@ def extract_all(zipfile): 'not safe to expand.') for f in namelist: if f.endswith('/'): - if not os.path.isdir(f): - try: - os.makedirs(f) - except: - raise OSError("Failed making {0} directory " - "tree!".format(f)) + makedirs(f) else: z.extract(f) os.chdir(pwd) @@ -408,12 +485,39 @@ def to_datetime(value, tzinfo=None): try: from dateutil import parser dt = parser.parse(value) - if tzinfo is None: + if tzinfo is None or dt.tzinfo: return dt return tzinfo.localize(dt) except ImportError: raise ValueError('Unrecognized date/time: {0!r}, try installing dateutil...'.format(value)) - raise ValueError('Unrecognized date/time: {0!r}'.format(value)) + except Exception: + raise ValueError('Unrecognized date/time: {0!r}'.format(value)) + + +def get_tzname(dt): + """ + Give a datetime value, find the name of the timezone + """ + try: + from dateutil import tz + except ImportError: + raise ValueError('Unrecognized date/time: {0!r}, try installing dateutil...'.format(dt)) + + tzoffset = dt.strftime('%z') + for name in pytz.common_timezones: + timezone = tz.gettz(name) + now = dt.now(timezone) + offset = now.strftime('%z') + if offset == tzoffset: + return name + raise ValueError('Unrecognized date/time: {0!r}'.format(dt)) + + +def current_time(tzinfo=None): + dt = datetime.datetime.now() + if tzinfo is not None: + dt = tzinfo.localize(dt) + return dt def apply_filters(task, filters): @@ -435,7 +539,7 @@ def apply_filters(task, filters): else: assert False, key - for target in task['targets']: + for target in task.get('targets', []): ext = os.path.splitext(target)[-1].lower() filter_ = filter_matches(ext) if filter_: @@ -495,25 +599,26 @@ def get_crumbs(path, is_file=False): def get_asset_path(path, themes, files_folders={'files': ''}): - """Checks which theme provides the path with the given asset, - and returns the "real" path to the asset, relative to the - current directory. + """ + .. versionchanged:: 6.1.0 + + Checks which theme provides the path with the given asset, + and returns the "real", absolute path to the asset. If the asset is not provided by a theme, then it will be checked for in the FILES_FOLDERS - >>> print(get_asset_path('assets/css/rst.css', ['site', 'default'])) - nikola/data/themes/default/assets/css/rst.css + >>> print(get_asset_path('assets/css/rst.css', ['bootstrap', 'base'])) # doctest: +SKIP + [...]/nikola/data/themes/base/assets/css/rst.css - >>> print(get_asset_path('assets/css/theme.css', ['site', 'default'])) - nikola/data/themes/site/assets/css/theme.css + >>> print(get_asset_path('assets/css/theme.css', ['bootstrap', 'base'])) # doctest: +SKIP + [...]/nikola/data/themes/bootstrap/assets/css/theme.css - >>> print(get_asset_path('nikola.py', ['site', 'default'], {'nikola': ''})) - nikola/nikola.py + >>> print(get_asset_path('nikola.py', ['bootstrap', 'base'], {'nikola': ''})) # doctest: +SKIP + [...]/nikola/nikola.py - >>> print(get_asset_path('nikola/nikola.py', ['site', 'default'], - ... {'nikola':'nikola'})) - nikola/nikola.py + >>> print(get_asset_path('nikola/nikola.py', ['bootstrap', 'base'], {'nikola':'nikola'})) # doctest: +SKIP + [...]/nikola/nikola.py """ for theme_name in themes: @@ -522,23 +627,168 @@ def get_asset_path(path, themes, files_folders={'files': ''}): path ) if os.path.isfile(candidate): - return os.path.relpath(candidate, os.getcwd()) + return candidate for src, rel_dst in files_folders.items(): - candidate = os.path.join( - src, - os.path.relpath(path, rel_dst) - ) + candidate = os.path.abspath(os.path.join(src, path)) if os.path.isfile(candidate): - return os.path.relpath(candidate, os.getcwd()) + return candidate # whatever! return None -class LocaleBorg: - __shared_state = { - 'current_lang': None - } +class LocaleBorg(object): + """ + Provides locale related services and autoritative current_lang, + where current_lang is the last lang for which the locale was set. + + current_lang is meant to be set only by LocaleBorg.set_locale + + python's locale code should not be directly called from code outside of + LocaleBorg, they are compatibilty issues with py version and OS support + better handled at one central point, LocaleBorg. + + In particular, don't call locale.setlocale outside of LocaleBorg. + + Assumptions: + We need locales only for the languages there is a nikola translation. + We don't need to support current_lang through nested contexts + + Usage: + # early in cmd or test execution + LocaleBorg.initialize(...) + + # any time later + lang = LocaleBorg().<service> + + Available services: + .current_lang : autoritative current_lang , the last seen in set_locale + .set_locale(lang) : sets current_lang and sets the locale for lang + .get_month_name(month_no, lang) : returns the localized month name + + NOTE: never use locale.getlocale() , it can return values that + locale.setlocale will not accept in Windows XP, 7 and pythons 2.6, 2.7, 3.3 + Examples: "Spanish", "French" can't do the full circle set / get / set + That used to break calendar, but now seems is not the case, with month at least + """ + @classmethod + def initialize(cls, locales, initial_lang): + """ + locales : dict with lang: locale_n + the same keys as in nikola's TRANSLATIONS + locale_n a sanitized locale, meaning + locale.setlocale(locale.LC_ALL, locale_n) will succeed + locale_n expressed in the string form, like "en.utf8" + """ + assert initial_lang is not None and initial_lang in locales + cls.reset() + cls.locales = locales + + # needed to decode some localized output in py2x + encodings = {} + for lang in locales: + locale.setlocale(locale.LC_ALL, locales[lang]) + loc, encoding = locale.getlocale() + encodings[lang] = encoding + + cls.encodings = encodings + cls.__shared_state['current_lang'] = initial_lang + cls.initialized = True + + @classmethod + def reset(cls): + """used in testing to not leak state between tests""" + cls.locales = {} + cls.encodings = {} + cls.__shared_state = {'current_lang': None} + cls.initialized = False def __init__(self): + if not self.initialized: + raise Exception("Attempt to use LocaleBorg before initialization") self.__dict__ = self.__shared_state + + def set_locale(self, lang): + """Sets the locale for language lang, returns '' + + in linux the locale encoding is set to utf8, + in windows that cannot be guaranted. + In either case, the locale encoding is available in cls.encodings[lang] + """ + # intentional non try-except: templates must ask locales with a lang, + # let the code explode here and not hide the point of failure + # Also, not guarded with an if lang==current_lang because calendar may + # put that out of sync + locale_n = self.locales[lang] + self.__shared_state['current_lang'] = lang + locale.setlocale(locale.LC_ALL, locale_n) + return '' + + def get_month_name(self, month_no, lang): + """returns localized month name in an unicode string""" + if sys.version_info[0] == 3: # Python 3 + with calendar.different_locale(self.locales[lang]): + s = calendar.month_name[month_no] + # for py3 s is unicode + else: # Python 2 + with calendar.TimeEncoding(self.locales[lang]): + s = calendar.month_name[month_no] + s = s.decode(self.encodings[lang]) + # paranoid about calendar ending in the wrong locale (windows) + self.set_locale(self.current_lang) + return s + + +class ExtendedRSS2(rss.RSS2): + def publish_extensions(self, handler): + if self.self_url: + handler.startElement("atom:link", { + 'href': self.self_url, + 'rel': "self", + 'type': "application/rss+xml" + }) + handler.endElement("atom:link") + + +class ExtendedItem(rss.RSSItem): + + def __init__(self, **kw): + author = kw.pop('author') + if author and '@' in author[1:]: # Yes, this is a silly way to validate an email + kw['author'] = author + self.creator = None + else: + self.creator = author + # It's an old style class + return rss.RSSItem.__init__(self, **kw) + + def publish_extensions(self, handler): + if self.creator: + handler.startElement("dc:creator", {}) + handler.characters(self.creator) + handler.endElement("dc:creator") + + +# \x00 means the "<" was backslash-escaped +explicit_title_re = re.compile(r'^(.+?)\s*(?<!\x00)<(.*?)>$', re.DOTALL) + + +def split_explicit_title(text): + """Split role content into title and target, if given. + + From Sphinx's "sphinx/util/nodes.py" + """ + match = explicit_title_re.match(text) + if match: + return True, match.group(1), match.group(2) + return False, text, text + + +def first_line(doc): + """extract first non-blank line from text, to extract docstring title""" + if doc is not None: + for line in doc.splitlines(): + striped = line.strip() + if striped: + return striped + return '' |
