Diffstat (limited to 'nikola/utils.py')
| -rw-r--r-- | nikola/utils.py | 1130 |
1 file changed, 633 insertions, 497 deletions
diff --git a/nikola/utils.py b/nikola/utils.py index 068cb3a..d029b7f 100644 --- a/nikola/utils.py +++ b/nikola/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright © 2012-2016 Roberto Alsina and others. +# Copyright © 2012-2020 Roberto Alsina and others. # Permission is hereby granted, free of charge, to any # person obtaining a copy of this software and associated @@ -26,15 +26,11 @@ """Utility functions.""" -from __future__ import print_function, unicode_literals, absolute_import -import calendar +import configparser import datetime -import dateutil.tz import hashlib import io -import locale -import logging -import natsort +import operator import os import re import json @@ -42,131 +38,87 @@ import shutil import socket import subprocess import sys +import threading +import typing +from collections import defaultdict, OrderedDict +from collections.abc import Callable, Iterable +from html import unescape as html_unescape +from importlib import reload as _reload +from unicodedata import normalize as unicodenormalize +from urllib.parse import quote as urlquote +from urllib.parse import unquote as urlunquote +from urllib.parse import urlparse, urlunparse +from zipfile import ZipFile as zipf + +import babel.dates import dateutil.parser import dateutil.tz -import logbook -try: - from urllib import quote as urlquote - from urllib import unquote as urlunquote - from urlparse import urlparse, urlunparse -except ImportError: - from urllib.parse import quote as urlquote # NOQA - from urllib.parse import unquote as urlunquote # NOQA - from urllib.parse import urlparse, urlunparse # NOQA -import warnings +import pygments.formatters +import pygments.formatters._mapping import PyRSS2Gen as rss +from blinker import signal +from doit import tools +from doit.cmdparse import CmdParse +from pkg_resources import resource_filename +from nikola.packages.pygments_better_html import BetterHtmlFormatter +from unidecode import unidecode + +# Renames +from nikola import DEBUG # NOQA +from .log import LOGGER, get_logger # NOQA +from .hierarchy_utils import TreeNode, clone_treenode, flatten_tree_structure, sort_classifications +from .hierarchy_utils import join_hierarchical_category_path, parse_escaped_hierarchical_category_name + try: - import pytoml as toml + import toml except ImportError: toml = None + try: - import yaml + from ruamel.yaml import YAML except ImportError: - yaml = None + YAML = None + try: import husl except ImportError: husl = None -from collections import defaultdict, Callable, OrderedDict -from logbook.compat import redirect_logging -from logbook.more import ExceptionHandler, ColorizedStderrHandler -from pygments.formatters import HtmlFormatter -from zipfile import ZipFile as zipf -from doit import tools -from unidecode import unidecode -from unicodedata import normalize as unicodenormalize -from pkg_resources import resource_filename -from doit.cmdparse import CmdParse - -from nikola import DEBUG - -__all__ = ('CustomEncoder', 'get_theme_path', 'get_theme_path_real', 'get_theme_chain', 'load_messages', 'copy_tree', - 'copy_file', 'slugify', 'unslugify', 'to_datetime', 'apply_filters', +__all__ = ('CustomEncoder', 'get_theme_path', 'get_theme_path_real', + 'get_theme_chain', 'load_messages', 'copy_tree', 'copy_file', + 'slugify', 'unslugify', 'to_datetime', 'apply_filters', 'config_changed', 'get_crumbs', 'get_tzname', 'get_asset_path', - '_reload', 'unicode_str', 'bytes_str', 'unichr', 'Functionary', - 'TranslatableSetting', 'TemplateHookRegistry', 'LocaleBorg', + '_reload', 
'Functionary', 'TranslatableSetting', + 'TemplateHookRegistry', 'LocaleBorg', 'sys_encode', 'sys_decode', 'makedirs', 'get_parent_theme_name', 'demote_headers', 'get_translation_candidate', 'write_metadata', 'ask', 'ask_yesno', 'options2docstring', 'os_path_split', 'get_displayed_page_number', 'adjust_name_for_index_path_list', 'adjust_name_for_index_path', 'adjust_name_for_index_link', - 'NikolaPygmentsHTML', 'create_redirect', 'TreeNode', - 'flatten_tree_structure', 'parse_escaped_hierarchical_category_name', - 'join_hierarchical_category_path', 'clean_before_deployment', 'indent', - 'load_data') + 'NikolaPygmentsHTML', 'create_redirect', 'clean_before_deployment', + 'sort_posts', 'smartjoin', 'indent', 'load_data', 'html_unescape', + 'rss_writer', 'map_metadata', 'req_missing', + # Deprecated, moved to hierarchy_utils: + 'TreeNode', 'clone_treenode', 'flatten_tree_structure', + 'sort_classifications', 'join_hierarchical_category_path', + 'parse_escaped_hierarchical_category_name',) # Are you looking for 'generic_rss_renderer'? # It's defined in nikola.nikola.Nikola (the site object). -if sys.version_info[0] == 3: - # Python 3 - bytes_str = bytes - unicode_str = str - unichr = chr - raw_input = input - from imp import reload as _reload -else: - bytes_str = str - unicode_str = unicode # NOQA - _reload = reload # NOQA - unichr = unichr - - -class ApplicationWarning(Exception): - pass - - -class ColorfulStderrHandler(ColorizedStderrHandler): - """Stream handler with colors.""" +# Aliases, previously for Python 2/3 compatibility. +# TODO remove in v9 +bytes_str = bytes +unicode_str = str +unichr = chr - _colorful = False +# For compatibility with old logging setups. +# TODO remove in v9? +STDERR_HANDLER = None - def should_colorize(self, record): - """Inform about colorization using the value obtained from Nikola.""" - return self._colorful - - -def get_logger(name, handlers): - """Get a logger with handlers attached.""" - l = logbook.Logger(name) - for h in handlers: - if isinstance(h, list): - l.handlers += h - else: - l.handlers.append(h) - return l - - -STDERR_HANDLER = [ColorfulStderrHandler( - level=logbook.INFO if not DEBUG else logbook.DEBUG, - format_string=u'[{record.time:%Y-%m-%dT%H:%M:%SZ}] {record.level_name}: {record.channel}: {record.message}' -)] - - -LOGGER = get_logger('Nikola', STDERR_HANDLER) -STRICT_HANDLER = ExceptionHandler(ApplicationWarning, level='WARNING') USE_SLUGIFY = True -redirect_logging() - -if DEBUG: - logging.basicConfig(level=logging.DEBUG) -else: - logging.basicConfig(level=logging.INFO) - - -def showwarning(message, category, filename, lineno, file=None, line=None): - """Show a warning (from the warnings module) to the user.""" - try: - n = category.__name__ - except AttributeError: - n = str(category) - get_logger(n, STDERR_HANDLER).warn('{0}:{1}: {2}'.format(filename, lineno, message)) - -warnings.showwarning = showwarning - def req_missing(names, purpose, python=True, optional=False): """Log that we are missing some requirements. 
@@ -205,7 +157,7 @@ def req_missing(names, purpose, python=True, optional=False): purpose, pnames, whatarethey_p) if optional: - LOGGER.warn(msg) + LOGGER.warning(msg) else: LOGGER.error(msg) LOGGER.error('Exiting due to missing dependencies.') @@ -214,20 +166,19 @@ def req_missing(names, purpose, python=True, optional=False): return msg -from nikola import filters as task_filters # NOQA ENCODING = sys.getfilesystemencoding() or sys.stdin.encoding def sys_encode(thing): """Return bytes encoded in the system's encoding.""" - if isinstance(thing, unicode_str): + if isinstance(thing, str): return thing.encode(ENCODING) return thing def sys_decode(thing): """Return Unicode.""" - if isinstance(thing, bytes_str): + if isinstance(thing, bytes): return thing.decode(ENCODING) return thing @@ -255,7 +206,7 @@ class Functionary(defaultdict): def __init__(self, default, default_lang): """Initialize a functionary.""" - super(Functionary, self).__init__(default) + super().__init__(default) self.default_lang = default_lang def __call__(self, key, lang=None): @@ -292,7 +243,7 @@ class TranslatableSetting(object): def __getattribute__(self, attr): """Return attributes, falling back to string attributes.""" try: - return super(TranslatableSetting, self).__getattribute__(attr) + return super().__getattribute__(attr) except AttributeError: return self().__getattribute__(attr) @@ -356,15 +307,11 @@ class TranslatableSetting(object): def __str__(self): """Return the value in the currently set language (deprecated).""" - return self.values[self.get_lang()] - - def __unicode__(self): - """Return the value in the currently set language (deprecated).""" - return self.values[self.get_lang()] + return str(self.values[self.get_lang()]) def __repr__(self): """Provide a representation for programmers.""" - return '<TranslatableSetting: {0!r}>'.format(self.name) + return '<TranslatableSetting: {0!r} = {1!r}>'.format(self.name, self._inp) def format(self, *args, **kwargs): """Format ALL the values in the setting the same way.""" @@ -465,9 +412,8 @@ class TemplateHookRegistry(object): >>> r = TemplateHookRegistry('foo', None) >>> r.append('Hello!') >>> r.append(lambda x: 'Hello ' + x + '!', False, 'world') - >>> str(r()) # str() call is not recommended in real use + >>> repr(r()) 'Hello!\nHello world!' 
- >>> """ def __init__(self, name, site): @@ -509,9 +455,23 @@ class TemplateHookRegistry(object): c = callable(inp) self._items.append((c, inp, wants_site_and_context, args, kwargs)) + def calculate_deps(self): + """Calculate dependencies for a registry.""" + deps = [] + for is_callable, inp, wants_site_and_context, args, kwargs in self._items: + if not is_callable: + name = inp + elif hasattr(inp, 'template_registry_identifier'): + name = inp.template_registry_identifier + elif hasattr(inp, '__doc__'): + name = inp.__doc__ + else: + name = '_undefined_callable_' + deps.append((is_callable, name, wants_site_and_context, args, kwargs)) + def __hash__(self): """Return hash of a registry.""" - return hash(config_changed({self.name: self._items})._calc_digest()) + return hash(config_changed({self.name: self.calculate_deps()})._calc_digest()) def __str__(self): """Stringify a registry.""" @@ -526,12 +486,14 @@ class CustomEncoder(json.JSONEncoder): """Custom JSON encoder.""" def default(self, obj): - """Default encoding handler.""" + """Create default encoding handler.""" try: - return super(CustomEncoder, self).default(obj) + return super().default(obj) except TypeError: if isinstance(obj, (set, frozenset)): return self.encode(sorted(list(obj))) + elif isinstance(obj, TranslatableSetting): + s = json.dumps(obj._inp, cls=CustomEncoder, sort_keys=True) else: s = repr(obj).split('0x', 1)[0] return s @@ -542,11 +504,34 @@ class config_changed(tools.config_changed): def __init__(self, config, identifier=None): """Initialize config_changed.""" - super(config_changed, self).__init__(config) + super().__init__(config) self.identifier = '_config_changed' if identifier is not None: self.identifier += ':' + identifier + # DEBUG (for unexpected rebuilds) + @classmethod + def _write_into_debug_db(cls, digest: str, data: str) -> None: # pragma: no cover + """Write full values of config_changed into a sqlite3 database.""" + import sqlite3 + try: + cls.debug_db_cursor + except AttributeError: + cls.debug_db_conn = sqlite3.connect("cc_debug.sqlite3") + cls.debug_db_id = datetime.datetime.now().isoformat() + cls.debug_db_cursor = cls.debug_db_conn.cursor() + cls.debug_db_cursor.execute(""" + CREATE TABLE IF NOT EXISTS hashes (hash CHARACTER(32) PRIMARY KEY, json_data TEXT); + """) + cls.debug_db_conn.commit() + + try: + cls.debug_db_cursor.execute("INSERT INTO hashes (hash, json_data) VALUES (?, ?);", (digest, data)) + cls.debug_db_conn.commit() + except sqlite3.IntegrityError: + # ON CONFLICT DO NOTHING, except Ubuntu 16.04’s sqlite3 is too ancient for this + cls.debug_db_conn.rollback() + def _calc_digest(self): """Calculate a config_changed digest.""" if isinstance(self.config, str): @@ -558,9 +543,14 @@ class config_changed(tools.config_changed): else: byte_data = data digest = hashlib.md5(byte_data).hexdigest() + + # DEBUG (for unexpected rebuilds) + # self._write_into_debug_db(digest, data) + # Alternative (without database): # LOGGER.debug('{{"{0}": {1}}}'.format(digest, byte_data)) # Humanized format: # LOGGER.debug('[Digest {0} for {2}]\n{1}\n[Digest {0} for {2}]'.format(digest, byte_data, self.identifier)) + return digest else: raise Exception('Invalid type of config_changed parameter -- got ' @@ -605,27 +595,52 @@ def get_theme_path(theme): return theme +def parse_theme_meta(theme_dir): + """Parse a .theme meta file.""" + cp = configparser.ConfigParser() + # The `or` case is in case theme_dir ends with a trailing slash + theme_name = os.path.basename(theme_dir) or 
os.path.basename(os.path.dirname(theme_dir)) + theme_meta_path = os.path.join(theme_dir, theme_name + '.theme') + cp.read(theme_meta_path) + return cp if cp.has_section('Theme') else None + + def get_template_engine(themes): """Get template engine used by a given theme.""" for theme_name in themes: - engine_path = os.path.join(theme_name, 'engine') - if os.path.isfile(engine_path): - with open(engine_path) as fd: - return fd.readlines()[0].strip() + meta = parse_theme_meta(theme_name) + if meta: + e = meta.get('Theme', 'engine', fallback=None) + if e: + return e + else: + # Theme still uses old-style parent/engine files + engine_path = os.path.join(theme_name, 'engine') + if os.path.isfile(engine_path): + with open(engine_path) as fd: + return fd.readlines()[0].strip() # default return 'mako' def get_parent_theme_name(theme_name, themes_dirs=None): """Get name of parent theme.""" - parent_path = os.path.join(theme_name, 'parent') - if os.path.isfile(parent_path): - with open(parent_path) as fd: - parent = fd.readlines()[0].strip() - if themes_dirs: + meta = parse_theme_meta(theme_name) + if meta: + parent = meta.get('Theme', 'parent', fallback=None) + if themes_dirs and parent: return get_theme_path_real(parent, themes_dirs) return parent - return None + else: + # Theme still uses old-style parent/engine files + parent_path = os.path.join(theme_name, 'parent') + if os.path.isfile(parent_path): + with open(parent_path) as fd: + parent = fd.readlines()[0].strip() + if themes_dirs: + return get_theme_path_real(parent, themes_dirs) + return parent + return None def get_theme_chain(theme, themes_dirs): @@ -641,7 +656,7 @@ def get_theme_chain(theme, themes_dirs): return themes -language_incomplete_warned = [] +INCOMPLETE_LANGUAGES_WARNED = set() class LanguageNotFoundError(Exception): @@ -665,38 +680,50 @@ def load_messages(themes, translations, default_lang, themes_dirs): """ messages = Functionary(dict, default_lang) oldpath = list(sys.path) + found = {lang: False for lang in translations.keys()} + last_exception = None + completion_status = {lang: False for lang in translations.keys()} for theme_name in themes[::-1]: msg_folder = os.path.join(get_theme_path(theme_name), 'messages') default_folder = os.path.join(get_theme_path_real('base', themes_dirs), 'messages') sys.path.insert(0, default_folder) sys.path.insert(0, msg_folder) + english = __import__('messages_en') # If we don't do the reload, the module is cached _reload(english) - for lang in list(translations.keys()): + for lang in translations.keys(): try: translation = __import__('messages_' + lang) # If we don't do the reload, the module is cached _reload(translation) - if sorted(translation.MESSAGES.keys()) !=\ - sorted(english.MESSAGES.keys()) and \ - lang not in language_incomplete_warned: - language_incomplete_warned.append(lang) - LOGGER.warn("Incomplete translation for language " - "'{0}'.".format(lang)) + found[lang] = True + if sorted(translation.MESSAGES.keys()) != sorted(english.MESSAGES.keys()): + completion_status[lang] = completion_status[lang] or False + else: + completion_status[lang] = True + messages[lang].update(english.MESSAGES) for k, v in translation.MESSAGES.items(): if v: messages[lang][k] = v del(translation) except ImportError as orig: - raise LanguageNotFoundError(lang, orig) + last_exception = orig del(english) - sys.path = oldpath + sys.path = oldpath + + if not all(found.values()): + raise LanguageNotFoundError(lang, last_exception) + for lang, status in completion_status.items(): + if not status and lang 
not in INCOMPLETE_LANGUAGES_WARNED: + LOGGER.warning("Incomplete translation for language '{0}'.".format(lang)) + INCOMPLETE_LANGUAGES_WARNED.add(lang) + return messages -def copy_tree(src, dst, link_cutoff=None): +def copy_tree(src, dst, link_cutoff=None, ignored_filenames=None): """Copy a src tree to the dst folder. Example: @@ -707,11 +734,13 @@ def copy_tree(src, dst, link_cutoff=None): should copy "themes/defauts/assets/foo/bar" to "output/assets/foo/bar" - if link_cutoff is set, then the links pointing at things + If link_cutoff is set, then the links pointing at things *inside* that folder will stay as links, and links pointing *outside* that folder will be copied. + + ignored_filenames is a set of file names that will be ignored. """ - ignore = set(['.svn']) + ignore = set(['.svn', '.git']) | (ignored_filenames or set()) base_len = len(src.split(os.sep)) for root, dirs, files in os.walk(src, followlinks=True): root_parts = root.split(os.sep) @@ -761,11 +790,12 @@ def remove_file(source): elif os.path.isfile(source) or os.path.islink(source): os.remove(source) + # slugify is adopted from # http://code.activestate.com/recipes/ # 577257-slugify-make-a-string-usable-in-a-url-or-filename/ -_slugify_strip_re = re.compile(r'[^+\w\s-]') -_slugify_hyphenate_re = re.compile(r'[-\s]+') +_slugify_strip_re = re.compile(r'[^+\w\s-]', re.UNICODE) +_slugify_hyphenate_re = re.compile(r'[-\s]+', re.UNICODE) def slugify(value, lang=None, force=False): @@ -782,16 +812,14 @@ def slugify(value, lang=None, force=False): >>> print(slugify('foo bar', lang='en')) foo-bar """ - if lang is None: # TODO: remove in v8 - LOGGER.warn("slugify() called without language!") - if not isinstance(value, unicode_str): + if not isinstance(value, str): raise ValueError("Not a unicode object: {0}".format(value)) if USE_SLUGIFY or force: # This is the standard state of slugify, which actually does some work. # It is the preferred style, especially for Western languages. - value = unicode_str(unidecode(value)) - value = _slugify_strip_re.sub('', value, re.UNICODE).strip().lower() - return _slugify_hyphenate_re.sub('-', value, re.UNICODE) + value = str(unidecode(value)) + value = _slugify_strip_re.sub('', value).strip().lower() + return _slugify_hyphenate_re.sub('-', value) else: # This is the “disarmed” state of slugify, which lets the user # have any character they please (be it regular ASCII with spaces, @@ -814,11 +842,9 @@ def unslugify(value, lang=None, discard_numbers=True): If discard_numbers is True, numbers right at the beginning of input will be removed. 
""" - if lang is None: # TODO: remove in v8 - LOGGER.warn("unslugify() called without language!") if discard_numbers: value = re.sub('^[0-9]+', '', value) - value = re.sub('([_\-\.])', ' ', value) + value = re.sub(r'([_\-\.])', ' ', value) value = value.strip().capitalize() return value @@ -835,6 +861,16 @@ def encodelink(iri): encoded_link = urlunparse(link.values()) return encoded_link + +def full_path_from_urlparse(parsed) -> str: + """Given urlparse output, return the full path (with query and fragment).""" + dst = parsed.path + if parsed.query: + dst = "{0}?{1}".format(dst, parsed.query) + if parsed.fragment: + dst = "{0}#{1}".format(dst, parsed.fragment) + return dst + # A very slightly safer version of zip.extractall that works on # python < 2.6 @@ -868,6 +904,8 @@ def extract_all(zipfile, path='themes'): def to_datetime(value, tzinfo=None): """Convert string to datetime.""" try: + if type(value) == datetime.date: + value = datetime.datetime.combine(value, datetime.time(0, 0)) if not isinstance(value, datetime.datetime): # dateutil does bad things with TZs like UTC-03:00. dateregexp = re.compile(r' UTC([+-][0-9][0-9]:[0-9][0-9])') @@ -898,6 +936,9 @@ def current_time(tzinfo=None): return dt +from nikola import filters as task_filters # NOQA + + def apply_filters(task, filters, skip_ext=None): """Apply filters to a task. @@ -916,11 +957,11 @@ def apply_filters(task, filters, skip_ext=None): if isinstance(key, (tuple, list)): if ext in key: return value - elif isinstance(key, (bytes_str, unicode_str)): + elif isinstance(key, (bytes, str)): if ext == key: return value else: - assert False, key + raise ValueError("Cannot find filter match for {0}".format(key)) for target in task.get('targets', []): ext = os.path.splitext(target)[-1].lower() @@ -949,26 +990,26 @@ def get_crumbs(path, is_file=False, index_folder=None, lang=None): >>> crumbs = get_crumbs('galleries') >>> len(crumbs) 1 - >>> print('|'.join(crumbs[0])) - #|galleries + >>> crumbs[0] + ['#', 'galleries'] >>> crumbs = get_crumbs(os.path.join('galleries','demo')) >>> len(crumbs) 2 - >>> print('|'.join(crumbs[0])) - ..|galleries - >>> print('|'.join(crumbs[1])) - #|demo + >>> crumbs[0] + ['..', 'galleries'] + >>> crumbs[1] + ['#', 'demo'] >>> crumbs = get_crumbs(os.path.join('listings','foo','bar'), is_file=True) >>> len(crumbs) 3 - >>> print('|'.join(crumbs[0])) - ..|listings - >>> print('|'.join(crumbs[1])) - .|foo - >>> print('|'.join(crumbs[2])) - #|bar + >>> crumbs[0] + ['..', 'listings'] + >>> crumbs[1] + ['.', 'foo'] + >>> crumbs[2] + ['#', 'bar'] """ crumbs = path.split(os.sep) _crumbs = [] @@ -1009,8 +1050,8 @@ def get_asset_path(path, themes, files_folders={'files': ''}, output_dir='output If it's not provided by either, it will be chacked in output, where it may have been created by another plugin. 
- >>> print(get_asset_path('assets/css/rst.css', get_theme_chain('bootstrap3', ['themes']))) - /.../nikola/data/themes/base/assets/css/rst.css + >>> print(get_asset_path('assets/css/nikola_rst.css', get_theme_chain('bootstrap3', ['themes']))) + /.../nikola/data/themes/base/assets/css/nikola_rst.css >>> print(get_asset_path('assets/css/theme.css', get_theme_chain('bootstrap3', ['themes']))) /.../nikola/data/themes/bootstrap3/assets/css/theme.css @@ -1050,24 +1091,49 @@ class LocaleBorgUninitializedException(Exception): def __init__(self): """Initialize exception.""" - super(LocaleBorgUninitializedException, self).__init__("Attempt to use LocaleBorg before initialization") + super().__init__("Attempt to use LocaleBorg before initialization") + + +# Customized versions of babel.dates functions that don't do weird stuff with +# timezones. Without these fixes, DST would follow local settings (because +# dateutil’s timezones return stuff depending on their input, and datetime.time +# objects have no year/month/day to base the information on. +def format_datetime(datetime=None, format='medium', + locale=babel.dates.LC_TIME): + """Format a datetime object.""" + locale = babel.dates.Locale.parse(locale) + if format in ('full', 'long', 'medium', 'short'): + return babel.dates.get_datetime_format(format, locale=locale) \ + .replace("'", "") \ + .replace('{0}', format_time(datetime, format, locale=locale)) \ + .replace('{1}', babel.dates.format_date(datetime, format, locale=locale)) + else: + return babel.dates.parse_pattern(format).apply(datetime, locale) -class LocaleBorg(object): - """Provide locale related services and autoritative current_lang. +def format_time(time=None, format='medium', locale=babel.dates.LC_TIME): + """Format time. Input can be datetime.time or datetime.datetime.""" + locale = babel.dates.Locale.parse(locale) + if format in ('full', 'long', 'medium', 'short'): + format = babel.dates.get_time_format(format, locale=locale) + return babel.dates.parse_pattern(format).apply(time, locale) - current_lang is the last lang for which the locale was set - and is meant to be set only by LocaleBorg.set_locale. - python's locale code should not be directly called from code outside of - LocaleBorg, they are compatibilty issues with py version and OS support - better handled at one central point, LocaleBorg. +def format_skeleton(skeleton, datetime=None, fo=None, fuzzy=True, + locale=babel.dates.LC_TIME): + """Format a datetime based on a skeleton.""" + locale = babel.dates.Locale.parse(locale) + if fuzzy and skeleton not in locale.datetime_skeletons: + skeleton = babel.dates.match_skeleton(skeleton, locale.datetime_skeletons) + format = locale.datetime_skeletons[skeleton] + return format_datetime(datetime, format, locale) - In particular, don't call locale.setlocale outside of LocaleBorg. - Assumptions: - We need locales only for the languages there is a nikola translation. - We don't need to support current_lang through nested contexts +class LocaleBorg(object): + """Provide locale related services and autoritative current_lang. + + This class stores information about the locales used and interfaces + with the Babel library to provide internationalization services. 
Usage: # early in cmd or test execution @@ -1077,46 +1143,39 @@ class LocaleBorg(object): lang = LocaleBorg().<service> Available services: - .current_lang : autoritative current_lang , the last seen in set_locale - .set_locale(lang) : sets current_lang and sets the locale for lang - .get_month_name(month_no, lang) : returns the localized month name + .current_lang: autoritative current_lang, the last seen in set_locale + .formatted_date: format a date(time) according to locale rules + .format_date_in_string: take a message and format the date in it - NOTE: never use locale.getlocale() , it can return values that - locale.setlocale will not accept in Windows XP, 7 and pythons 2.6, 2.7, 3.3 - Examples: "Spanish", "French" can't do the full circle set / get / set + The default implementation uses the Babel package and completely ignores + the Python `locale` module. If you wish to override this, write functions + and assign them to the appropriate names. The functions are: + + * LocaleBorg.datetime_formatter(date, date_format, lang, locale) + * LocaleBorg.in_string_formatter(date, mode, custom_format, lang, locale) """ initialized = False + # Can be used to override Babel + datetime_formatter = None + in_string_formatter = None + @classmethod - def initialize(cls, locales, initial_lang): + def initialize(cls, locales: 'typing.Dict[str, str]', initial_lang: str): """Initialize LocaleBorg. - locales : dict with lang: locale_n - the same keys as in nikola's TRANSLATIONS - locale_n a sanitized locale, meaning - locale.setlocale(locale.LC_ALL, locale_n) will succeed - locale_n expressed in the string form, like "en.utf8" + locales: dict with custom locale name overrides. """ - assert initial_lang is not None and initial_lang in locales + if not initial_lang: + raise ValueError("Unknown initial language {0}".format(initial_lang)) cls.reset() cls.locales = locales - cls.month_name_handlers = [] - cls.formatted_date_handlers = [] - - # needed to decode some localized output in py2x - encodings = {} - for lang in locales: - locale.setlocale(locale.LC_ALL, locales[lang]) - loc, encoding = locale.getlocale() - encodings[lang] = encoding - - cls.encodings = encodings cls.__initial_lang = initial_lang cls.initialized = True def __get_shared_state(self): - if not self.initialized: + if not self.initialized: # pragma: no cover raise LocaleBorgUninitializedException() shared_state = getattr(self.__thread_local, 'shared_state', None) if shared_state is None: @@ -1130,38 +1189,14 @@ class LocaleBorg(object): Used in testing to prevent leaking state between tests. """ - import threading cls.__thread_local = threading.local() cls.__thread_lock = threading.Lock() cls.locales = {} - cls.encodings = {} cls.initialized = False - cls.month_name_handlers = [] - cls.formatted_date_handlers = [] cls.thread_local = None - cls.thread_lock = None - - @classmethod - def add_handler(cls, month_name_handler=None, formatted_date_handler=None): - """Allow to add month name and formatted date handlers. - - If month_name_handler is not None, it is expected to be a callable - which accepts (month_no, lang) and returns either a string or None. - - If formatted_date_handler is not None, it is expected to be a callable - which accepts (date_format, date, lang) and returns either a string or - None. - - A handler is expected to either return the correct result for the given - language and data, or return None to indicate it is not able to do the - job. 
In that case, the next handler is asked, and finally the default - implementation is used. - """ - if month_name_handler is not None: - cls.month_name_handlers.append(month_name_handler) - if formatted_date_handler is not None: - cls.formatted_date_handlers.append(formatted_date_handler) + cls.datetime_formatter = None + cls.in_string_formatter = None def __init__(self): """Initialize.""" @@ -1169,79 +1204,68 @@ class LocaleBorg(object): raise LocaleBorgUninitializedException() @property - def current_lang(self): + def current_lang(self) -> str: """Return the current language.""" return self.__get_shared_state()['current_lang'] - def __set_locale(self, lang): - """Set the locale for language lang without updating current_lang.""" - locale_n = self.locales[lang] - locale.setlocale(locale.LC_ALL, locale_n) - - def set_locale(self, lang): - """Set the locale for language lang, returns an empty string. - - in linux the locale encoding is set to utf8, - in windows that cannot be guaranted. - In either case, the locale encoding is available in cls.encodings[lang] - """ + def set_locale(self, lang: str) -> str: + """Set the current language and return an empty string (to make use in templates easier).""" with self.__thread_lock: - # intentional non try-except: templates must ask locales with a lang, - # let the code explode here and not hide the point of failure - # Also, not guarded with an if lang==current_lang because calendar may - # put that out of sync - self.__set_locale(lang) self.__get_shared_state()['current_lang'] = lang return '' - def get_month_name(self, month_no, lang): - """Return localized month name in an unicode string.""" - # For thread-safety - with self.__thread_lock: - for handler in self.month_name_handlers: - res = handler(month_no, lang) - if res is not None: - return res - old_lang = self.current_lang - self.__set_locale(lang) - s = calendar.month_name[month_no] - self.__set_locale(old_lang) - if sys.version_info[0] == 2: - enc = self.encodings[lang] - if not enc: - enc = 'UTF-8' - - s = s.decode(enc) - return s + def formatted_date(self, date_format: 'str', + date: 'typing.Union[datetime.date, datetime.datetime]', + lang: 'typing.Optional[str]' = None) -> str: + """Return the formatted date/datetime as a string.""" + if lang is None: + lang = self.current_lang + locale = self.locales.get(lang, lang) + # Get a string out of a TranslatableSetting + if isinstance(date_format, TranslatableSetting): + date_format = date_format(lang) + + # Always ask Python if the date_format is webiso + if date_format == 'webiso': + # Formatted after RFC 3339 (web ISO 8501 profile) with Zulu + # zone designator for times in UTC and no microsecond precision. 
+ return date.replace(microsecond=0).isoformat().replace('+00:00', 'Z') + elif LocaleBorg.datetime_formatter is not None: + return LocaleBorg.datetime_formatter(date, date_format, lang, locale) + else: + return format_datetime(date, date_format, locale=locale) - def formatted_date(self, date_format, date): - """Return the formatted date as unicode.""" - with self.__thread_lock: - current_lang = self.current_lang - # For thread-safety - self.__set_locale(current_lang) - fmt_date = None - # Get a string out of a TranslatableSetting - if isinstance(date_format, TranslatableSetting): - date_format = date_format(current_lang) - # First check handlers - for handler in self.formatted_date_handlers: - fmt_date = handler(date_format, date, current_lang) - if fmt_date is not None: - break - # If no handler was able to format the date, ask Python - if fmt_date is None: - if date_format == 'webiso': - # Formatted after RFC 3339 (web ISO 8501 profile) with Zulu - # zone desgignator for times in UTC and no microsecond precision. - fmt_date = date.replace(microsecond=0).isoformat().replace('+00:00', 'Z') + def format_date_in_string(self, message: str, date: datetime.date, lang: 'typing.Optional[str]' = None) -> str: + """Format date inside a string (message). + + Accepted modes: month, month_year, month_day_year. + Format: {month} for standard, {month:MMMM} for customization. + """ + modes = { + 'month': ('date', 'LLLL'), + 'month_year': ('skeleton', 'yMMMM'), + 'month_day_year': ('date', 'long') + } + + if lang is None: + lang = self.current_lang + locale = self.locales.get(lang, lang) + + def date_formatter(match: typing.Match) -> str: + """Format a date as requested.""" + mode, custom_format = match.groups() + if LocaleBorg.in_string_formatter is not None: + return LocaleBorg.in_string_formatter(date, mode, custom_format, lang, locale) + elif custom_format: + return babel.dates.format_date(date, custom_format, locale) + else: + function, fmt = modes[mode] + if function == 'skeleton': + return format_skeleton(fmt, date, locale=locale) else: - fmt_date = date.strftime(date_format) + return babel.dates.format_date(date, fmt, locale) - # Issue #383, this changes from py2 to py3 - if isinstance(fmt_date, bytes_str): - fmt_date = fmt_date.decode('utf8') - return fmt_date + return re.sub(r'{(.*?)(?::(.*?))?}', date_formatter, message) class ExtendedRSS2(rss.RSS2): @@ -1253,8 +1277,7 @@ class ExtendedRSS2(rss.RSS2): """Publish a feed.""" if self.xsl_stylesheet_href: handler.processingInstruction("xml-stylesheet", 'type="text/xsl" href="{0}" media="all"'.format(self.xsl_stylesheet_href)) - # old-style class in py2 - rss.RSS2.publish(self, handler) + super().publish(handler) def publish_extensions(self, handler): """Publish extensions.""" @@ -1272,9 +1295,10 @@ class ExtendedItem(rss.RSSItem): def __init__(self, **kw): """Initialize RSS item.""" - self.creator = kw.pop('creator') + self.creator = kw.pop('creator', None) + # It's an old style class - return rss.RSSItem.__init__(self, **kw) + rss.RSSItem.__init__(self, **kw) def publish_extensions(self, handler): """Publish extensions.""" @@ -1314,24 +1338,34 @@ def demote_headers(doc, level=1): if level == 0: return doc elif level > 0: - r = range(1, 7 - level) + levels = range(1, 7 - (level - 1)) + levels = reversed(levels) elif level < 0: - r = range(1 + level, 7) - for i in reversed(r): - # html headers go to 6, so we can’t “lower” beneath five - elements = doc.xpath('//h' + str(i)) - for e in elements: - e.tag = 'h' + str(i + level) + levels = range(2 + 
level, 7) + + for before in levels: + after = before + level + if after < 1: + # html headers can't go lower than 1 + after = 1 + elif after > 6: + # html headers go until 6 + after = 6 + + if before == after: + continue + + elements = doc.xpath('//h{}'.format(before)) + new_tag = 'h{}'.format(after) + for element in elements: + element.tag = new_tag def get_root_dir(): """Find root directory of nikola site by looking for conf.py.""" root = os.getcwd() - if sys.version_info[0] == 2: - confname = b'conf.py' - else: - confname = 'conf.py' + confname = 'conf.py' while True: if os.path.exists(os.path.join(root, confname)): @@ -1385,7 +1419,7 @@ def get_translation_candidate(config, path, lang): # This will still break if the user has ?*[]\ in the pattern. But WHY WOULD HE? pattern = pattern.replace('.', r'\.') pattern = pattern.replace('{path}', '(?P<path>.+?)') - pattern = pattern.replace('{ext}', '(?P<ext>[^\./]+)') + pattern = pattern.replace('{ext}', r'(?P<ext>[^\./]+)') pattern = pattern.replace('{lang}', '(?P<lang>{0})'.format('|'.join(config['TRANSLATIONS'].keys()))) m = re.match(pattern, path) if m and all(m.groups()): # It's a translated path @@ -1406,24 +1440,59 @@ def get_translation_candidate(config, path, lang): return config['TRANSLATIONS_PATTERN'].format(path=p, ext=e, lang=lang) -def write_metadata(data): - """Write metadata.""" - order = ('title', 'slug', 'date', 'tags', 'category', 'link', 'description', 'type') - f = '.. {0}: {1}' - meta = [] - for k in order: - try: - meta.append(f.format(k, data.pop(k))) - except KeyError: - pass +def write_metadata(data, metadata_format=None, comment_wrap=False, site=None, compiler=None): + """Write metadata. - # Leftover metadata (user-specified/non-default). - for k in natsort.natsorted(list(data.keys()), alg=natsort.ns.F | natsort.ns.IC): - meta.append(f.format(k, data[k])) - - meta.append('') - - return '\n'.join(meta) + Recommended usage: pass `site`, `comment_wrap` (True, False, or a 2-tuple of start/end markers), and optionally `compiler`. Other options are for backwards compatibility. + """ + # API compatibility + if metadata_format is None and site is not None: + metadata_format = site.config.get('METADATA_FORMAT', 'nikola').lower() + if metadata_format is None: + metadata_format = 'nikola' + + if site is None: + import nikola.metadata_extractors + metadata_extractors_by = nikola.metadata_extractors.default_metadata_extractors_by() + nikola.metadata_extractors.load_defaults(site, metadata_extractors_by) + else: + metadata_extractors_by = site.metadata_extractors_by + + # Pelican is mapped to rest_docinfo, markdown_meta, or nikola. + if metadata_format == 'pelican': + if compiler and compiler.name == 'rest': + metadata_format = 'rest_docinfo' + elif compiler and compiler.name == 'markdown': + metadata_format = 'markdown_meta' + else: + # Quiet fallback. 
+ metadata_format = 'nikola' + + default_meta = ('nikola', 'rest_docinfo', 'markdown_meta') + extractor = metadata_extractors_by['name'].get(metadata_format) + if extractor and extractor.supports_write: + extractor.check_requirements() + return extractor.write_metadata(data, comment_wrap) + elif extractor and metadata_format not in default_meta: + LOGGER.warning('Writing METADATA_FORMAT {} is not supported, using "nikola" format'.format(metadata_format)) + elif metadata_format not in default_meta: + LOGGER.warning('Unknown METADATA_FORMAT {}, using "nikola" format'.format(metadata_format)) + + if metadata_format == 'rest_docinfo': + title = data['title'] + results = [ + '=' * len(title), + title, + '=' * len(title), + '' + ] + [':{0}: {1}'.format(k, v) for k, v in data.items() if v and k != 'title'] + [''] + return '\n'.join(results) + elif metadata_format == 'markdown_meta': + results = ['{0}: {1}'.format(k, v) for k, v in data.items() if v] + ['', ''] + return '\n'.join(results) + else: # Nikola, default + from nikola.metadata_extractors import DEFAULT_EXTRACTOR + return DEFAULT_EXTRACTOR.write_metadata(data, comment_wrap) def ask(query, default=None): @@ -1432,10 +1501,7 @@ def ask(query, default=None): default_q = ' [{0}]'.format(default) else: default_q = '' - if sys.version_info[0] == 3: - inp = raw_input("{query}{default_q}: ".format(query=query, default_q=default_q)).strip() - else: - inp = raw_input("{query}{default_q}: ".format(query=query, default_q=default_q).encode('utf-8')).strip() + inp = input("{query}{default_q}: ".format(query=query, default_q=default_q)).strip() if inp or default is None: return inp else: @@ -1450,10 +1516,7 @@ def ask_yesno(query, default=None): default_q = ' [Y/n]' elif default is False: default_q = ' [y/N]' - if sys.version_info[0] == 3: - inp = raw_input("{query}{default_q} ".format(query=query, default_q=default_q)).strip() - else: - inp = raw_input("{query}{default_q} ".format(query=query, default_q=default_q).encode('utf-8')).strip() + inp = input("{query}{default_q} ".format(query=query, default_q=default_q)).strip() if inp: return inp.lower().startswith('y') elif default is not None: @@ -1502,10 +1565,6 @@ class Commands(object): # cleanup: run is doit-only, init is useless in an existing site if k in ['run', 'init']: continue - if sys.version_info[0] == 2: - k2 = bytes(k) - else: - k2 = k self._cmdnames.append(k) @@ -1516,7 +1575,7 @@ class Commands(object): # doit command: needs some help opt = v(config=self._config, **self._doitargs).get_options() nc = type( - k2, + k, (CommandWrapper,), { '__doc__': options2docstring(k, opt) @@ -1568,17 +1627,27 @@ def options2docstring(name, options): return '\n'.join(result) -class NikolaPygmentsHTML(HtmlFormatter): +class NikolaPygmentsHTML(BetterHtmlFormatter): """A Nikola-specific modification of Pygments' HtmlFormatter.""" - def __init__(self, anchor_ref, classes=None, linenos='table', linenostart=1): + def __init__(self, anchor_ref=None, classes=None, **kwargs): """Initialize formatter.""" if classes is None: classes = ['code', 'literal-block'] + if anchor_ref: + kwargs['lineanchors'] = slugify( + anchor_ref, lang=LocaleBorg().current_lang, force=True) self.nclasses = classes - super(NikolaPygmentsHTML, self).__init__( - cssclass='code', linenos=linenos, linenostart=linenostart, nowrap=False, - lineanchors=slugify(anchor_ref, lang=LocaleBorg().current_lang, force=True), anchorlinenos=True) + kwargs['cssclass'] = 'code' + if not kwargs.get('linenos'): + # Default to no line numbers (Issue #3426) + 
kwargs['linenos'] = False + if kwargs.get('linenos') not in {'table', 'inline', 'ol', False}: + # Map invalid values to table + kwargs['linenos'] = 'table' + kwargs['anchorlinenos'] = kwargs['linenos'] == 'table' + kwargs['nowrap'] = False + super().__init__(**kwargs) def wrap(self, source, outfile): """Wrap the ``source``, which is a generator yielding individual lines, in custom generators.""" @@ -1596,6 +1665,10 @@ class NikolaPygmentsHTML(HtmlFormatter): yield 0, '</pre>' +# For consistency, override the default formatter. +pygments.formatters._formatter_cache['HTML'] = NikolaPygmentsHTML + + def get_displayed_page_number(i, num_pages, site): """Get page number to be displayed for entry `i`.""" if not i: @@ -1621,7 +1694,7 @@ def adjust_name_for_index_path_list(path_list, i, displayed_i, lang, site, force path_list.append(index_file) if site.config["PRETTY_URLS"] and site.config["INDEXES_PRETTY_PAGE_URL"](lang) and path_list[-1] == index_file: path_schema = site.config["INDEXES_PRETTY_PAGE_URL"](lang) - if isinstance(path_schema, (bytes_str, unicode_str)): + if isinstance(path_schema, (bytes, str)): path_schema = [path_schema] else: path_schema = None @@ -1664,7 +1737,7 @@ def adjust_name_for_index_link(name, i, displayed_i, lang, site, force_addition= def create_redirect(src, dst): - """"Create a redirection.""" + """Create a redirection.""" makedirs(os.path.dirname(src)) with io.open(src, "w+", encoding="utf8") as fd: fd.write('<!DOCTYPE html>\n<head>\n<meta charset="utf-8">\n' @@ -1674,139 +1747,6 @@ def create_redirect(src, dst): '<a href="{0}">here</a>.</p>\n</body>'.format(dst)) -class TreeNode(object): - """A tree node.""" - - indent_levels = None # use for formatting comments as tree - indent_change_before = 0 # use for formatting comments as tree - indent_change_after = 0 # use for formatting comments as tree - - # The indent levels and changes allow to render a tree structure - # without keeping track of all that information during rendering. - # - # The indent_change_before is the different between the current - # comment's level and the previous comment's level; if the number - # is positive, the current level is indented further in, and if it - # is negative, it is indented further out. Positive values can be - # used to open HTML tags for each opened level. - # - # The indent_change_after is the difference between the next - # comment's level and the current comment's level. Negative values - # can be used to close HTML tags for each closed level. - # - # The indent_levels list contains one entry (index, count) per - # level, informing about the index of the current comment on that - # level and the count of comments on that level (before a comment - # of a higher level comes). This information can be used to render - # tree indicators, for example to generate a tree such as: - # - # +--- [(0,3)] - # +-+- [(1,3)] - # | +--- [(1,3), (0,2)] - # | +-+- [(1,3), (1,2)] - # | +--- [(1,3), (1,2), (0, 1)] - # +-+- [(2,3)] - # +- [(2,3), (0,1)] - # - # (The lists used as labels represent the content of the - # indent_levels property for that node.) 
- - def __init__(self, name, parent=None): - """Initialize node.""" - self.name = name - self.parent = parent - self.children = [] - - def get_path(self): - """Get path.""" - path = [] - curr = self - while curr is not None: - path.append(curr) - curr = curr.parent - return reversed(path) - - def get_children(self): - """Get children of a node.""" - return self.children - - -def flatten_tree_structure(root_list): - """Flatten a tree.""" - elements = [] - - def generate(input_list, indent_levels_so_far): - for index, element in enumerate(input_list): - # add to destination - elements.append(element) - # compute and set indent levels - indent_levels = indent_levels_so_far + [(index, len(input_list))] - element.indent_levels = indent_levels - # add children - children = element.get_children() - element.children_count = len(children) - generate(children, indent_levels) - - generate(root_list, []) - # Add indent change counters - level = 0 - last_element = None - for element in elements: - new_level = len(element.indent_levels) - # Compute level change before this element - change = new_level - level - if last_element is not None: - last_element.indent_change_after = change - element.indent_change_before = change - # Update variables - level = new_level - last_element = element - # Set level change after last element - if last_element is not None: - last_element.indent_change_after = -level - return elements - - -def parse_escaped_hierarchical_category_name(category_name): - """Parse a category name.""" - result = [] - current = None - index = 0 - next_backslash = category_name.find('\\', index) - next_slash = category_name.find('/', index) - while index < len(category_name): - if next_backslash == -1 and next_slash == -1: - current = (current if current else "") + category_name[index:] - index = len(category_name) - elif next_slash >= 0 and (next_backslash == -1 or next_backslash > next_slash): - result.append((current if current else "") + category_name[index:next_slash]) - current = '' - index = next_slash + 1 - next_slash = category_name.find('/', index) - else: - if len(category_name) == next_backslash + 1: - raise Exception("Unexpected '\\' in '{0}' at last position!".format(category_name)) - esc_ch = category_name[next_backslash + 1] - if esc_ch not in {'/', '\\'}: - raise Exception("Unknown escape sequence '\\{0}' in '{1}'!".format(esc_ch, category_name)) - current = (current if current else "") + category_name[index:next_backslash] + esc_ch - index = next_backslash + 2 - next_backslash = category_name.find('\\', index) - if esc_ch == '/': - next_slash = category_name.find('/', index) - if current is not None: - result.append(current) - return result - - -def join_hierarchical_category_path(category_path): - """Join a category path.""" - def escape(s): - return s.replace('\\', '\\\\').replace('/', '\\/') - - return '/'.join([escape(p) for p in category_path]) - - def colorize_str_from_base_color(string, base_color): """Find a perceptual similar color from a base color based on the hash of a string. @@ -1815,14 +1755,7 @@ def colorize_str_from_base_color(string, base_color): lightness and saturation untouched using HUSL colorspace. 
""" def hash_str(string, pos): - x = hashlib.md5(string.encode('utf-8')).digest()[pos] - try: - # Python 2: a string - # TODO: remove in v8 - return ord(x) - except TypeError: - # Python 3: already an integer - return x + return hashlib.md5(string.encode('utf-8')).digest()[pos] def degreediff(dega, degb): return min(abs(dega - degb), abs((degb - dega) + 360)) @@ -1840,6 +1773,13 @@ def colorize_str_from_base_color(string, base_color): return husl.husl_to_hex(h, s, l) +def colorize_str(string: str, base_color: str, presets: dict): + """Colorize a string by using a presets dict or generate one based on base_color.""" + if string in presets: + return presets[string] + return colorize_str_from_base_color(string, base_color) + + def color_hsl_adjust_hex(hexstr, adjust_h=None, adjust_s=None, adjust_l=None): """Adjust a hex color using HSL arguments, adjustments in percentages 1.0 to -1.0. Returns a hex color.""" h, s, l = husl.hex_to_husl(hexstr) @@ -1901,6 +1841,64 @@ def clean_before_deployment(site): return undeployed_posts +def sort_posts(posts, *keys): + """Sort posts by a given predicate. Helper function for templates. + + If a key starts with '-', it is sorted in descending order. + + Usage examples:: + + sort_posts(timeline, 'title', 'date') + sort_posts(timeline, 'author', '-section_name') + """ + # We reverse the keys to get the usual ordering method: the first key + # provided is the most important sorting predicate (first by 'title', then + # by 'date' in the first example) + for key in reversed(keys): + if key.startswith('-'): + key = key[1:] + reverse = True + else: + reverse = False + try: + # An attribute (or method) of the Post object + a = getattr(posts[0], key) + if callable(a): + keyfunc = operator.methodcaller(key) + else: + keyfunc = operator.attrgetter(key) + except AttributeError: + # Post metadata + keyfunc = operator.methodcaller('meta', key) + + posts = sorted(posts, reverse=reverse, key=keyfunc) + return posts + + +def smartjoin(join_char: str, string_or_iterable) -> str: + """Join string_or_iterable with join_char if it is iterable; otherwise converts it to string. + + >>> smartjoin('; ', 'foo, bar') + 'foo, bar' + >>> smartjoin('; ', ['foo', 'bar']) + 'foo; bar' + >>> smartjoin(' to ', ['count', 42]) + 'count to 42' + """ + if isinstance(string_or_iterable, (str, bytes)): + return string_or_iterable + elif isinstance(string_or_iterable, Iterable): + return join_char.join([str(e) for e in string_or_iterable]) + else: + return str(string_or_iterable) + + +def _smartjoin_filter(string_or_iterable, join_char: str) -> str: + """Join stuff smartly, with reversed arguments for Jinja2 filters.""" + # http://jinja.pocoo.org/docs/2.10/api/#custom-filters + return smartjoin(join_char, string_or_iterable) + + # Stolen from textwrap in Python 3.4.3. def indent(text, prefix, predicate=None): """Add 'prefix' to the beginning of selected lines in 'text'. 
@@ -1924,11 +1922,13 @@ def load_data(path): """Given path to a file, load data from it.""" ext = os.path.splitext(path)[-1] loader = None + function = 'load' if ext in {'.yml', '.yaml'}: - loader = yaml - if yaml is None: - req_missing(['yaml'], 'use YAML data files') + if YAML is None: + req_missing(['ruamel.yaml'], 'use YAML data files') return {} + loader = YAML(typ='safe') + function = 'load' elif ext in {'.json', '.js'}: loader = json elif ext in {'.toml', '.tml'}: @@ -1938,5 +1938,141 @@ def load_data(path): loader = toml if loader is None: return - with io.open(path, 'r', encoding='utf8') as inf: - return loader.load(inf) + with io.open(path, 'r', encoding='utf-8-sig') as inf: + return getattr(loader, function)(inf) + + +def rss_writer(rss_obj, output_path): + """Write an RSS object to an xml file.""" + dst_dir = os.path.dirname(output_path) + makedirs(dst_dir) + with io.open(output_path, "w+", encoding="utf-8") as rss_file: + data = rss_obj.to_xml(encoding='utf-8') + if isinstance(data, bytes): + data = data.decode('utf-8') + rss_file.write(data) + + +def map_metadata(meta, key, config): + """Map metadata from other platforms to Nikola names. + + This uses the METADATA_MAPPING and METADATA_VALUE_MAPPING settings (via ``config``) and modifies the dict in place. + """ + for foreign, ours in config.get('METADATA_MAPPING', {}).get(key, {}).items(): + if foreign in meta: + meta[ours] = meta[foreign] + + for meta_key, hook in config.get('METADATA_VALUE_MAPPING', {}).get(key, {}).items(): + if meta_key in meta: + meta[meta_key] = hook(meta[meta_key]) + + +class ClassificationTranslationManager(object): + """Keeps track of which classifications could be translated as which others. + + The internal structure is as follows: + - per language, you have a map of classifications to maps + - the inner map is a map from other languages to sets of classifications + which are considered as translations + """ + + def __init__(self): + self._data = defaultdict(dict) + + def add_translation(self, translation_map): + """Add translation of one classification. + + ``translation_map`` must be a dictionary mapping languages to their + translations of the added classification. + """ + for lang, classification in translation_map.items(): + clmap = self._data[lang] + cldata = clmap.get(classification) + if cldata is None: + cldata = defaultdict(set) + clmap[classification] = cldata + for other_lang, other_classification in translation_map.items(): + if other_lang != lang: + cldata[other_lang].add(other_classification) + + def get_translations(self, classification, lang): + """Get a dict mapping other languages to (unsorted) lists of translated classifications.""" + clmap = self._data[lang] + cldata = clmap.get(classification) + if cldata is None: + return {} + else: + return {other_lang: list(classifications) for other_lang, classifications in cldata.items()} + + def get_translations_as_list(self, classification, lang, classifications_per_language): + """Get a list of pairs ``(other_lang, other_classification)`` which are translations of ``classification``. + + Avoid classifications not in ``classifications_per_language``. 
+ """ + clmap = self._data[lang] + cldata = clmap.get(classification) + if cldata is None: + return [] + else: + result = [] + for other_lang, classifications in cldata.items(): + for other_classification in classifications: + if other_classification in classifications_per_language[other_lang]: + result.append((other_lang, other_classification)) + return result + + def has_translations(self, classification, lang): + """Return whether we know about the classification in that language. + + Note that this function returning ``True`` does not mean that + ``get_translations`` returns a non-empty dict or that + ``get_translations_as_list`` returns a non-empty list, but only + that this classification was explicitly added with + ``add_translation`` at some point. + """ + return self._data[lang].get(classification) is not None + + def add_defaults(self, posts_per_classification_per_language): + """Treat every classification as its own literal translation into every other language. + + ``posts_per_classification_per_language`` should be the first argument + to ``Taxonomy.postprocess_posts_per_classification``. + """ + # First collect all classifications from all languages + all_classifications = set() + for _, classifications in posts_per_classification_per_language.items(): + all_classifications.update(classifications.keys()) + # Next, add translation records for all of them + for classification in all_classifications: + record = {tlang: classification for tlang in posts_per_classification_per_language} + self.add_translation(record) + + def read_from_config(self, site, basename, posts_per_classification_per_language, add_defaults_default): + """Read translations from config. + + ``site`` should be the Nikola site object. Will consider + the variables ``<basename>_TRANSLATIONS`` and + ``<basename>_TRANSLATIONS_ADD_DEFAULTS``. + + ``posts_per_classification_per_language`` should be the first argument + to ``Taxonomy.postprocess_posts_per_classification``, i.e. this function + should be called from that function. ``add_defaults_default`` specifies + what the default value for ``<basename>_TRANSLATIONS_ADD_DEFAULTS`` is. + + Also sends signal via blinker to allow interested plugins to add + translations by themselves. The signal name used is + ``<lower(basename)>_translations_config``, and the argument is a dict + with entries ``translation_manager``, ``site`` and + ``posts_per_classification_per_language``. + """ + # Add translations + for record in site.config.get('{}_TRANSLATIONS'.format(basename), []): + self.add_translation(record) + # Add default translations + if site.config.get('{}_TRANSLATIONS_ADD_DEFAULTS'.format(basename), add_defaults_default): + self.add_defaults(posts_per_classification_per_language) + # Use blinker to inform interested parties (plugins) that they can add + # translations themselves + args = {'translation_manager': self, 'site': site, + 'posts_per_classification_per_language': posts_per_classification_per_language} + signal('{}_translations_config'.format(basename.lower())).send(args) |
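A note on the new theme metadata handling: parse_theme_meta() looks for an INI-style <theme_name>.theme file inside the theme directory, and the code in this commit only consults the 'engine' and 'parent' keys of its [Theme] section. A minimal sketch of such a file (the values 'mako' and 'base' are illustrative examples, not taken from this commit):

    [Theme]
    engine = mako
    parent = base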
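The locale machinery above drops the Python locale module in favour of Babel. A minimal usage sketch of the new LocaleBorg API, based only on the signatures shown in the hunks above; the date value and the 'yyyy-MM-dd HH:mm' CLDR pattern are made-up examples:

    import datetime

    from nikola.utils import LocaleBorg

    # Example setup: no per-language locale-name overrides, English as the initial language.
    LocaleBorg.initialize({}, 'en')
    lb = LocaleBorg()
    lb.set_locale('en')  # returns '' so templates can call it inline

    d = datetime.datetime(2020, 3, 1, 12, 30)
    lb.formatted_date('yyyy-MM-dd HH:mm', d)       # formatted by Babel from a CLDR pattern
    lb.formatted_date('webiso', d)                 # RFC 3339 profile, handled without Babel
    lb.format_date_in_string('Archive for {month_year}', d.date())  # e.g. 'Archive for March 2020'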
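map_metadata() rewrites a metadata dict in place using the METADATA_MAPPING and METADATA_VALUE_MAPPING settings. A small sketch of that behaviour; the 'markdown_metadata' key and both mappings are invented example configuration, not values from this commit:

    from nikola.utils import map_metadata

    meta = {'summary': 'A post', 'tags': 'foo, bar'}
    config = {
        'METADATA_MAPPING': {
            'markdown_metadata': {'summary': 'description'},
        },
        'METADATA_VALUE_MAPPING': {
            'markdown_metadata': {'tags': lambda value: [t.strip() for t in value.split(',')]},
        },
    }
    map_metadata(meta, 'markdown_metadata', config)
    # meta == {'summary': 'A post', 'description': 'A post', 'tags': ['foo', 'bar']}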
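ClassificationTranslationManager keeps track of which classifications (e.g. tags or categories) are translations of each other across languages. A short sketch of the bookkeeping with invented English/German sample data:

    from nikola.utils import ClassificationTranslationManager

    mgr = ClassificationTranslationManager()
    mgr.add_translation({'en': 'cats', 'de': 'katzen'})

    mgr.get_translations('cats', 'en')    # {'de': ['katzen']}
    mgr.has_translations('katzen', 'de')  # True
    mgr.get_translations('dogs', 'en')    # {} -- never registered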
