diff options
Diffstat (limited to 'nikola/utils.py')
| -rw-r--r-- | nikola/utils.py | 1296 |
1 files changed, 841 insertions, 455 deletions
diff --git a/nikola/utils.py b/nikola/utils.py index 3a268ff..d029b7f 100644 --- a/nikola/utils.py +++ b/nikola/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright © 2012-2015 Roberto Alsina and others. +# Copyright © 2012-2020 Roberto Alsina and others. # Permission is hereby granted, free of charge, to any # person obtaining a copy of this software and associated @@ -26,124 +26,99 @@ """Utility functions.""" -from __future__ import print_function, unicode_literals, absolute_import -import calendar +import configparser import datetime -import dateutil.tz import hashlib import io -import locale -import logging -import natsort +import operator import os import re import json import shutil +import socket import subprocess import sys +import threading +import typing +from collections import defaultdict, OrderedDict +from collections.abc import Callable, Iterable +from html import unescape as html_unescape +from importlib import reload as _reload +from unicodedata import normalize as unicodenormalize +from urllib.parse import quote as urlquote +from urllib.parse import unquote as urlunquote +from urllib.parse import urlparse, urlunparse +from zipfile import ZipFile as zipf + +import babel.dates import dateutil.parser import dateutil.tz -import logbook -import warnings +import pygments.formatters +import pygments.formatters._mapping import PyRSS2Gen as rss -from collections import defaultdict, Callable -from logbook.compat import redirect_logging -from logbook.more import ExceptionHandler, ColorizedStderrHandler -from pygments.formatters import HtmlFormatter -from zipfile import ZipFile as zipf +from blinker import signal from doit import tools -from unidecode import unidecode -from pkg_resources import resource_filename from doit.cmdparse import CmdParse +from pkg_resources import resource_filename +from nikola.packages.pygments_better_html import BetterHtmlFormatter +from unidecode import unidecode -from nikola import DEBUG - -__all__ = ('CustomEncoder', 'get_theme_path', 'get_theme_chain', 'load_messages', 'copy_tree', - 'copy_file', 'slugify', 'unslugify', 'to_datetime', 'apply_filters', +# Renames +from nikola import DEBUG # NOQA +from .log import LOGGER, get_logger # NOQA +from .hierarchy_utils import TreeNode, clone_treenode, flatten_tree_structure, sort_classifications +from .hierarchy_utils import join_hierarchical_category_path, parse_escaped_hierarchical_category_name + +try: + import toml +except ImportError: + toml = None + +try: + from ruamel.yaml import YAML +except ImportError: + YAML = None + +try: + import husl +except ImportError: + husl = None + +__all__ = ('CustomEncoder', 'get_theme_path', 'get_theme_path_real', + 'get_theme_chain', 'load_messages', 'copy_tree', 'copy_file', + 'slugify', 'unslugify', 'to_datetime', 'apply_filters', 'config_changed', 'get_crumbs', 'get_tzname', 'get_asset_path', - '_reload', 'unicode_str', 'bytes_str', 'unichr', 'Functionary', - 'TranslatableSetting', 'TemplateHookRegistry', 'LocaleBorg', + '_reload', 'Functionary', 'TranslatableSetting', + 'TemplateHookRegistry', 'LocaleBorg', 'sys_encode', 'sys_decode', 'makedirs', 'get_parent_theme_name', 'demote_headers', 'get_translation_candidate', 'write_metadata', 'ask', 'ask_yesno', 'options2docstring', 'os_path_split', 'get_displayed_page_number', 'adjust_name_for_index_path_list', 'adjust_name_for_index_path', 'adjust_name_for_index_link', - 'NikolaPygmentsHTML', 'create_redirect', 'TreeNode', - 'flatten_tree_structure', 'parse_escaped_hierarchical_category_name', - 'join_hierarchical_category_path', 'indent') + 'NikolaPygmentsHTML', 'create_redirect', 'clean_before_deployment', + 'sort_posts', 'smartjoin', 'indent', 'load_data', 'html_unescape', + 'rss_writer', 'map_metadata', 'req_missing', + # Deprecated, moved to hierarchy_utils: + 'TreeNode', 'clone_treenode', 'flatten_tree_structure', + 'sort_classifications', 'join_hierarchical_category_path', + 'parse_escaped_hierarchical_category_name',) # Are you looking for 'generic_rss_renderer'? # It's defined in nikola.nikola.Nikola (the site object). -if sys.version_info[0] == 3: - # Python 3 - bytes_str = bytes - unicode_str = str - unichr = chr - raw_input = input - from imp import reload as _reload -else: - bytes_str = str - unicode_str = unicode # NOQA - _reload = reload # NOQA - unichr = unichr - - -class ApplicationWarning(Exception): - pass - - -class ColorfulStderrHandler(ColorizedStderrHandler): +# Aliases, previously for Python 2/3 compatibility. +# TODO remove in v9 +bytes_str = bytes +unicode_str = str +unichr = chr - """Stream handler with colors.""" +# For compatibility with old logging setups. +# TODO remove in v9? +STDERR_HANDLER = None - _colorful = False - - def should_colorize(self, record): - """Inform about colorization using the value obtained from Nikola.""" - return self._colorful - - -def get_logger(name, handlers): - """Get a logger with handlers attached.""" - l = logbook.Logger(name) - for h in handlers: - if isinstance(h, list): - l.handlers += h - else: - l.handlers.append(h) - return l - - -STDERR_HANDLER = [ColorfulStderrHandler( - level=logbook.INFO if not DEBUG else logbook.DEBUG, - format_string=u'[{record.time:%Y-%m-%dT%H:%M:%SZ}] {record.level_name}: {record.channel}: {record.message}' -)] - - -LOGGER = get_logger('Nikola', STDERR_HANDLER) -STRICT_HANDLER = ExceptionHandler(ApplicationWarning, level='WARNING') USE_SLUGIFY = True -redirect_logging() - -if DEBUG: - logging.basicConfig(level=logging.DEBUG) -else: - logging.basicConfig(level=logging.INFO) - - -def showwarning(message, category, filename, lineno, file=None, line=None): - """Show a warning (from the warnings module) to the user.""" - try: - n = category.__name__ - except AttributeError: - n = str(category) - get_logger(n, STDERR_HANDLER).warn('{0}:{1}: {2}'.format(filename, lineno, message)) - -warnings.showwarning = showwarning - def req_missing(names, purpose, python=True, optional=False): """Log that we are missing some requirements. @@ -182,7 +157,7 @@ def req_missing(names, purpose, python=True, optional=False): purpose, pnames, whatarethey_p) if optional: - LOGGER.warn(msg) + LOGGER.warning(msg) else: LOGGER.error(msg) LOGGER.error('Exiting due to missing dependencies.') @@ -191,20 +166,19 @@ def req_missing(names, purpose, python=True, optional=False): return msg -from nikola import filters as task_filters # NOQA ENCODING = sys.getfilesystemencoding() or sys.stdin.encoding def sys_encode(thing): """Return bytes encoded in the system's encoding.""" - if isinstance(thing, unicode_str): + if isinstance(thing, str): return thing.encode(ENCODING) return thing def sys_decode(thing): """Return Unicode.""" - if isinstance(thing, bytes_str): + if isinstance(thing, bytes): return thing.decode(ENCODING) return thing @@ -228,12 +202,11 @@ def makedirs(path): class Functionary(defaultdict): - """Class that looks like a function, but is a defaultdict.""" def __init__(self, default, default_lang): """Initialize a functionary.""" - super(Functionary, self).__init__(default) + super().__init__(default) self.default_lang = default_lang def __call__(self, key, lang=None): @@ -244,7 +217,6 @@ class Functionary(defaultdict): class TranslatableSetting(object): - """A setting that can be translated. You can access it via: SETTING(lang). You can omit lang, in which @@ -271,7 +243,7 @@ class TranslatableSetting(object): def __getattribute__(self, attr): """Return attributes, falling back to string attributes.""" try: - return super(TranslatableSetting, self).__getattribute__(attr) + return super().__getattribute__(attr) except AttributeError: return self().__getattribute__(attr) @@ -294,7 +266,7 @@ class TranslatableSetting(object): self.overriden_default = False self.values = defaultdict() - if isinstance(inp, dict): + if isinstance(inp, dict) and inp: self.translated = True self.values.update(inp) if self.default_lang not in self.values.keys(): @@ -335,15 +307,11 @@ class TranslatableSetting(object): def __str__(self): """Return the value in the currently set language (deprecated).""" - return self.values[self.get_lang()] - - def __unicode__(self): - """Return the value in the currently set language (deprecated).""" - return self.values[self.get_lang()] + return str(self.values[self.get_lang()]) def __repr__(self): """Provide a representation for programmers.""" - return '<TranslatableSetting: {0!r}>'.format(self.name) + return '<TranslatableSetting: {0!r} = {1!r}>'.format(self.name, self._inp) def format(self, *args, **kwargs): """Format ALL the values in the setting the same way.""" @@ -423,15 +391,20 @@ class TranslatableSetting(object): def __eq__(self, other): """Test whether two TranslatableSettings are equal.""" - return self.values == other.values + try: + return self.values == other.values + except AttributeError: + return self(self.default_lang) == other def __ne__(self, other): """Test whether two TranslatableSettings are inequal.""" - return self.values != other.values + try: + return self.values != other.values + except AttributeError: + return self(self.default_lang) != other class TemplateHookRegistry(object): - r"""A registry for template hooks. Usage: @@ -439,9 +412,8 @@ class TemplateHookRegistry(object): >>> r = TemplateHookRegistry('foo', None) >>> r.append('Hello!') >>> r.append(lambda x: 'Hello ' + x + '!', False, 'world') - >>> str(r()) # str() call is not recommended in real use + >>> repr(r()) 'Hello!\nHello world!' - >>> """ def __init__(self, name, site): @@ -483,9 +455,23 @@ class TemplateHookRegistry(object): c = callable(inp) self._items.append((c, inp, wants_site_and_context, args, kwargs)) + def calculate_deps(self): + """Calculate dependencies for a registry.""" + deps = [] + for is_callable, inp, wants_site_and_context, args, kwargs in self._items: + if not is_callable: + name = inp + elif hasattr(inp, 'template_registry_identifier'): + name = inp.template_registry_identifier + elif hasattr(inp, '__doc__'): + name = inp.__doc__ + else: + name = '_undefined_callable_' + deps.append((is_callable, name, wants_site_and_context, args, kwargs)) + def __hash__(self): """Return hash of a registry.""" - return hash(config_changed({self.name: self._items})._calc_digest()) + return hash(config_changed({self.name: self.calculate_deps()})._calc_digest()) def __str__(self): """Stringify a registry.""" @@ -497,32 +483,55 @@ class TemplateHookRegistry(object): class CustomEncoder(json.JSONEncoder): - """Custom JSON encoder.""" def default(self, obj): - """Default encoding handler.""" + """Create default encoding handler.""" try: - return super(CustomEncoder, self).default(obj) + return super().default(obj) except TypeError: if isinstance(obj, (set, frozenset)): return self.encode(sorted(list(obj))) + elif isinstance(obj, TranslatableSetting): + s = json.dumps(obj._inp, cls=CustomEncoder, sort_keys=True) else: s = repr(obj).split('0x', 1)[0] return s class config_changed(tools.config_changed): - """A copy of doit's config_changed, using pickle instead of serializing manually.""" def __init__(self, config, identifier=None): """Initialize config_changed.""" - super(config_changed, self).__init__(config) + super().__init__(config) self.identifier = '_config_changed' if identifier is not None: self.identifier += ':' + identifier + # DEBUG (for unexpected rebuilds) + @classmethod + def _write_into_debug_db(cls, digest: str, data: str) -> None: # pragma: no cover + """Write full values of config_changed into a sqlite3 database.""" + import sqlite3 + try: + cls.debug_db_cursor + except AttributeError: + cls.debug_db_conn = sqlite3.connect("cc_debug.sqlite3") + cls.debug_db_id = datetime.datetime.now().isoformat() + cls.debug_db_cursor = cls.debug_db_conn.cursor() + cls.debug_db_cursor.execute(""" + CREATE TABLE IF NOT EXISTS hashes (hash CHARACTER(32) PRIMARY KEY, json_data TEXT); + """) + cls.debug_db_conn.commit() + + try: + cls.debug_db_cursor.execute("INSERT INTO hashes (hash, json_data) VALUES (?, ?);", (digest, data)) + cls.debug_db_conn.commit() + except sqlite3.IntegrityError: + # ON CONFLICT DO NOTHING, except Ubuntu 16.04’s sqlite3 is too ancient for this + cls.debug_db_conn.rollback() + def _calc_digest(self): """Calculate a config_changed digest.""" if isinstance(self.config, str): @@ -534,7 +543,14 @@ class config_changed(tools.config_changed): else: byte_data = data digest = hashlib.md5(byte_data).hexdigest() + + # DEBUG (for unexpected rebuilds) + # self._write_into_debug_db(digest, data) + # Alternative (without database): # LOGGER.debug('{{"{0}": {1}}}'.format(digest, byte_data)) + # Humanized format: + # LOGGER.debug('[Digest {0} for {2}]\n{1}\n[Digest {0} for {2}]'.format(digest, byte_data, self.identifier)) + return digest else: raise Exception('Invalid type of config_changed parameter -- got ' @@ -559,46 +575,80 @@ class config_changed(tools.config_changed): sort_keys=True)) -def get_theme_path(theme, _themes_dir='themes'): +def get_theme_path_real(theme, themes_dirs): """Return the path where the given theme's files are located. Looks in ./themes and in the place where themes go when installed. """ - dir_name = os.path.join(_themes_dir, theme) - if os.path.isdir(dir_name): - return dir_name + for themes_dir in themes_dirs: + dir_name = os.path.join(themes_dir, theme) + if os.path.isdir(dir_name): + return dir_name dir_name = resource_filename('nikola', os.path.join('data', 'themes', theme)) if os.path.isdir(dir_name): return dir_name raise Exception("Can't find theme '{0}'".format(theme)) -def get_template_engine(themes, _themes_dir='themes'): +def get_theme_path(theme): + """Return the theme's path, which equals the theme's name.""" + return theme + + +def parse_theme_meta(theme_dir): + """Parse a .theme meta file.""" + cp = configparser.ConfigParser() + # The `or` case is in case theme_dir ends with a trailing slash + theme_name = os.path.basename(theme_dir) or os.path.basename(os.path.dirname(theme_dir)) + theme_meta_path = os.path.join(theme_dir, theme_name + '.theme') + cp.read(theme_meta_path) + return cp if cp.has_section('Theme') else None + + +def get_template_engine(themes): """Get template engine used by a given theme.""" for theme_name in themes: - engine_path = os.path.join(get_theme_path(theme_name, _themes_dir), 'engine') - if os.path.isfile(engine_path): - with open(engine_path) as fd: - return fd.readlines()[0].strip() + meta = parse_theme_meta(theme_name) + if meta: + e = meta.get('Theme', 'engine', fallback=None) + if e: + return e + else: + # Theme still uses old-style parent/engine files + engine_path = os.path.join(theme_name, 'engine') + if os.path.isfile(engine_path): + with open(engine_path) as fd: + return fd.readlines()[0].strip() # default return 'mako' -def get_parent_theme_name(theme_name, _themes_dir='themes'): +def get_parent_theme_name(theme_name, themes_dirs=None): """Get name of parent theme.""" - parent_path = os.path.join(get_theme_path(theme_name, _themes_dir), 'parent') - if os.path.isfile(parent_path): - with open(parent_path) as fd: - return fd.readlines()[0].strip() - return None + meta = parse_theme_meta(theme_name) + if meta: + parent = meta.get('Theme', 'parent', fallback=None) + if themes_dirs and parent: + return get_theme_path_real(parent, themes_dirs) + return parent + else: + # Theme still uses old-style parent/engine files + parent_path = os.path.join(theme_name, 'parent') + if os.path.isfile(parent_path): + with open(parent_path) as fd: + parent = fd.readlines()[0].strip() + if themes_dirs: + return get_theme_path_real(parent, themes_dirs) + return parent + return None -def get_theme_chain(theme, _themes_dir='themes'): - """Create the full theme inheritance chain.""" - themes = [theme] +def get_theme_chain(theme, themes_dirs): + """Create the full theme inheritance chain including paths.""" + themes = [get_theme_path_real(theme, themes_dirs)] while True: - parent = get_parent_theme_name(themes[-1], _themes_dir) + parent = get_parent_theme_name(themes[-1], themes_dirs=themes_dirs) # Avoid silly loops if parent is None or parent in themes: break @@ -606,11 +656,10 @@ def get_theme_chain(theme, _themes_dir='themes'): return themes -language_incomplete_warned = [] +INCOMPLETE_LANGUAGES_WARNED = set() class LanguageNotFoundError(Exception): - """An exception thrown if language is not found.""" def __init__(self, lang, orig): @@ -623,7 +672,7 @@ class LanguageNotFoundError(Exception): return 'cannot find language {0}'.format(self.lang) -def load_messages(themes, translations, default_lang): +def load_messages(themes, translations, default_lang, themes_dirs): """Load theme's messages into context. All the messages from parent themes are loaded, @@ -631,35 +680,50 @@ def load_messages(themes, translations, default_lang): """ messages = Functionary(dict, default_lang) oldpath = list(sys.path) + found = {lang: False for lang in translations.keys()} + last_exception = None + completion_status = {lang: False for lang in translations.keys()} for theme_name in themes[::-1]: msg_folder = os.path.join(get_theme_path(theme_name), 'messages') - default_folder = os.path.join(get_theme_path('base'), 'messages') + default_folder = os.path.join(get_theme_path_real('base', themes_dirs), 'messages') sys.path.insert(0, default_folder) sys.path.insert(0, msg_folder) + english = __import__('messages_en') - for lang in list(translations.keys()): + # If we don't do the reload, the module is cached + _reload(english) + for lang in translations.keys(): try: translation = __import__('messages_' + lang) # If we don't do the reload, the module is cached _reload(translation) - if sorted(translation.MESSAGES.keys()) !=\ - sorted(english.MESSAGES.keys()) and \ - lang not in language_incomplete_warned: - language_incomplete_warned.append(lang) - LOGGER.warn("Incomplete translation for language " - "'{0}'.".format(lang)) + found[lang] = True + if sorted(translation.MESSAGES.keys()) != sorted(english.MESSAGES.keys()): + completion_status[lang] = completion_status[lang] or False + else: + completion_status[lang] = True + messages[lang].update(english.MESSAGES) for k, v in translation.MESSAGES.items(): if v: messages[lang][k] = v del(translation) except ImportError as orig: - raise LanguageNotFoundError(lang, orig) - sys.path = oldpath + last_exception = orig + del(english) + sys.path = oldpath + + if not all(found.values()): + raise LanguageNotFoundError(lang, last_exception) + for lang, status in completion_status.items(): + if not status and lang not in INCOMPLETE_LANGUAGES_WARNED: + LOGGER.warning("Incomplete translation for language '{0}'.".format(lang)) + INCOMPLETE_LANGUAGES_WARNED.add(lang) + return messages -def copy_tree(src, dst, link_cutoff=None): +def copy_tree(src, dst, link_cutoff=None, ignored_filenames=None): """Copy a src tree to the dst folder. Example: @@ -670,11 +734,13 @@ def copy_tree(src, dst, link_cutoff=None): should copy "themes/defauts/assets/foo/bar" to "output/assets/foo/bar" - if link_cutoff is set, then the links pointing at things + If link_cutoff is set, then the links pointing at things *inside* that folder will stay as links, and links pointing *outside* that folder will be copied. + + ignored_filenames is a set of file names that will be ignored. """ - ignore = set(['.svn']) + ignore = set(['.svn', '.git']) | (ignored_filenames or set()) base_len = len(src.split(os.sep)) for root, dirs, files in os.walk(src, followlinks=True): root_parts = root.split(os.sep) @@ -724,35 +790,36 @@ def remove_file(source): elif os.path.isfile(source) or os.path.islink(source): os.remove(source) -# slugify is copied from + +# slugify is adopted from # http://code.activestate.com/recipes/ # 577257-slugify-make-a-string-usable-in-a-url-or-filename/ -_slugify_strip_re = re.compile(r'[^+\w\s-]') -_slugify_hyphenate_re = re.compile(r'[-\s]+') +_slugify_strip_re = re.compile(r'[^+\w\s-]', re.UNICODE) +_slugify_hyphenate_re = re.compile(r'[-\s]+', re.UNICODE) -def slugify(value, force=False): +def slugify(value, lang=None, force=False): u"""Normalize string, convert to lowercase, remove non-alpha characters, convert spaces to hyphens. From Django's "django/template/defaultfilters.py". - >>> print(slugify('áéí.óú')) + >>> print(slugify('áéí.óú', lang='en')) aeiou - >>> print(slugify('foo/bar')) + >>> print(slugify('foo/bar', lang='en')) foobar - >>> print(slugify('foo bar')) + >>> print(slugify('foo bar', lang='en')) foo-bar """ - if not isinstance(value, unicode_str): + if not isinstance(value, str): raise ValueError("Not a unicode object: {0}".format(value)) if USE_SLUGIFY or force: # This is the standard state of slugify, which actually does some work. # It is the preferred style, especially for Western languages. - value = unicode_str(unidecode(value)) - value = _slugify_strip_re.sub('', value, re.UNICODE).strip().lower() - return _slugify_hyphenate_re.sub('-', value, re.UNICODE) + value = str(unidecode(value)) + value = _slugify_strip_re.sub('', value).strip().lower() + return _slugify_hyphenate_re.sub('-', value) else: # This is the “disarmed” state of slugify, which lets the user # have any character they please (be it regular ASCII with spaces, @@ -769,7 +836,7 @@ def slugify(value, force=False): return value -def unslugify(value, discard_numbers=True): +def unslugify(value, lang=None, discard_numbers=True): """Given a slug string (as a filename), return a human readable string. If discard_numbers is True, numbers right at the beginning of input @@ -777,16 +844,38 @@ def unslugify(value, discard_numbers=True): """ if discard_numbers: value = re.sub('^[0-9]+', '', value) - value = re.sub('([_\-\.])', ' ', value) + value = re.sub(r'([_\-\.])', ' ', value) value = value.strip().capitalize() return value +def encodelink(iri): + """Given an encoded or unencoded link string, return an encoded string suitable for use as a link in HTML and XML.""" + iri = unicodenormalize('NFC', iri) + link = OrderedDict(urlparse(iri)._asdict()) + link['path'] = urlquote(urlunquote(link['path']).encode('utf-8'), safe="/~") + try: + link['netloc'] = link['netloc'].encode('utf-8').decode('idna').encode('idna').decode('utf-8') + except UnicodeDecodeError: + link['netloc'] = link['netloc'].encode('idna').decode('utf-8') + encoded_link = urlunparse(link.values()) + return encoded_link + + +def full_path_from_urlparse(parsed) -> str: + """Given urlparse output, return the full path (with query and fragment).""" + dst = parsed.path + if parsed.query: + dst = "{0}?{1}".format(dst, parsed.query) + if parsed.fragment: + dst = "{0}#{1}".format(dst, parsed.fragment) + return dst + # A very slightly safer version of zip.extractall that works on # python < 2.6 -class UnsafeZipException(Exception): +class UnsafeZipException(Exception): """Exception for unsafe zip files.""" pass @@ -815,6 +904,8 @@ def extract_all(zipfile, path='themes'): def to_datetime(value, tzinfo=None): """Convert string to datetime.""" try: + if type(value) == datetime.date: + value = datetime.datetime.combine(value, datetime.time(0, 0)) if not isinstance(value, datetime.datetime): # dateutil does bad things with TZs like UTC-03:00. dateregexp = re.compile(r' UTC([+-][0-9][0-9]:[0-9][0-9])') @@ -845,6 +936,9 @@ def current_time(tzinfo=None): return dt +from nikola import filters as task_filters # NOQA + + def apply_filters(task, filters, skip_ext=None): """Apply filters to a task. @@ -863,11 +957,11 @@ def apply_filters(task, filters, skip_ext=None): if isinstance(key, (tuple, list)): if ext in key: return value - elif isinstance(key, (bytes_str, unicode_str)): + elif isinstance(key, (bytes, str)): if ext == key: return value else: - assert False, key + raise ValueError("Cannot find filter match for {0}".format(key)) for target in task.get('targets', []): ext = os.path.splitext(target)[-1].lower() @@ -887,7 +981,7 @@ def apply_filters(task, filters, skip_ext=None): return task -def get_crumbs(path, is_file=False, index_folder=None): +def get_crumbs(path, is_file=False, index_folder=None, lang=None): """Create proper links for a crumb bar. index_folder is used if you want to use title from index file @@ -896,26 +990,26 @@ def get_crumbs(path, is_file=False, index_folder=None): >>> crumbs = get_crumbs('galleries') >>> len(crumbs) 1 - >>> print('|'.join(crumbs[0])) - #|galleries + >>> crumbs[0] + ['#', 'galleries'] >>> crumbs = get_crumbs(os.path.join('galleries','demo')) >>> len(crumbs) 2 - >>> print('|'.join(crumbs[0])) - ..|galleries - >>> print('|'.join(crumbs[1])) - #|demo + >>> crumbs[0] + ['..', 'galleries'] + >>> crumbs[1] + ['#', 'demo'] >>> crumbs = get_crumbs(os.path.join('listings','foo','bar'), is_file=True) >>> len(crumbs) 3 - >>> print('|'.join(crumbs[0])) - ..|listings - >>> print('|'.join(crumbs[1])) - .|foo - >>> print('|'.join(crumbs[2])) - #|bar + >>> crumbs[0] + ['..', 'listings'] + >>> crumbs[1] + ['.', 'foo'] + >>> crumbs[2] + ['#', 'bar'] """ crumbs = path.split(os.sep) _crumbs = [] @@ -923,8 +1017,10 @@ def get_crumbs(path, is_file=False, index_folder=None): for i, crumb in enumerate(crumbs[-3::-1]): # Up to parent folder only _path = '/'.join(['..'] * (i + 1)) _crumbs.append([_path, crumb]) - _crumbs.insert(0, ['.', crumbs[-2]]) # file's folder - _crumbs.insert(0, ['#', crumbs[-1]]) # file itself + if len(crumbs) >= 2: + _crumbs.insert(0, ['.', crumbs[-2]]) # file's folder + if len(crumbs) >= 1: + _crumbs.insert(0, ['#', crumbs[-1]]) # file itself else: for i, crumb in enumerate(crumbs[::-1]): _path = '/'.join(['..'] * i) or '#' @@ -940,40 +1036,49 @@ def get_crumbs(path, is_file=False, index_folder=None): index_post = index_folder.parse_index(folder, '', '') folder = folder.replace(crumb, '') if index_post: - crumb = index_post.title() or crumb + crumb = index_post.title(lang) or crumb _crumbs[i][1] = crumb return list(reversed(_crumbs)) -def get_asset_path(path, themes, files_folders={'files': ''}, _themes_dir='themes'): +def get_asset_path(path, themes, files_folders={'files': ''}, output_dir='output'): """Return the "real", absolute path to the asset. By default, it checks which theme provides the asset. If the asset is not provided by a theme, then it will be checked for in the FILES_FOLDERS. + If it's not provided by either, it will be chacked in output, where + it may have been created by another plugin. - >>> print(get_asset_path('assets/css/rst.css', ['bootstrap3', 'base'])) - /.../nikola/data/themes/base/assets/css/rst.css + >>> print(get_asset_path('assets/css/nikola_rst.css', get_theme_chain('bootstrap3', ['themes']))) + /.../nikola/data/themes/base/assets/css/nikola_rst.css - >>> print(get_asset_path('assets/css/theme.css', ['bootstrap3', 'base'])) + >>> print(get_asset_path('assets/css/theme.css', get_theme_chain('bootstrap3', ['themes']))) /.../nikola/data/themes/bootstrap3/assets/css/theme.css - >>> print(get_asset_path('nikola.py', ['bootstrap3', 'base'], {'nikola': ''})) + >>> print(get_asset_path('nikola.py', get_theme_chain('bootstrap3', ['themes']), {'nikola': ''})) /.../nikola/nikola.py - >>> print(get_asset_path('nikola/nikola.py', ['bootstrap3', 'base'], {'nikola':'nikola'})) + >>> print(get_asset_path('nikola.py', get_theme_chain('bootstrap3', ['themes']), {'nikola': 'nikola'})) None + >>> print(get_asset_path('nikola/nikola.py', get_theme_chain('bootstrap3', ['themes']), {'nikola': 'nikola'})) + /.../nikola/nikola.py + """ for theme_name in themes: - candidate = os.path.join( - get_theme_path(theme_name, _themes_dir), - path - ) + candidate = os.path.join(get_theme_path(theme_name), path) if os.path.isfile(candidate): return candidate for src, rel_dst in files_folders.items(): - candidate = os.path.abspath(os.path.join(src, path)) + relpath = os.path.normpath(os.path.relpath(path, rel_dst)) + if not relpath.startswith('..' + os.path.sep): + candidate = os.path.abspath(os.path.join(src, relpath)) + if os.path.isfile(candidate): + return candidate + + if output_dir: + candidate = os.path.join(output_dir, path) if os.path.isfile(candidate): return candidate @@ -982,30 +1087,53 @@ def get_asset_path(path, themes, files_folders={'files': ''}, _themes_dir='theme class LocaleBorgUninitializedException(Exception): - """Exception for unitialized LocaleBorg.""" def __init__(self): """Initialize exception.""" - super(LocaleBorgUninitializedException, self).__init__("Attempt to use LocaleBorg before initialization") + super().__init__("Attempt to use LocaleBorg before initialization") + + +# Customized versions of babel.dates functions that don't do weird stuff with +# timezones. Without these fixes, DST would follow local settings (because +# dateutil’s timezones return stuff depending on their input, and datetime.time +# objects have no year/month/day to base the information on. +def format_datetime(datetime=None, format='medium', + locale=babel.dates.LC_TIME): + """Format a datetime object.""" + locale = babel.dates.Locale.parse(locale) + if format in ('full', 'long', 'medium', 'short'): + return babel.dates.get_datetime_format(format, locale=locale) \ + .replace("'", "") \ + .replace('{0}', format_time(datetime, format, locale=locale)) \ + .replace('{1}', babel.dates.format_date(datetime, format, locale=locale)) + else: + return babel.dates.parse_pattern(format).apply(datetime, locale) -class LocaleBorg(object): +def format_time(time=None, format='medium', locale=babel.dates.LC_TIME): + """Format time. Input can be datetime.time or datetime.datetime.""" + locale = babel.dates.Locale.parse(locale) + if format in ('full', 'long', 'medium', 'short'): + format = babel.dates.get_time_format(format, locale=locale) + return babel.dates.parse_pattern(format).apply(time, locale) - """Provide locale related services and autoritative current_lang. - current_lang is the last lang for which the locale was set - and is meant to be set only by LocaleBorg.set_locale. +def format_skeleton(skeleton, datetime=None, fo=None, fuzzy=True, + locale=babel.dates.LC_TIME): + """Format a datetime based on a skeleton.""" + locale = babel.dates.Locale.parse(locale) + if fuzzy and skeleton not in locale.datetime_skeletons: + skeleton = babel.dates.match_skeleton(skeleton, locale.datetime_skeletons) + format = locale.datetime_skeletons[skeleton] + return format_datetime(datetime, format, locale) - python's locale code should not be directly called from code outside of - LocaleBorg, they are compatibilty issues with py version and OS support - better handled at one central point, LocaleBorg. - In particular, don't call locale.setlocale outside of LocaleBorg. +class LocaleBorg(object): + """Provide locale related services and autoritative current_lang. - Assumptions: - We need locales only for the languages there is a nikola translation. - We don't need to support current_lang through nested contexts + This class stores information about the locales used and interfaces + with the Babel library to provide internationalization services. Usage: # early in cmd or test execution @@ -1015,97 +1143,132 @@ class LocaleBorg(object): lang = LocaleBorg().<service> Available services: - .current_lang : autoritative current_lang , the last seen in set_locale - .set_locale(lang) : sets current_lang and sets the locale for lang - .get_month_name(month_no, lang) : returns the localized month name - - NOTE: never use locale.getlocale() , it can return values that - locale.setlocale will not accept in Windows XP, 7 and pythons 2.6, 2.7, 3.3 - Examples: "Spanish", "French" can't do the full circle set / get / set - That used to break calendar, but now seems is not the case, with month at least + .current_lang: autoritative current_lang, the last seen in set_locale + .formatted_date: format a date(time) according to locale rules + .format_date_in_string: take a message and format the date in it + + The default implementation uses the Babel package and completely ignores + the Python `locale` module. If you wish to override this, write functions + and assign them to the appropriate names. The functions are: + + * LocaleBorg.datetime_formatter(date, date_format, lang, locale) + * LocaleBorg.in_string_formatter(date, mode, custom_format, lang, locale) """ initialized = False + # Can be used to override Babel + datetime_formatter = None + in_string_formatter = None + @classmethod - def initialize(cls, locales, initial_lang): + def initialize(cls, locales: 'typing.Dict[str, str]', initial_lang: str): """Initialize LocaleBorg. - locales : dict with lang: locale_n - the same keys as in nikola's TRANSLATIONS - locale_n a sanitized locale, meaning - locale.setlocale(locale.LC_ALL, locale_n) will succeed - locale_n expressed in the string form, like "en.utf8" + locales: dict with custom locale name overrides. """ - assert initial_lang is not None and initial_lang in locales + if not initial_lang: + raise ValueError("Unknown initial language {0}".format(initial_lang)) cls.reset() cls.locales = locales - - # needed to decode some localized output in py2x - encodings = {} - for lang in locales: - locale.setlocale(locale.LC_ALL, locales[lang]) - loc, encoding = locale.getlocale() - encodings[lang] = encoding - - cls.encodings = encodings - cls.__shared_state['current_lang'] = initial_lang + cls.__initial_lang = initial_lang cls.initialized = True + def __get_shared_state(self): + if not self.initialized: # pragma: no cover + raise LocaleBorgUninitializedException() + shared_state = getattr(self.__thread_local, 'shared_state', None) + if shared_state is None: + shared_state = {'current_lang': self.__initial_lang} + self.__thread_local.shared_state = shared_state + return shared_state + @classmethod def reset(cls): """Reset LocaleBorg. Used in testing to prevent leaking state between tests. """ + cls.__thread_local = threading.local() + cls.__thread_lock = threading.Lock() + cls.locales = {} - cls.encodings = {} - cls.__shared_state = {'current_lang': None} cls.initialized = False + cls.thread_local = None + cls.datetime_formatter = None + cls.in_string_formatter = None def __init__(self): """Initialize.""" if not self.initialized: raise LocaleBorgUninitializedException() - self.__dict__ = self.__shared_state - def set_locale(self, lang): - """Set the locale for language lang, returns an empty string. + @property + def current_lang(self) -> str: + """Return the current language.""" + return self.__get_shared_state()['current_lang'] + + def set_locale(self, lang: str) -> str: + """Set the current language and return an empty string (to make use in templates easier).""" + with self.__thread_lock: + self.__get_shared_state()['current_lang'] = lang + return '' + + def formatted_date(self, date_format: 'str', + date: 'typing.Union[datetime.date, datetime.datetime]', + lang: 'typing.Optional[str]' = None) -> str: + """Return the formatted date/datetime as a string.""" + if lang is None: + lang = self.current_lang + locale = self.locales.get(lang, lang) + # Get a string out of a TranslatableSetting + if isinstance(date_format, TranslatableSetting): + date_format = date_format(lang) + + # Always ask Python if the date_format is webiso + if date_format == 'webiso': + # Formatted after RFC 3339 (web ISO 8501 profile) with Zulu + # zone designator for times in UTC and no microsecond precision. + return date.replace(microsecond=0).isoformat().replace('+00:00', 'Z') + elif LocaleBorg.datetime_formatter is not None: + return LocaleBorg.datetime_formatter(date, date_format, lang, locale) + else: + return format_datetime(date, date_format, locale=locale) - in linux the locale encoding is set to utf8, - in windows that cannot be guaranted. - In either case, the locale encoding is available in cls.encodings[lang] + def format_date_in_string(self, message: str, date: datetime.date, lang: 'typing.Optional[str]' = None) -> str: + """Format date inside a string (message). + + Accepted modes: month, month_year, month_day_year. + Format: {month} for standard, {month:MMMM} for customization. """ - # intentional non try-except: templates must ask locales with a lang, - # let the code explode here and not hide the point of failure - # Also, not guarded with an if lang==current_lang because calendar may - # put that out of sync - locale_n = self.locales[lang] - self.__shared_state['current_lang'] = lang - locale.setlocale(locale.LC_ALL, locale_n) - return '' - - def get_month_name(self, month_no, lang): - """Return localized month name in an unicode string.""" - if sys.version_info[0] == 3: # Python 3 - with calendar.different_locale(self.locales[lang]): - s = calendar.month_name[month_no] - # for py3 s is unicode - else: # Python 2 - with calendar.TimeEncoding(self.locales[lang]): - s = calendar.month_name[month_no] - enc = self.encodings[lang] - if not enc: - enc = 'UTF-8' - - s = s.decode(enc) - # paranoid about calendar ending in the wrong locale (windows) - self.set_locale(self.current_lang) - return s + modes = { + 'month': ('date', 'LLLL'), + 'month_year': ('skeleton', 'yMMMM'), + 'month_day_year': ('date', 'long') + } + if lang is None: + lang = self.current_lang + locale = self.locales.get(lang, lang) + + def date_formatter(match: typing.Match) -> str: + """Format a date as requested.""" + mode, custom_format = match.groups() + if LocaleBorg.in_string_formatter is not None: + return LocaleBorg.in_string_formatter(date, mode, custom_format, lang, locale) + elif custom_format: + return babel.dates.format_date(date, custom_format, locale) + else: + function, fmt = modes[mode] + if function == 'skeleton': + return format_skeleton(fmt, date, locale=locale) + else: + return babel.dates.format_date(date, fmt, locale) -class ExtendedRSS2(rss.RSS2): + return re.sub(r'{(.*?)(?::(.*?))?}', date_formatter, message) + +class ExtendedRSS2(rss.RSS2): """Extended RSS class.""" xsl_stylesheet_href = None @@ -1114,8 +1277,7 @@ class ExtendedRSS2(rss.RSS2): """Publish a feed.""" if self.xsl_stylesheet_href: handler.processingInstruction("xml-stylesheet", 'type="text/xsl" href="{0}" media="all"'.format(self.xsl_stylesheet_href)) - # old-style class in py2 - rss.RSS2.publish(self, handler) + super().publish(handler) def publish_extensions(self, handler): """Publish extensions.""" @@ -1129,14 +1291,14 @@ class ExtendedRSS2(rss.RSS2): class ExtendedItem(rss.RSSItem): - """Extended RSS item.""" def __init__(self, **kw): """Initialize RSS item.""" - self.creator = kw.pop('creator') + self.creator = kw.pop('creator', None) + # It's an old style class - return rss.RSSItem.__init__(self, **kw) + rss.RSSItem.__init__(self, **kw) def publish_extensions(self, handler): """Publish extensions.""" @@ -1176,24 +1338,34 @@ def demote_headers(doc, level=1): if level == 0: return doc elif level > 0: - r = range(1, 7 - level) + levels = range(1, 7 - (level - 1)) + levels = reversed(levels) elif level < 0: - r = range(1 + level, 7) - for i in reversed(r): - # html headers go to 6, so we can’t “lower” beneath five - elements = doc.xpath('//h' + str(i)) - for e in elements: - e.tag = 'h' + str(i + level) + levels = range(2 + level, 7) + + for before in levels: + after = before + level + if after < 1: + # html headers can't go lower than 1 + after = 1 + elif after > 6: + # html headers go until 6 + after = 6 + + if before == after: + continue + + elements = doc.xpath('//h{}'.format(before)) + new_tag = 'h{}'.format(after) + for element in elements: + element.tag = new_tag def get_root_dir(): """Find root directory of nikola site by looking for conf.py.""" root = os.getcwd() - if sys.version_info[0] == 2: - confname = b'conf.py' - else: - confname = 'conf.py' + confname = 'conf.py' while True: if os.path.exists(os.path.join(root, confname)): @@ -1224,10 +1396,10 @@ def get_translation_candidate(config, path, lang): cache/posts/fancy.post.html >>> print(get_translation_candidate(config, 'cache/posts/fancy.post.html', 'es')) cache/posts/fancy.post.es.html - >>> print(get_translation_candidate(config, 'cache/stories/charts.html', 'es')) - cache/stories/charts.es.html - >>> print(get_translation_candidate(config, 'cache/stories/charts.html', 'en')) - cache/stories/charts.html + >>> print(get_translation_candidate(config, 'cache/pages/charts.html', 'es')) + cache/pages/charts.es.html + >>> print(get_translation_candidate(config, 'cache/pages/charts.html', 'en')) + cache/pages/charts.html >>> config = {'TRANSLATIONS_PATTERN': '{path}.{ext}.{lang}', 'DEFAULT_LANG': 'en', 'TRANSLATIONS': {'es':'1', 'en': 1}} >>> print(get_translation_candidate(config, '*.rst', 'es')) @@ -1247,7 +1419,7 @@ def get_translation_candidate(config, path, lang): # This will still break if the user has ?*[]\ in the pattern. But WHY WOULD HE? pattern = pattern.replace('.', r'\.') pattern = pattern.replace('{path}', '(?P<path>.+?)') - pattern = pattern.replace('{ext}', '(?P<ext>[^\./]+)') + pattern = pattern.replace('{ext}', r'(?P<ext>[^\./]+)') pattern = pattern.replace('{lang}', '(?P<lang>{0})'.format('|'.join(config['TRANSLATIONS'].keys()))) m = re.match(pattern, path) if m and all(m.groups()): # It's a translated path @@ -1268,24 +1440,59 @@ def get_translation_candidate(config, path, lang): return config['TRANSLATIONS_PATTERN'].format(path=p, ext=e, lang=lang) -def write_metadata(data): - """Write metadata.""" - order = ('title', 'slug', 'date', 'tags', 'category', 'link', 'description', 'type') - f = '.. {0}: {1}' - meta = [] - for k in order: - try: - meta.append(f.format(k, data.pop(k))) - except KeyError: - pass - - # Leftover metadata (user-specified/non-default). - for k in natsort.natsorted(list(data.keys()), alg=natsort.ns.F | natsort.ns.IC): - meta.append(f.format(k, data[k])) +def write_metadata(data, metadata_format=None, comment_wrap=False, site=None, compiler=None): + """Write metadata. - meta.append('') - - return '\n'.join(meta) + Recommended usage: pass `site`, `comment_wrap` (True, False, or a 2-tuple of start/end markers), and optionally `compiler`. Other options are for backwards compatibility. + """ + # API compatibility + if metadata_format is None and site is not None: + metadata_format = site.config.get('METADATA_FORMAT', 'nikola').lower() + if metadata_format is None: + metadata_format = 'nikola' + + if site is None: + import nikola.metadata_extractors + metadata_extractors_by = nikola.metadata_extractors.default_metadata_extractors_by() + nikola.metadata_extractors.load_defaults(site, metadata_extractors_by) + else: + metadata_extractors_by = site.metadata_extractors_by + + # Pelican is mapped to rest_docinfo, markdown_meta, or nikola. + if metadata_format == 'pelican': + if compiler and compiler.name == 'rest': + metadata_format = 'rest_docinfo' + elif compiler and compiler.name == 'markdown': + metadata_format = 'markdown_meta' + else: + # Quiet fallback. + metadata_format = 'nikola' + + default_meta = ('nikola', 'rest_docinfo', 'markdown_meta') + extractor = metadata_extractors_by['name'].get(metadata_format) + if extractor and extractor.supports_write: + extractor.check_requirements() + return extractor.write_metadata(data, comment_wrap) + elif extractor and metadata_format not in default_meta: + LOGGER.warning('Writing METADATA_FORMAT {} is not supported, using "nikola" format'.format(metadata_format)) + elif metadata_format not in default_meta: + LOGGER.warning('Unknown METADATA_FORMAT {}, using "nikola" format'.format(metadata_format)) + + if metadata_format == 'rest_docinfo': + title = data['title'] + results = [ + '=' * len(title), + title, + '=' * len(title), + '' + ] + [':{0}: {1}'.format(k, v) for k, v in data.items() if v and k != 'title'] + [''] + return '\n'.join(results) + elif metadata_format == 'markdown_meta': + results = ['{0}: {1}'.format(k, v) for k, v in data.items() if v] + ['', ''] + return '\n'.join(results) + else: # Nikola, default + from nikola.metadata_extractors import DEFAULT_EXTRACTOR + return DEFAULT_EXTRACTOR.write_metadata(data, comment_wrap) def ask(query, default=None): @@ -1294,10 +1501,7 @@ def ask(query, default=None): default_q = ' [{0}]'.format(default) else: default_q = '' - if sys.version_info[0] == 3: - inp = raw_input("{query}{default_q}: ".format(query=query, default_q=default_q)).strip() - else: - inp = raw_input("{query}{default_q}: ".format(query=query, default_q=default_q).encode('utf-8')).strip() + inp = input("{query}{default_q}: ".format(query=query, default_q=default_q)).strip() if inp or default is None: return inp else: @@ -1312,10 +1516,7 @@ def ask_yesno(query, default=None): default_q = ' [Y/n]' elif default is False: default_q = ' [y/N]' - if sys.version_info[0] == 3: - inp = raw_input("{query}{default_q} ".format(query=query, default_q=default_q)).strip() - else: - inp = raw_input("{query}{default_q} ".format(query=query, default_q=default_q).encode('utf-8')).strip() + inp = input("{query}{default_q} ".format(query=query, default_q=default_q)).strip() if inp: return inp.lower().startswith('y') elif default is not None: @@ -1326,7 +1527,6 @@ def ask_yesno(query, default=None): class CommandWrapper(object): - """Converts commands into functions.""" def __init__(self, cmd, commands_object): @@ -1342,7 +1542,6 @@ class CommandWrapper(object): class Commands(object): - """Nikola Commands. Sample usage: @@ -1366,10 +1565,6 @@ class Commands(object): # cleanup: run is doit-only, init is useless in an existing site if k in ['run', 'init']: continue - if sys.version_info[0] == 2: - k2 = bytes(k) - else: - k2 = k self._cmdnames.append(k) @@ -1380,7 +1575,7 @@ class Commands(object): # doit command: needs some help opt = v(config=self._config, **self._doitargs).get_options() nc = type( - k2, + k, (CommandWrapper,), { '__doc__': options2docstring(k, opt) @@ -1432,18 +1627,27 @@ def options2docstring(name, options): return '\n'.join(result) -class NikolaPygmentsHTML(HtmlFormatter): - +class NikolaPygmentsHTML(BetterHtmlFormatter): """A Nikola-specific modification of Pygments' HtmlFormatter.""" - def __init__(self, anchor_ref, classes=None, linenos='table', linenostart=1): + def __init__(self, anchor_ref=None, classes=None, **kwargs): """Initialize formatter.""" if classes is None: classes = ['code', 'literal-block'] + if anchor_ref: + kwargs['lineanchors'] = slugify( + anchor_ref, lang=LocaleBorg().current_lang, force=True) self.nclasses = classes - super(NikolaPygmentsHTML, self).__init__( - cssclass='code', linenos=linenos, linenostart=linenostart, nowrap=False, - lineanchors=slugify(anchor_ref, force=True), anchorlinenos=True) + kwargs['cssclass'] = 'code' + if not kwargs.get('linenos'): + # Default to no line numbers (Issue #3426) + kwargs['linenos'] = False + if kwargs.get('linenos') not in {'table', 'inline', 'ol', False}: + # Map invalid values to table + kwargs['linenos'] = 'table' + kwargs['anchorlinenos'] = kwargs['linenos'] == 'table' + kwargs['nowrap'] = False + super().__init__(**kwargs) def wrap(self, source, outfile): """Wrap the ``source``, which is a generator yielding individual lines, in custom generators.""" @@ -1461,6 +1665,10 @@ class NikolaPygmentsHTML(HtmlFormatter): yield 0, '</pre>' +# For consistency, override the default formatter. +pygments.formatters._formatter_cache['HTML'] = NikolaPygmentsHTML + + def get_displayed_page_number(i, num_pages, site): """Get page number to be displayed for entry `i`.""" if not i: @@ -1486,7 +1694,7 @@ def adjust_name_for_index_path_list(path_list, i, displayed_i, lang, site, force path_list.append(index_file) if site.config["PRETTY_URLS"] and site.config["INDEXES_PRETTY_PAGE_URL"](lang) and path_list[-1] == index_file: path_schema = site.config["INDEXES_PRETTY_PAGE_URL"](lang) - if isinstance(path_schema, (bytes_str, unicode_str)): + if isinstance(path_schema, (bytes, str)): path_schema = [path_schema] else: path_schema = None @@ -1529,7 +1737,7 @@ def adjust_name_for_index_link(name, i, displayed_i, lang, site, force_addition= def create_redirect(src, dst): - """"Create a redirection.""" + """Create a redirection.""" makedirs(os.path.dirname(src)) with io.open(src, "w+", encoding="utf8") as fd: fd.write('<!DOCTYPE html>\n<head>\n<meta charset="utf-8">\n' @@ -1539,138 +1747,156 @@ def create_redirect(src, dst): '<a href="{0}">here</a>.</p>\n</body>'.format(dst)) -class TreeNode(object): - - """A tree node.""" - - indent_levels = None # use for formatting comments as tree - indent_change_before = 0 # use for formatting comments as tree - indent_change_after = 0 # use for formatting comments as tree - - # The indent levels and changes allow to render a tree structure - # without keeping track of all that information during rendering. - # - # The indent_change_before is the different between the current - # comment's level and the previous comment's level; if the number - # is positive, the current level is indented further in, and if it - # is negative, it is indented further out. Positive values can be - # used to open HTML tags for each opened level. - # - # The indent_change_after is the difference between the next - # comment's level and the current comment's level. Negative values - # can be used to close HTML tags for each closed level. - # - # The indent_levels list contains one entry (index, count) per - # level, informing about the index of the current comment on that - # level and the count of comments on that level (before a comment - # of a higher level comes). This information can be used to render - # tree indicators, for example to generate a tree such as: - # - # +--- [(0,3)] - # +-+- [(1,3)] - # | +--- [(1,3), (0,2)] - # | +-+- [(1,3), (1,2)] - # | +--- [(1,3), (1,2), (0, 1)] - # +-+- [(2,3)] - # +- [(2,3), (0,1)] - # - # (The lists used as labels represent the content of the - # indent_levels property for that node.) - - def __init__(self, name, parent=None): - """Initialize node.""" - self.name = name - self.parent = parent - self.children = [] - - def get_path(self): - """Get path.""" - path = [] - curr = self - while curr is not None: - path.append(curr) - curr = curr.parent - return reversed(path) - - def get_children(self): - """Get children of a node.""" - return self.children - - -def flatten_tree_structure(root_list): - """Flatten a tree.""" - elements = [] - - def generate(input_list, indent_levels_so_far): - for index, element in enumerate(input_list): - # add to destination - elements.append(element) - # compute and set indent levels - indent_levels = indent_levels_so_far + [(index, len(input_list))] - element.indent_levels = indent_levels - # add children - children = element.get_children() - element.children_count = len(children) - generate(children, indent_levels) - - generate(root_list, []) - # Add indent change counters - level = 0 - last_element = None - for element in elements: - new_level = len(element.indent_levels) - # Compute level change before this element - change = new_level - level - if last_element is not None: - last_element.indent_change_after = change - element.indent_change_before = change - # Update variables - level = new_level - last_element = element - # Set level change after last element - if last_element is not None: - last_element.indent_change_after = -level - return elements - - -def parse_escaped_hierarchical_category_name(category_name): - """Parse a category name.""" - result = [] - current = None - index = 0 - next_backslash = category_name.find('\\', index) - next_slash = category_name.find('/', index) - while index < len(category_name): - if next_backslash == -1 and next_slash == -1: - current = (current if current else "") + category_name[index:] - index = len(category_name) - elif next_slash >= 0 and (next_backslash == -1 or next_backslash > next_slash): - result.append((current if current else "") + category_name[index:next_slash]) - current = '' - index = next_slash + 1 - next_slash = category_name.find('/', index) +def colorize_str_from_base_color(string, base_color): + """Find a perceptual similar color from a base color based on the hash of a string. + + Make up to 16 attempts (number of bytes returned by hashing) at picking a + hue for our color at least 27 deg removed from the base color, leaving + lightness and saturation untouched using HUSL colorspace. + """ + def hash_str(string, pos): + return hashlib.md5(string.encode('utf-8')).digest()[pos] + + def degreediff(dega, degb): + return min(abs(dega - degb), abs((degb - dega) + 360)) + + if husl is None: + req_missing(['husl'], 'Use color mixing (section colors)', + optional=True) + return base_color + h, s, l = husl.hex_to_husl(base_color) + old_h = h + idx = 0 + while degreediff(old_h, h) < 27 and idx < 16: + h = 360.0 * (float(hash_str(string, idx)) / 255) + idx += 1 + return husl.husl_to_hex(h, s, l) + + +def colorize_str(string: str, base_color: str, presets: dict): + """Colorize a string by using a presets dict or generate one based on base_color.""" + if string in presets: + return presets[string] + return colorize_str_from_base_color(string, base_color) + + +def color_hsl_adjust_hex(hexstr, adjust_h=None, adjust_s=None, adjust_l=None): + """Adjust a hex color using HSL arguments, adjustments in percentages 1.0 to -1.0. Returns a hex color.""" + h, s, l = husl.hex_to_husl(hexstr) + + if adjust_h: + h = h + (adjust_h * 360.0) + + if adjust_s: + s = s + (adjust_s * 100.0) + + if adjust_l: + l = l + (adjust_l * 100.0) + + return husl.husl_to_hex(h, s, l) + + +def dns_sd(port, inet6): + """Optimistically publish a HTTP service to the local network over DNS-SD. + + Works only on Linux/FreeBSD. Requires the `avahi` and `dbus` modules (symlinks in virtualenvs) + """ + try: + import avahi + import dbus + inet = avahi.PROTO_INET6 if inet6 else avahi.PROTO_INET + name = "{0}'s Nikola Server on {1}".format(os.getlogin(), socket.gethostname()) + bus = dbus.SystemBus() + bus_server = dbus.Interface(bus.get_object(avahi.DBUS_NAME, + avahi.DBUS_PATH_SERVER), + avahi.DBUS_INTERFACE_SERVER) + bus_group = dbus.Interface(bus.get_object(avahi.DBUS_NAME, + bus_server.EntryGroupNew()), + avahi.DBUS_INTERFACE_ENTRY_GROUP) + bus_group.AddService(avahi.IF_UNSPEC, inet, dbus.UInt32(0), + name, '_http._tcp', '', '', + dbus.UInt16(port), '') + bus_group.Commit() + return bus_group # remember to bus_group.Reset() to unpublish + except Exception: + return None + + +def clean_before_deployment(site): + """Clean drafts and future posts before deployment.""" + undeployed_posts = [] + deploy_drafts = site.config.get('DEPLOY_DRAFTS', True) + deploy_future = site.config.get('DEPLOY_FUTURE', False) + if not (deploy_drafts and deploy_future): # == !drafts || !future + # Remove drafts and future posts + out_dir = site.config['OUTPUT_FOLDER'] + site.scan_posts() + for post in site.timeline: + if (not deploy_drafts and post.is_draft) or (not deploy_future and post.publish_later): + for lang in post.translated_to: + remove_file(os.path.join(out_dir, post.destination_path(lang))) + source_path = post.destination_path(lang, post.source_ext(True)) + remove_file(os.path.join(out_dir, source_path)) + undeployed_posts.append(post) + return undeployed_posts + + +def sort_posts(posts, *keys): + """Sort posts by a given predicate. Helper function for templates. + + If a key starts with '-', it is sorted in descending order. + + Usage examples:: + + sort_posts(timeline, 'title', 'date') + sort_posts(timeline, 'author', '-section_name') + """ + # We reverse the keys to get the usual ordering method: the first key + # provided is the most important sorting predicate (first by 'title', then + # by 'date' in the first example) + for key in reversed(keys): + if key.startswith('-'): + key = key[1:] + reverse = True else: - if len(category_name) == next_backslash + 1: - raise Exception("Unexpected '\\' in '{0}' at last position!".format(category_name)) - esc_ch = category_name[next_backslash + 1] - if esc_ch not in {'/', '\\'}: - raise Exception("Unknown escape sequence '\\{0}' in '{1}'!".format(esc_ch, category_name)) - current = (current if current else "") + category_name[index:next_backslash] + esc_ch - index = next_backslash + 2 - next_backslash = category_name.find('\\', index) - if esc_ch == '/': - next_slash = category_name.find('/', index) - if current is not None: - result.append(current) - return result + reverse = False + try: + # An attribute (or method) of the Post object + a = getattr(posts[0], key) + if callable(a): + keyfunc = operator.methodcaller(key) + else: + keyfunc = operator.attrgetter(key) + except AttributeError: + # Post metadata + keyfunc = operator.methodcaller('meta', key) + posts = sorted(posts, reverse=reverse, key=keyfunc) + return posts -def join_hierarchical_category_path(category_path): - """Join a category path.""" - def escape(s): - return s.replace('\\', '\\\\').replace('/', '\\/') - return '/'.join([escape(p) for p in category_path]) +def smartjoin(join_char: str, string_or_iterable) -> str: + """Join string_or_iterable with join_char if it is iterable; otherwise converts it to string. + + >>> smartjoin('; ', 'foo, bar') + 'foo, bar' + >>> smartjoin('; ', ['foo', 'bar']) + 'foo; bar' + >>> smartjoin(' to ', ['count', 42]) + 'count to 42' + """ + if isinstance(string_or_iterable, (str, bytes)): + return string_or_iterable + elif isinstance(string_or_iterable, Iterable): + return join_char.join([str(e) for e in string_or_iterable]) + else: + return str(string_or_iterable) + + +def _smartjoin_filter(string_or_iterable, join_char: str) -> str: + """Join stuff smartly, with reversed arguments for Jinja2 filters.""" + # http://jinja.pocoo.org/docs/2.10/api/#custom-filters + return smartjoin(join_char, string_or_iterable) # Stolen from textwrap in Python 3.4.3. @@ -1690,3 +1916,163 @@ def indent(text, prefix, predicate=None): for line in text.splitlines(True): yield (prefix + line if predicate(line) else line) return ''.join(prefixed_lines()) + + +def load_data(path): + """Given path to a file, load data from it.""" + ext = os.path.splitext(path)[-1] + loader = None + function = 'load' + if ext in {'.yml', '.yaml'}: + if YAML is None: + req_missing(['ruamel.yaml'], 'use YAML data files') + return {} + loader = YAML(typ='safe') + function = 'load' + elif ext in {'.json', '.js'}: + loader = json + elif ext in {'.toml', '.tml'}: + if toml is None: + req_missing(['toml'], 'use TOML data files') + return {} + loader = toml + if loader is None: + return + with io.open(path, 'r', encoding='utf-8-sig') as inf: + return getattr(loader, function)(inf) + + +def rss_writer(rss_obj, output_path): + """Write an RSS object to an xml file.""" + dst_dir = os.path.dirname(output_path) + makedirs(dst_dir) + with io.open(output_path, "w+", encoding="utf-8") as rss_file: + data = rss_obj.to_xml(encoding='utf-8') + if isinstance(data, bytes): + data = data.decode('utf-8') + rss_file.write(data) + + +def map_metadata(meta, key, config): + """Map metadata from other platforms to Nikola names. + + This uses the METADATA_MAPPING and METADATA_VALUE_MAPPING settings (via ``config``) and modifies the dict in place. + """ + for foreign, ours in config.get('METADATA_MAPPING', {}).get(key, {}).items(): + if foreign in meta: + meta[ours] = meta[foreign] + + for meta_key, hook in config.get('METADATA_VALUE_MAPPING', {}).get(key, {}).items(): + if meta_key in meta: + meta[meta_key] = hook(meta[meta_key]) + + +class ClassificationTranslationManager(object): + """Keeps track of which classifications could be translated as which others. + + The internal structure is as follows: + - per language, you have a map of classifications to maps + - the inner map is a map from other languages to sets of classifications + which are considered as translations + """ + + def __init__(self): + self._data = defaultdict(dict) + + def add_translation(self, translation_map): + """Add translation of one classification. + + ``translation_map`` must be a dictionary mapping languages to their + translations of the added classification. + """ + for lang, classification in translation_map.items(): + clmap = self._data[lang] + cldata = clmap.get(classification) + if cldata is None: + cldata = defaultdict(set) + clmap[classification] = cldata + for other_lang, other_classification in translation_map.items(): + if other_lang != lang: + cldata[other_lang].add(other_classification) + + def get_translations(self, classification, lang): + """Get a dict mapping other languages to (unsorted) lists of translated classifications.""" + clmap = self._data[lang] + cldata = clmap.get(classification) + if cldata is None: + return {} + else: + return {other_lang: list(classifications) for other_lang, classifications in cldata.items()} + + def get_translations_as_list(self, classification, lang, classifications_per_language): + """Get a list of pairs ``(other_lang, other_classification)`` which are translations of ``classification``. + + Avoid classifications not in ``classifications_per_language``. + """ + clmap = self._data[lang] + cldata = clmap.get(classification) + if cldata is None: + return [] + else: + result = [] + for other_lang, classifications in cldata.items(): + for other_classification in classifications: + if other_classification in classifications_per_language[other_lang]: + result.append((other_lang, other_classification)) + return result + + def has_translations(self, classification, lang): + """Return whether we know about the classification in that language. + + Note that this function returning ``True`` does not mean that + ``get_translations`` returns a non-empty dict or that + ``get_translations_as_list`` returns a non-empty list, but only + that this classification was explicitly added with + ``add_translation`` at some point. + """ + return self._data[lang].get(classification) is not None + + def add_defaults(self, posts_per_classification_per_language): + """Treat every classification as its own literal translation into every other language. + + ``posts_per_classification_per_language`` should be the first argument + to ``Taxonomy.postprocess_posts_per_classification``. + """ + # First collect all classifications from all languages + all_classifications = set() + for _, classifications in posts_per_classification_per_language.items(): + all_classifications.update(classifications.keys()) + # Next, add translation records for all of them + for classification in all_classifications: + record = {tlang: classification for tlang in posts_per_classification_per_language} + self.add_translation(record) + + def read_from_config(self, site, basename, posts_per_classification_per_language, add_defaults_default): + """Read translations from config. + + ``site`` should be the Nikola site object. Will consider + the variables ``<basename>_TRANSLATIONS`` and + ``<basename>_TRANSLATIONS_ADD_DEFAULTS``. + + ``posts_per_classification_per_language`` should be the first argument + to ``Taxonomy.postprocess_posts_per_classification``, i.e. this function + should be called from that function. ``add_defaults_default`` specifies + what the default value for ``<basename>_TRANSLATIONS_ADD_DEFAULTS`` is. + + Also sends signal via blinker to allow interested plugins to add + translations by themselves. The signal name used is + ``<lower(basename)>_translations_config``, and the argument is a dict + with entries ``translation_manager``, ``site`` and + ``posts_per_classification_per_language``. + """ + # Add translations + for record in site.config.get('{}_TRANSLATIONS'.format(basename), []): + self.add_translation(record) + # Add default translations + if site.config.get('{}_TRANSLATIONS_ADD_DEFAULTS'.format(basename), add_defaults_default): + self.add_defaults(posts_per_classification_per_language) + # Use blinker to inform interested parties (plugins) that they can add + # translations themselves + args = {'translation_manager': self, 'site': site, + 'posts_per_classification_per_language': posts_per_classification_per_language} + signal('{}_translations_config'.format(basename.lower())).send(args) |
