aboutsummaryrefslogtreecommitdiffstats
path: root/gallery_dl/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/util.py')
-rw-r--r--gallery_dl/util.py200
1 files changed, 124 insertions, 76 deletions
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index fbede3e..3462138 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -145,6 +145,14 @@ def to_string(value):
return str(value)
+def to_timestamp(dt):
+ """Convert naive datetime to UTC timestamp string"""
+ try:
+ return str((dt - EPOCH) // SECOND)
+ except Exception:
+ return ""
+
+
def dump_json(obj, fp=sys.stdout, ensure_ascii=True, indent=4):
"""Serialize 'obj' as JSON and write it to 'fp'"""
json.dump(
@@ -370,6 +378,8 @@ class UniversalNone():
NONE = UniversalNone()
+EPOCH = datetime.datetime(1970, 1, 1)
+SECOND = datetime.timedelta(0, 1)
WINDOWS = (os.name == "nt")
SENTINEL = object()
SPECIAL_EXTRACTORS = {"oauth", "recursive", "test"}
@@ -390,11 +400,17 @@ def compile_expression(expr, name="<expr>", globals=GLOBALS):
def build_predicate(predicates):
if not predicates:
- return lambda url, kwds: True
+ return lambda url, kwdict: True
elif len(predicates) == 1:
return predicates[0]
- else:
- return ChainPredicate(predicates)
+ return functools.partial(chain_predicates, predicates)
+
+
+def chain_predicates(predicates, url, kwdict):
+ for pred in predicates:
+ if not pred(url, kwdict):
+ return False
+ return True
class RangePredicate():
@@ -408,7 +424,7 @@ class RangePredicate():
else:
self.lower, self.upper = 0, 0
- def __call__(self, url, kwds):
+ def __call__(self, url, _):
self.index += 1
if self.index > self.upper:
@@ -473,7 +489,7 @@ class UniquePredicate():
def __init__(self):
self.urls = set()
- def __call__(self, url, kwds):
+ def __call__(self, url, _):
if url.startswith("text:"):
return True
if url not in self.urls:
@@ -498,18 +514,6 @@ class FilterPredicate():
raise exception.FilterError(exc)
-class ChainPredicate():
- """Predicate; True if all of its predicates return True"""
- def __init__(self, predicates):
- self.predicates = predicates
-
- def __call__(self, url, kwds):
- for pred in self.predicates:
- if not pred(url, kwds):
- return False
- return True
-
-
class ExtendedUrl():
"""URL with attached config key-value pairs"""
def __init__(self, url, gconf, lconf):
@@ -536,6 +540,7 @@ class Formatter():
- "d": calls text.parse_timestamp
- "U": calls urllib.parse.unquote
- "S": calls util.to_string()
+ - "T": calls util.to_timestamü()
- Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"
Extra Format Specifiers:
@@ -559,12 +564,14 @@ class Formatter():
Replaces all occurrences of <old> with <new>
Example: {f:R /_/} -> "f_o_o_b_a_r" (if "f" is "f o o b a r")
"""
+ CACHE = {}
CONVERSIONS = {
"l": str.lower,
"u": str.upper,
"c": str.capitalize,
"C": string.capwords,
"t": str.strip,
+ "T": to_timestamp,
"d": text.parse_timestamp,
"U": urllib.parse.unquote,
"S": to_string,
@@ -575,19 +582,26 @@ class Formatter():
def __init__(self, format_string, default=None):
self.default = default
- self.result = []
- self.fields = []
-
- for literal_text, field_name, format_spec, conversion in \
- _string.formatter_parser(format_string):
- if literal_text:
- self.result.append(literal_text)
- if field_name:
- self.fields.append((
- len(self.result),
- self._field_access(field_name, format_spec, conversion),
- ))
- self.result.append("")
+ key = (format_string, default)
+
+ try:
+ self.result, self.fields = self.CACHE[key]
+ except KeyError:
+ self.result = []
+ self.fields = []
+
+ for literal_text, field_name, format_spec, conv in \
+ _string.formatter_parser(format_string):
+ if literal_text:
+ self.result.append(literal_text)
+ if field_name:
+ self.fields.append((
+ len(self.result),
+ self._field_access(field_name, format_spec, conv),
+ ))
+ self.result.append("")
+
+ self.CACHE[key] = (self.result, self.fields)
if len(self.result) == 1:
if self.fields:
@@ -777,9 +791,20 @@ class PathFormat():
raise exception.FilenameFormatError(exc)
directory_fmt = config("directory")
- if directory_fmt is None:
- directory_fmt = extractor.directory_fmt
try:
+ if directory_fmt is None:
+ directory_fmt = extractor.directory_fmt
+ elif isinstance(directory_fmt, dict):
+ self.directory_conditions = [
+ (compile_expression(expr), [
+ Formatter(fmt, kwdefault).format_map
+ for fmt in fmts
+ ])
+ for expr, fmts in directory_fmt.items() if expr
+ ]
+ self.build_directory = self.build_directory_conditional
+ directory_fmt = directory_fmt.get("", extractor.directory_fmt)
+
self.directory_formatters = [
Formatter(dirfmt, kwdefault).format_map
for dirfmt in directory_fmt
@@ -793,19 +818,6 @@ class PathFormat():
self.path = self.realpath = self.temppath = ""
self.delete = self._create_directory = False
- basedir = extractor._parentdir
- if not basedir:
- basedir = config("base-directory")
- if basedir is None:
- basedir = "." + os.sep + "gallery-dl" + os.sep
- elif basedir:
- basedir = expand_path(basedir)
- if os.altsep and os.altsep in basedir:
- basedir = basedir.replace(os.altsep, os.sep)
- if basedir[-1] != os.sep:
- basedir += os.sep
- self.basedirectory = basedir
-
extension_map = config("extension-map")
if extension_map is None:
extension_map = self.EXTENSION_MAP
@@ -826,6 +838,22 @@ class PathFormat():
remove = config("path-remove", "\x00-\x1f\x7f")
self.clean_path = self._build_cleanfunc(remove, "")
+ basedir = extractor._parentdir
+ if not basedir:
+ basedir = config("base-directory")
+ sep = os.sep
+ if basedir is None:
+ basedir = "." + sep + "gallery-dl" + sep
+ elif basedir:
+ basedir = expand_path(basedir)
+ altsep = os.altsep
+ if altsep and altsep in basedir:
+ basedir = basedir.replace(altsep, sep)
+ if basedir[-1] != sep:
+ basedir += sep
+ basedir = self.clean_path(basedir)
+ self.basedirectory = basedir
+
@staticmethod
def _build_cleanfunc(chars, repl):
if not chars:
@@ -837,8 +865,8 @@ class PathFormat():
def func(x, c=chars, r=repl):
return x.replace(c, r)
else:
- def func(x, sub=re.compile("[" + chars + "]").sub, r=repl):
- return sub(r, x)
+ return functools.partial(
+ re.compile("[" + chars + "]").sub, repl)
return func
def open(self, mode="wb"):
@@ -870,29 +898,14 @@ class PathFormat():
def set_directory(self, kwdict):
"""Build directory path and create it if necessary"""
self.kwdict = kwdict
-
- # Build path segments by applying 'kwdict' to directory format strings
- segments = []
- append = segments.append
- try:
- for formatter in self.directory_formatters:
- segment = formatter(kwdict).strip()
- if WINDOWS:
- # remove trailing dots and spaces (#647)
- segment = segment.rstrip(". ")
- if segment:
- append(self.clean_segment(segment))
- except Exception as exc:
- raise exception.DirectoryFormatError(exc)
-
- # Join path segments
sep = os.sep
- directory = self.clean_path(self.basedirectory + sep.join(segments))
- # Ensure 'directory' ends with a path separator
+ segments = self.build_directory(kwdict)
if segments:
- directory += sep
- self.directory = directory
+ self.directory = directory = self.basedirectory + self.clean_path(
+ sep.join(segments) + sep)
+ else:
+ self.directory = directory = self.basedirectory
if WINDOWS:
# Enable longer-than-260-character paths on Windows
@@ -935,17 +948,15 @@ class PathFormat():
self.temppath = self.realpath = self.realpath[:-1]
return True
- def build_filename(self):
+ def build_filename(self, kwdict):
"""Apply 'kwdict' to filename format string"""
try:
return self.clean_path(self.clean_segment(
- self.filename_formatter(self.kwdict)))
+ self.filename_formatter(kwdict)))
except Exception as exc:
raise exception.FilenameFormatError(exc)
- def build_filename_conditional(self):
- kwdict = self.kwdict
-
+ def build_filename_conditional(self, kwdict):
try:
for condition, formatter in self.filename_conditions:
if condition(kwdict):
@@ -956,12 +967,49 @@ class PathFormat():
except Exception as exc:
raise exception.FilenameFormatError(exc)
+ def build_directory(self, kwdict):
+ """Apply 'kwdict' to directory format strings"""
+ segments = []
+ append = segments.append
+
+ try:
+ for formatter in self.directory_formatters:
+ segment = formatter(kwdict).strip()
+ if WINDOWS:
+ # remove trailing dots and spaces (#647)
+ segment = segment.rstrip(". ")
+ if segment:
+ append(self.clean_segment(segment))
+ return segments
+ except Exception as exc:
+ raise exception.DirectoryFormatError(exc)
+
+ def build_directory_conditional(self, kwdict):
+ segments = []
+ append = segments.append
+
+ try:
+ for condition, formatters in self.directory_conditions:
+ if condition(kwdict):
+ break
+ else:
+ formatters = self.directory_formatters
+ for formatter in formatters:
+ segment = formatter(kwdict).strip()
+ if WINDOWS:
+ segment = segment.rstrip(". ")
+ if segment:
+ append(self.clean_segment(segment))
+ return segments
+ except Exception as exc:
+ raise exception.DirectoryFormatError(exc)
+
def build_path(self):
"""Combine directory and filename to full paths"""
if self._create_directory:
os.makedirs(self.realdirectory, exist_ok=True)
self._create_directory = False
- self.filename = filename = self.build_filename()
+ self.filename = filename = self.build_filename(self.kwdict)
self.path = self.directory + filename
self.realpath = self.realdirectory + filename
if not self.temppath:
@@ -1028,9 +1076,9 @@ class DownloadArchive():
# fallback for missing WITHOUT ROWID support (#553)
self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
"(entry PRIMARY KEY)")
-
- self.keygen = (extractor.category + extractor.config(
- "archive-format", extractor.archive_fmt)
+ self.keygen = (
+ extractor.config("archive-prefix", extractor.category) +
+ extractor.config("archive-format", extractor.archive_fmt)
).format_map
def check(self, kwdict):