about summary refs log tree commit diff stats
path: root/gallery_dl/job.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net> 2025-07-31 01:22:01 -0400
committer: Unit 193 <unit193@unit193.net> 2025-07-31 01:22:01 -0400
commit: a6e995c093de8aae2e91a0787281bb34c0b871eb (patch)
tree: 2d79821b05300d34d8871eb6c9662b359a2de85d /gallery_dl/job.py
parent: 7672a750cb74bf31e21d76aad2776367fd476155 (diff)
New upstream version 1.30.2.upstream/1.30.2
Diffstat (limited to 'gallery_dl/job.py')
-rw-r--r-- gallery_dl/job.py | 214
1 files changed, 119 insertions, 95 deletions
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index a88f536..3176eb4 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2023 Mike Fährmann
+# Copyright 2015-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -28,6 +28,7 @@ from . import (
)
from .extractor.message import Message
stdout_write = output.stdout_write
+FLAGS = util.FLAGS
class Job():
@@ -47,41 +48,16 @@ class Job():
self.kwdict = {}
self.kwdict_eval = False
- cfgpath = []
- if parent:
- if extr.category == parent.extractor.category or \
- extr.category in parent.parents:
- parents = parent.parents
- else:
- parents = parent.parents + (parent.extractor.category,)
-
- if parents:
- for category in parents:
- cat = "{}>{}".format(category, extr.category)
- cfgpath.append((cat, extr.subcategory))
- cfgpath.append((category + ">*", extr.subcategory))
- cfgpath.append((extr.category, extr.subcategory))
- self.parents = parents
- else:
- self.parents = ()
- else:
- self.parents = ()
-
- if extr.basecategory:
- if not cfgpath:
- cfgpath.append((extr.category, extr.subcategory))
- cfgpath.append((extr.basecategory, extr.subcategory))
-
- if cfgpath:
+ if cfgpath := self._build_config_path(parent):
+ if isinstance(cfgpath, list):
+ extr.config = extr._config_shared
+ extr.config_accumulate = extr._config_shared_accumulate
extr._cfgpath = cfgpath
- extr.config = extr._config_shared
- extr.config_accumulate = extr._config_shared_accumulate
- actions = extr.config("actions")
- if actions:
- from .actions import LoggerAdapter, parse
+ if actions := extr.config("actions"):
+ from .actions import LoggerAdapter, parse_logging
self._logger_adapter = LoggerAdapter
- self._logger_actions = parse(actions)
+ self._logger_actions = parse_logging(actions)
path_proxy = output.PathfmtProxy(self)
self._logger_extra = {
@@ -93,16 +69,6 @@ class Job():
extr.log = self._wrap_logger(extr.log)
extr.log.debug("Using %s for '%s'", extr.__class__.__name__, extr.url)
- # data from parent job
- if parent:
- pextr = parent.extractor
-
- # transfer (sub)category
- if pextr.config("category-transfer", pextr.categorytransfer):
- extr._cfgpath = pextr._cfgpath
- extr.category = pextr.category
- extr.subcategory = pextr.subcategory
-
self.metadata_url = extr.config2("metadata-url", "url-metadata")
self.metadata_http = extr.config2("metadata-http", "http-metadata")
metadata_path = extr.config2("metadata-path", "path-metadata")
@@ -121,8 +87,7 @@ class Job():
"current_git_head": util.git_head()
}
# user-supplied metadata
- kwdict = extr.config("keywords")
- if kwdict:
+ if kwdict := extr.config("keywords"):
if extr.config("keywords-eval"):
self.kwdict_eval = []
for key, value in kwdict.items():
@@ -134,6 +99,41 @@ class Job():
else:
self.kwdict.update(kwdict)
+ def _build_config_path(self, parent):
+ extr = self.extractor
+ cfgpath = []
+
+ if parent:
+ pextr = parent.extractor
+ if extr.category == pextr.category or \
+ extr.category in parent.parents:
+ parents = parent.parents
+ else:
+ parents = parent.parents + (pextr.category,)
+ self.parents = parents
+
+ if pextr.config("category-transfer", pextr.categorytransfer):
+ extr.category = pextr.category
+ extr.subcategory = pextr.subcategory
+ return pextr._cfgpath
+
+ if parents:
+ sub = extr.subcategory
+ for category in parents:
+ cat = f"{category}>{extr.category}"
+ cfgpath.append((cat, sub))
+ cfgpath.append((category + ">*", sub))
+ cfgpath.append((extr.category, sub))
+ else:
+ self.parents = ()
+
+ if extr.basecategory:
+ if not cfgpath:
+ cfgpath.append((extr.category, extr.subcategory))
+ cfgpath.append((extr.basecategory, extr.subcategory))
+
+ return cfgpath
+
def run(self):
"""Execute or run the job"""
extractor = self.extractor
@@ -151,9 +151,10 @@ class Job():
try:
for msg in extractor:
self.dispatch(msg)
- except exception.StopExtraction as exc:
- if exc.message:
- log.error(exc.message)
+ except exception.StopExtraction:
+ pass
+ except exception.AbortExtraction as exc:
+ log.error(exc.message)
self.status |= exc.code
except (exception.TerminateExtraction, exception.RestartExtraction):
raise
@@ -162,10 +163,14 @@ class Job():
log.debug("", exc_info=exc)
self.status |= exc.code
except OSError as exc:
- log.error("Unable to download data: %s: %s",
- exc.__class__.__name__, exc)
log.debug("", exc_info=exc)
- self.status |= 128
+ name = exc.__class__.__name__
+ if name == "JSONDecodeError":
+ log.error("Failed to parse JSON data: %s: %s", name, exc)
+ self.status |= 1
+ else: # regular OSError
+ log.error("Unable to download data: %s: %s", name, exc)
+ self.status |= 128
except Exception as exc:
log.error(("An unexpected error occurred: %s - %s. "
"Please run gallery-dl again with the --verbose flag, "
@@ -184,6 +189,8 @@ class Job():
self.handle_finalize()
extractor.finalize()
+ if s := extractor.status:
+ self.status |= s
return self.status
def dispatch(self, msg):
@@ -195,6 +202,8 @@ class Job():
if self.pred_url(url, kwdict):
self.update_kwdict(kwdict)
self.handle_url(url, kwdict)
+ if FLAGS.FILE is not None:
+ FLAGS.process("FILE")
elif msg[0] == Message.Directory:
self.update_kwdict(msg[1])
@@ -205,7 +214,10 @@ class Job():
if self.metadata_url:
kwdict[self.metadata_url] = url
if self.pred_queue(url, kwdict):
+ self.update_kwdict(kwdict)
self.handle_queue(url, kwdict)
+ if FLAGS.CHILD is not None:
+ FLAGS.process("CHILD")
def handle_url(self, url, kwdict):
"""Handle Message.Url"""
@@ -226,6 +238,8 @@ class Job():
kwdict["subcategory"] = extr.subcategory
if self.metadata_http:
kwdict.pop(self.metadata_http, None)
+ if extr.kwdict:
+ kwdict.update(extr.kwdict)
if self.kwdict:
kwdict.update(self.kwdict)
if self.kwdict_eval:
@@ -243,8 +257,7 @@ class Job():
if self.extractor.config(target + "-unique"):
predicates.append(util.UniquePredicate())
- pfilter = self.extractor.config(target + "-filter")
- if pfilter:
+ if pfilter := self.extractor.config(target + "-filter"):
try:
pred = util.FilterPredicate(pfilter, target)
except (SyntaxError, ValueError, TypeError) as exc:
@@ -252,8 +265,7 @@ class Job():
else:
predicates.append(pred)
- prange = self.extractor.config(target + "-range")
- if prange:
+ if prange := self.extractor.config(target + "-range"):
try:
pred = util.RangePredicate(prange)
except ValueError as exc:
@@ -382,6 +394,8 @@ class DownloadJob(Job):
if "post-after" in self.hooks:
for callback in self.hooks["post-after"]:
callback(self.pathfmt)
+ if FLAGS.POST is not None:
+ FLAGS.process("POST")
self.pathfmt.set_directory(kwdict)
if "post" in self.hooks:
for callback in self.hooks["post"]:
@@ -392,12 +406,10 @@ class DownloadJob(Job):
return
self.visited.add(url)
- cls = kwdict.get("_extractor")
- if cls:
+ if cls := kwdict.get("_extractor"):
extr = cls.from_url(url)
else:
- extr = extractor.find(url)
- if extr:
+ if extr := extractor.find(url):
if self._extractor_filter is None:
self._extractor_filter = self._build_extractor_filter()
if not self._extractor_filter(extr):
@@ -413,8 +425,7 @@ class DownloadJob(Job):
else:
extr._parentdir = pextr._parentdir
- pmeta = pextr.config2("parent-metadata", "metadata-parent")
- if pmeta:
+ if pmeta := pextr.config2("parent-metadata", "metadata-parent"):
if isinstance(pmeta, str):
data = self.kwdict.copy()
if kwdict:
@@ -446,9 +457,13 @@ class DownloadJob(Job):
except StopIteration:
pass
else:
+ pextr.log.info("Downloading fallback URL")
text.nameext_from_url(url, kwdict)
+ if kwdict["filename"].startswith((
+ "HLS", "DASH")):
+ kwdict["filename"] = url.rsplit("/", 2)[-2]
if url.startswith("ytdl:"):
- kwdict["extension"] = ""
+ kwdict["extension"] = "mp4"
self.handle_url(url, kwdict)
break
except exception.RestartExtraction:
@@ -463,8 +478,7 @@ class DownloadJob(Job):
self.archive.finalize()
self.archive.close()
- pathfmt = self.pathfmt
- if pathfmt:
+ if pathfmt := self.pathfmt:
hooks = self.hooks
if "post-after" in hooks:
for callback in hooks["post-after"]:
@@ -500,8 +514,7 @@ class DownloadJob(Job):
def download(self, url):
"""Download 'url'"""
scheme = url.partition(":")[0]
- downloader = self.get_downloader(scheme)
- if downloader:
+ if downloader := self.get_downloader(scheme):
try:
return downloader.download(url, self.pathfmt)
except OSError as exc:
@@ -547,8 +560,7 @@ class DownloadJob(Job):
# monkey-patch method to do nothing and always return True
self.download = pathfmt.fix_extension
- archive_path = cfg("archive")
- if archive_path:
+ if archive_path := cfg("archive"):
archive_table = cfg("archive-table")
archive_prefix = cfg("archive-prefix")
if archive_prefix is None:
@@ -585,8 +597,7 @@ class DownloadJob(Job):
self._archive_write_file = ("file" in events)
self._archive_write_skip = ("skip" in events)
- skip = cfg("skip", True)
- if skip:
+ if skip := cfg("skip", True):
self._skipexc = None
if skip == "enumerate":
pathfmt.check_file = pathfmt._enum_file
@@ -600,8 +611,7 @@ class DownloadJob(Job):
self._skipexc = SystemExit
self._skipmax = text.parse_int(smax)
- skip_filter = cfg("skip-filter")
- if skip_filter:
+ if skip_filter := cfg("skip-filter"):
self._skipftr = util.compile_filter(skip_filter)
else:
self._skipftr = None
@@ -614,8 +624,7 @@ class DownloadJob(Job):
if not cfg("postprocess", True):
return
- postprocessors = extr.config_accumulate("postprocessors")
- if postprocessors:
+ if postprocessors := extr.config_accumulate("postprocessors"):
self.hooks = collections.defaultdict(list)
pp_log = self.get_logger("postprocessor")
@@ -648,7 +657,26 @@ class DownloadJob(Job):
clist, negate)(extr):
continue
- name = pp_dict.get("name")
+ name = pp_dict.get("name", "")
+ if "__init__" not in pp_dict:
+ name, sep, event = name.rpartition("@")
+ if sep:
+ pp_dict["name"] = name
+ if "event" not in pp_dict:
+ pp_dict["event"] = event
+ else:
+ name = event
+
+ name, sep, mode = name.rpartition("/")
+ if sep:
+ pp_dict["name"] = name
+ if "mode" not in pp_dict:
+ pp_dict["mode"] = mode
+ else:
+ name = mode
+
+ pp_dict["__init__"] = None
+
pp_cls = postprocessor.find(name)
if not pp_cls:
pp_log.warning("module '%s' not found", name)
@@ -680,8 +708,7 @@ class DownloadJob(Job):
for hook, callback in hooks.items():
self.hooks[hook].append(callback)
- @staticmethod
- def _call_hook(callback, condition, pathfmt):
+ def _call_hook(self, callback, condition, pathfmt):
if condition(pathfmt.kwdict):
callback(pathfmt)
@@ -775,7 +802,7 @@ class KeywordJob(Job):
if markers is None:
markers = {markerid}
elif markerid in markers:
- write("{}\n <circular reference>\n".format(prefix[:-2]))
+ write(f"{prefix[:-2]}\n <circular reference>\n")
return # ignore circular reference
else:
markers.add(markerid)
@@ -801,7 +828,7 @@ class KeywordJob(Job):
else:
# string or number
- write("{}\n {}\n".format(key, value))
+ write(f"{key}\n {value}\n")
markers.remove(markerid)
@@ -816,20 +843,17 @@ class UrlJob(Job):
if depth >= self.maxdepth:
self.handle_queue = self.handle_url
- @staticmethod
- def handle_url(url, _):
+ def handle_url(self, url, _):
stdout_write(url + "\n")
- @staticmethod
- def handle_url_fallback(url, kwdict):
+ def handle_url_fallback(self, url, kwdict):
stdout_write(url + "\n")
if "_fallback" in kwdict:
for url in kwdict["_fallback"]:
stdout_write("| " + url + "\n")
def handle_queue(self, url, kwdict):
- cls = kwdict.get("_extractor")
- if cls:
+ if cls := kwdict.get("_extractor"):
extr = cls.from_url(url)
else:
extr = extractor.find(url)
@@ -862,20 +886,18 @@ class InfoJob(Job):
return 0
def _print_multi(self, title, *values):
- stdout_write("{}\n {}\n\n".format(
- title, " / ".join(map(util.json_dumps, values))))
+ stdout_write(
+ f"{title}\n {' / '.join(map(util.json_dumps, values))}\n\n")
def _print_config(self, title, optname, value):
optval = self.extractor.config(optname, util.SENTINEL)
if optval is not util.SENTINEL:
stdout_write(
- "{} (custom):\n {}\n{} (default):\n {}\n\n".format(
- title, util.json_dumps(optval),
- title, util.json_dumps(value)))
+ f"{title} (custom):\n {util.json_dumps(optval)}\n"
+ f"{title} (default):\n {util.json_dumps(value)}\n\n")
elif value:
stdout_write(
- "{} (default):\n {}\n\n".format(
- title, util.json_dumps(value)))
+ f"{title} (default):\n {util.json_dumps(value)}\n\n")
class DataJob(Job):
@@ -912,7 +934,10 @@ class DataJob(Job):
except exception.StopExtraction:
pass
except Exception as exc:
- self.data.append((exc.__class__.__name__, str(exc)))
+ self.data.append((-1, {
+ "error" : exc.__class__.__name__,
+ "message": str(exc),
+ }))
except BaseException:
pass
@@ -941,8 +966,7 @@ class DataJob(Job):
self.data.append((Message.Queue, url, self.filter(kwdict)))
def handle_queue_resolve(self, url, kwdict):
- cls = kwdict.get("_extractor")
- if cls:
+ if cls := kwdict.get("_extractor"):
extr = cls.from_url(url)
else:
extr = extractor.find(url)