aboutsummaryrefslogtreecommitdiffstats
path: root/gallery_dl/job.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/job.py')
-rw-r--r--gallery_dl/job.py171
1 files changed, 99 insertions, 72 deletions
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 9369e5d..7a52bd6 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -87,17 +87,22 @@ class Job():
"current_git_head": util.git_head()
}
# user-supplied metadata
- if kwdict := extr.config("keywords"):
- if extr.config("keywords-eval"):
- self.kwdict_eval = []
- for key, value in kwdict.items():
- if isinstance(value, str):
- fmt = formatter.parse(value, None, util.identity)
- self.kwdict_eval.append((key, fmt.format_map))
- else:
- self.kwdict[key] = value
- else:
- self.kwdict.update(kwdict)
+ kwdict = extr.config("keywords")
+ if kwdict_global := extr.config("keywords-global"):
+ kwdict = {**kwdict_global, **kwdict} if kwdict else kwdict_global
+ elif not kwdict:
+ return
+
+ if extr.config("keywords-eval"):
+ self.kwdict_eval = []
+ for key, value in kwdict.items():
+ if isinstance(value, str):
+ fmt = formatter.parse(value, None, util.identity)
+ self.kwdict_eval.append((key, fmt.format_map))
+ else:
+ self.kwdict[key] = value
+ else:
+ self.kwdict.update(kwdict)
def _build_config_path(self, parent):
extr = self.extractor
@@ -130,6 +135,8 @@ class Job():
if extr.basecategory:
if not cfgpath:
cfgpath.append((extr.category, extr.subcategory))
+ if extr.basesubcategory:
+ cfgpath.append((extr.basesubcategory, extr.subcategory))
cfgpath.append((extr.basecategory, extr.subcategory))
return cfgpath
@@ -138,37 +145,35 @@ class Job():
"""Execute or run the job"""
extractor = self.extractor
log = extractor.log
- msg = None
self._init()
# sleep before extractor start
sleep = util.build_duration_func(
extractor.config("sleep-extractor"))
- if sleep:
+ if sleep is not None:
extractor.sleep(sleep(), "extractor")
try:
- for msg in extractor:
- self.dispatch(msg)
+ msg = self.dispatch(extractor)
except exception.StopExtraction as exc:
if exc.depth > 1 and exc.target != extractor.__class__.subcategory:
exc.depth -= 1
raise
pass
except exception.AbortExtraction as exc:
+ log.traceback(exc)
log.error(exc.message)
self.status |= exc.code
except (exception.TerminateExtraction, exception.RestartExtraction):
raise
except exception.GalleryDLException as exc:
log.error("%s: %s", exc.__class__.__name__, exc)
- log.debug("", exc_info=exc)
+ log.traceback(exc)
self.status |= exc.code
except OSError as exc:
- log.debug("", exc_info=exc)
- name = exc.__class__.__name__
- if name == "JSONDecodeError":
+ log.traceback(exc)
+ if (name := exc.__class__.__name__) == "JSONDecodeError":
log.error("Failed to parse JSON data: %s: %s", name, exc)
self.status |= 1
else: # regular OSError
@@ -180,7 +185,7 @@ class Job():
"copy its output and report this issue on "
"https://github.com/mikf/gallery-dl/issues ."),
exc.__class__.__name__, exc)
- log.debug("", exc_info=exc)
+ log.traceback(exc)
self.status |= 1
except BaseException:
self.status |= 1
@@ -196,31 +201,47 @@ class Job():
self.status |= s
return self.status
- def dispatch(self, msg):
+ def dispatch(self, messages):
"""Call the appropriate message handler"""
- if msg[0] == Message.Url:
- _, url, kwdict = msg
- if self.metadata_url:
- kwdict[self.metadata_url] = url
- if self.pred_url(url, kwdict):
- self.update_kwdict(kwdict)
- self.handle_url(url, kwdict)
- if FLAGS.FILE is not None:
- FLAGS.process("FILE")
-
- elif msg[0] == Message.Directory:
- self.update_kwdict(msg[1])
- self.handle_directory(msg[1])
-
- elif msg[0] == Message.Queue:
- _, url, kwdict = msg
- if self.metadata_url:
- kwdict[self.metadata_url] = url
- if self.pred_queue(url, kwdict):
- self.update_kwdict(kwdict)
- self.handle_queue(url, kwdict)
- if FLAGS.CHILD is not None:
- FLAGS.process("CHILD")
+ msg = None
+ process = True
+
+ for msg, url, kwdict in messages:
+
+ if msg == Message.Directory:
+ if self.pred_post(url, kwdict):
+ process = True
+ self.update_kwdict(kwdict)
+ self.handle_directory(kwdict)
+ else:
+ process = None
+ if FLAGS.POST is not None:
+ FLAGS.process("POST")
+
+ elif process is None:
+ continue
+
+ elif msg == Message.Url:
+ if self.metadata_url:
+ kwdict[self.metadata_url] = url
+ if self.pred_url(url, kwdict):
+ self.update_kwdict(kwdict)
+ self.handle_url(url, kwdict)
+ if FLAGS.FILE is not None:
+ FLAGS.process("FILE")
+
+ elif msg == Message.Queue:
+ if process is None:
+ continue
+ if self.metadata_url:
+ kwdict[self.metadata_url] = url
+ if self.pred_queue(url, kwdict):
+ self.update_kwdict(kwdict)
+ self.handle_queue(url, kwdict)
+ if FLAGS.CHILD is not None:
+ FLAGS.process("CHILD")
+
+ return msg
def handle_url(self, url, kwdict):
"""Handle Message.Url"""
@@ -252,15 +273,16 @@ class Job():
def _init(self):
self.extractor.initialize()
self.pred_url = self._prepare_predicates("image", True)
+ self.pred_post = self._prepare_predicates("post", False)
self.pred_queue = self._prepare_predicates("chapter", False)
def _prepare_predicates(self, target, skip=True):
predicates = []
- if self.extractor.config(target + "-unique"):
+ if self.extractor.config(f"{target}-unique"):
predicates.append(util.UniquePredicate())
- if pfilter := self.extractor.config(target + "-filter"):
+ if pfilter := self.extractor.config(f"{target}-filter"):
try:
pred = util.FilterPredicate(pfilter, target)
except (SyntaxError, ValueError, TypeError) as exc:
@@ -268,7 +290,7 @@ class Job():
else:
predicates.append(pred)
- if prange := self.extractor.config(target + "-range"):
+ if prange := self.extractor.config(f"{target}-range"):
try:
pred = util.RangePredicate(prange)
except ValueError as exc:
@@ -288,7 +310,7 @@ class Job():
return self._logger_adapter(logger, self)
def _write_unsupported(self, url):
- if self.ulog:
+ if self.ulog is not None:
self.ulog.info(url)
@@ -321,7 +343,7 @@ class DownloadJob(Job):
for callback in hooks["prepare"]:
callback(pathfmt)
- if archive and archive.check(kwdict):
+ if archive is not None and archive.check(kwdict):
pathfmt.fix_extension()
self.handle_skip()
return
@@ -330,7 +352,7 @@ class DownloadJob(Job):
pathfmt.build_path()
if pathfmt.exists():
- if archive and self._archive_write_skip:
+ if archive is not None and self._archive_write_skip:
archive.add(kwdict)
self.handle_skip()
return
@@ -340,12 +362,12 @@ class DownloadJob(Job):
callback(pathfmt)
if kwdict.pop("_file_recheck", False) and pathfmt.exists():
- if archive and self._archive_write_skip:
+ if archive is not None and self._archive_write_skip:
archive.add(kwdict)
self.handle_skip()
return
- if self.sleep:
+ if self.sleep is not None:
self.extractor.sleep(self.sleep(), "download")
# download from URL
@@ -369,7 +391,7 @@ class DownloadJob(Job):
return
if not pathfmt.temppath:
- if archive and self._archive_write_skip:
+ if archive is not None and self._archive_write_skip:
archive.add(kwdict)
self.handle_skip()
return
@@ -383,15 +405,17 @@ class DownloadJob(Job):
pathfmt.finalize()
self.out.success(pathfmt.path)
self._skipcnt = 0
- if archive and self._archive_write_file:
+ if archive is not None and self._archive_write_file:
archive.add(kwdict)
if "after" in hooks:
for callback in hooks["after"]:
callback(pathfmt)
+ if archive is not None and self._archive_write_after:
+ archive.add(kwdict)
def handle_directory(self, kwdict):
"""Set and create the target directory for downloads"""
- if not self.pathfmt:
+ if self.pathfmt is None:
self.initialize(kwdict)
else:
if "post-after" in self.hooks:
@@ -428,7 +452,8 @@ class DownloadJob(Job):
else:
extr._parentdir = pextr._parentdir
- if pmeta := pextr.config2("parent-metadata", "metadata-parent"):
+ if pmeta := pextr.config2(
+ "parent-metadata", "metadata-parent", pextr.parent):
if isinstance(pmeta, str):
data = self.kwdict.copy()
if kwdict:
@@ -509,7 +534,7 @@ class DownloadJob(Job):
self.out.skip(pathfmt.path)
if self._skipexc:
- if not self._skipftr or self._skipftr(pathfmt.kwdict):
+ if self._skipftr is None or self._skipftr(pathfmt.kwdict):
self._skipcnt += 1
if self._skipcnt >= self._skipmax:
raise self._skipexc
@@ -553,7 +578,7 @@ class DownloadJob(Job):
cfg = extr.config
pathfmt = self.pathfmt = path.PathFormat(extr)
- if kwdict:
+ if kwdict is not None:
pathfmt.set_directory(kwdict)
self.sleep = util.build_duration_func(cfg("sleep"))
@@ -593,11 +618,13 @@ class DownloadJob(Job):
if events is None:
self._archive_write_file = True
self._archive_write_skip = False
+ self._archive_write_after = False
else:
if isinstance(events, str):
events = events.split(",")
self._archive_write_file = ("file" in events)
self._archive_write_skip = ("skip" in events)
+ self._archive_write_after = ("after" in events)
if skip := cfg("skip", True):
self._skipexc = None
@@ -621,7 +648,7 @@ class DownloadJob(Job):
else:
# monkey-patch methods to always return False
pathfmt.exists = lambda x=None: False
- if self.archive:
+ if self.archive is not None:
self.archive.check = pathfmt.exists
if not cfg("postprocess", True):
@@ -681,15 +708,15 @@ class DownloadJob(Job):
pp_dict["__init__"] = None
pp_cls = postprocessor.find(name)
- if not pp_cls:
+ if pp_cls is None:
pp_log.warning("module '%s' not found", name)
continue
try:
pp_obj = pp_cls(self, pp_dict)
except Exception as exc:
+ pp_log.traceback(exc)
pp_log.error("'%s' initialization failed: %s: %s",
name, exc.__class__.__name__, exc)
- pp_log.debug("", exc_info=exc)
else:
pp_list.append(pp_obj)
@@ -706,15 +733,11 @@ class DownloadJob(Job):
condition = util.compile_filter(expr)
for hook, callback in hooks.items():
self.hooks[hook].append(functools.partial(
- self._call_hook, callback, condition))
+ _call_hook_condition, callback, condition))
else:
for hook, callback in hooks.items():
self.hooks[hook].append(callback)
- def _call_hook(self, callback, condition, pathfmt):
- if condition(pathfmt.kwdict):
- callback(pathfmt)
-
def _build_extractor_filter(self):
clist = self.extractor.config("whitelist")
if clist is not None:
@@ -730,20 +753,25 @@ class DownloadJob(Job):
return util.build_extractor_filter(clist, negate, special)
+def _call_hook_condition(callback, condition, pathfmt):
+ if condition(pathfmt.kwdict):
+ callback(pathfmt)
+
+
class SimulationJob(DownloadJob):
"""Simulate the extraction process without downloading anything"""
def handle_url(self, url, kwdict):
ext = kwdict["extension"] or "jpg"
kwdict["extension"] = self.pathfmt.extension_map(ext, ext)
- if self.sleep:
+ if self.sleep is not None:
self.extractor.sleep(self.sleep(), "download")
- if self.archive and self._archive_write_skip:
+ if self.archive is not None and self._archive_write_skip:
self.archive.add(kwdict)
self.out.skip(self.pathfmt.build_filename(kwdict))
def handle_directory(self, kwdict):
- if not self.pathfmt:
+ if self.pathfmt is None:
self.initialize()
@@ -931,13 +959,12 @@ class DataJob(Job):
extractor = self.extractor
sleep = util.build_duration_func(
extractor.config("sleep-extractor"))
- if sleep:
+ if sleep is not None:
extractor.sleep(sleep(), "extractor")
# collect data
try:
- for msg in extractor:
- self.dispatch(msg)
+ self.dispatch(extractor)
except exception.StopExtraction:
pass
except Exception as exc: