diff options
Diffstat (limited to 'gallery_dl/job.py')
| -rw-r--r-- | gallery_dl/job.py | 171 |
1 files changed, 99 insertions, 72 deletions
diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 9369e5d..7a52bd6 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -87,17 +87,22 @@ class Job(): "current_git_head": util.git_head() } # user-supplied metadata - if kwdict := extr.config("keywords"): - if extr.config("keywords-eval"): - self.kwdict_eval = [] - for key, value in kwdict.items(): - if isinstance(value, str): - fmt = formatter.parse(value, None, util.identity) - self.kwdict_eval.append((key, fmt.format_map)) - else: - self.kwdict[key] = value - else: - self.kwdict.update(kwdict) + kwdict = extr.config("keywords") + if kwdict_global := extr.config("keywords-global"): + kwdict = {**kwdict_global, **kwdict} if kwdict else kwdict_global + elif not kwdict: + return + + if extr.config("keywords-eval"): + self.kwdict_eval = [] + for key, value in kwdict.items(): + if isinstance(value, str): + fmt = formatter.parse(value, None, util.identity) + self.kwdict_eval.append((key, fmt.format_map)) + else: + self.kwdict[key] = value + else: + self.kwdict.update(kwdict) def _build_config_path(self, parent): extr = self.extractor @@ -130,6 +135,8 @@ class Job(): if extr.basecategory: if not cfgpath: cfgpath.append((extr.category, extr.subcategory)) + if extr.basesubcategory: + cfgpath.append((extr.basesubcategory, extr.subcategory)) cfgpath.append((extr.basecategory, extr.subcategory)) return cfgpath @@ -138,37 +145,35 @@ class Job(): """Execute or run the job""" extractor = self.extractor log = extractor.log - msg = None self._init() # sleep before extractor start sleep = util.build_duration_func( extractor.config("sleep-extractor")) - if sleep: + if sleep is not None: extractor.sleep(sleep(), "extractor") try: - for msg in extractor: - self.dispatch(msg) + msg = self.dispatch(extractor) except exception.StopExtraction as exc: if exc.depth > 1 and exc.target != extractor.__class__.subcategory: exc.depth -= 1 raise pass except exception.AbortExtraction as exc: + log.traceback(exc) log.error(exc.message) self.status |= exc.code except (exception.TerminateExtraction, exception.RestartExtraction): raise except exception.GalleryDLException as exc: log.error("%s: %s", exc.__class__.__name__, exc) - log.debug("", exc_info=exc) + log.traceback(exc) self.status |= exc.code except OSError as exc: - log.debug("", exc_info=exc) - name = exc.__class__.__name__ - if name == "JSONDecodeError": + log.traceback(exc) + if (name := exc.__class__.__name__) == "JSONDecodeError": log.error("Failed to parse JSON data: %s: %s", name, exc) self.status |= 1 else: # regular OSError @@ -180,7 +185,7 @@ class Job(): "copy its output and report this issue on " "https://github.com/mikf/gallery-dl/issues ."), exc.__class__.__name__, exc) - log.debug("", exc_info=exc) + log.traceback(exc) self.status |= 1 except BaseException: self.status |= 1 @@ -196,31 +201,47 @@ class Job(): self.status |= s return self.status - def dispatch(self, msg): + def dispatch(self, messages): """Call the appropriate message handler""" - if msg[0] == Message.Url: - _, url, kwdict = msg - if self.metadata_url: - kwdict[self.metadata_url] = url - if self.pred_url(url, kwdict): - self.update_kwdict(kwdict) - self.handle_url(url, kwdict) - if FLAGS.FILE is not None: - FLAGS.process("FILE") - - elif msg[0] == Message.Directory: - self.update_kwdict(msg[1]) - self.handle_directory(msg[1]) - - elif msg[0] == Message.Queue: - _, url, kwdict = msg - if self.metadata_url: - kwdict[self.metadata_url] = url - if self.pred_queue(url, kwdict): - self.update_kwdict(kwdict) - self.handle_queue(url, kwdict) - if FLAGS.CHILD is not None: - FLAGS.process("CHILD") + msg = None + process = True + + for msg, url, kwdict in messages: + + if msg == Message.Directory: + if self.pred_post(url, kwdict): + process = True + self.update_kwdict(kwdict) + self.handle_directory(kwdict) + else: + process = None + if FLAGS.POST is not None: + FLAGS.process("POST") + + elif process is None: + continue + + elif msg == Message.Url: + if self.metadata_url: + kwdict[self.metadata_url] = url + if self.pred_url(url, kwdict): + self.update_kwdict(kwdict) + self.handle_url(url, kwdict) + if FLAGS.FILE is not None: + FLAGS.process("FILE") + + elif msg == Message.Queue: + if process is None: + continue + if self.metadata_url: + kwdict[self.metadata_url] = url + if self.pred_queue(url, kwdict): + self.update_kwdict(kwdict) + self.handle_queue(url, kwdict) + if FLAGS.CHILD is not None: + FLAGS.process("CHILD") + + return msg def handle_url(self, url, kwdict): """Handle Message.Url""" @@ -252,15 +273,16 @@ class Job(): def _init(self): self.extractor.initialize() self.pred_url = self._prepare_predicates("image", True) + self.pred_post = self._prepare_predicates("post", False) self.pred_queue = self._prepare_predicates("chapter", False) def _prepare_predicates(self, target, skip=True): predicates = [] - if self.extractor.config(target + "-unique"): + if self.extractor.config(f"{target}-unique"): predicates.append(util.UniquePredicate()) - if pfilter := self.extractor.config(target + "-filter"): + if pfilter := self.extractor.config(f"{target}-filter"): try: pred = util.FilterPredicate(pfilter, target) except (SyntaxError, ValueError, TypeError) as exc: @@ -268,7 +290,7 @@ class Job(): else: predicates.append(pred) - if prange := self.extractor.config(target + "-range"): + if prange := self.extractor.config(f"{target}-range"): try: pred = util.RangePredicate(prange) except ValueError as exc: @@ -288,7 +310,7 @@ class Job(): return self._logger_adapter(logger, self) def _write_unsupported(self, url): - if self.ulog: + if self.ulog is not None: self.ulog.info(url) @@ -321,7 +343,7 @@ class DownloadJob(Job): for callback in hooks["prepare"]: callback(pathfmt) - if archive and archive.check(kwdict): + if archive is not None and archive.check(kwdict): pathfmt.fix_extension() self.handle_skip() return @@ -330,7 +352,7 @@ class DownloadJob(Job): pathfmt.build_path() if pathfmt.exists(): - if archive and self._archive_write_skip: + if archive is not None and self._archive_write_skip: archive.add(kwdict) self.handle_skip() return @@ -340,12 +362,12 @@ class DownloadJob(Job): callback(pathfmt) if kwdict.pop("_file_recheck", False) and pathfmt.exists(): - if archive and self._archive_write_skip: + if archive is not None and self._archive_write_skip: archive.add(kwdict) self.handle_skip() return - if self.sleep: + if self.sleep is not None: self.extractor.sleep(self.sleep(), "download") # download from URL @@ -369,7 +391,7 @@ class DownloadJob(Job): return if not pathfmt.temppath: - if archive and self._archive_write_skip: + if archive is not None and self._archive_write_skip: archive.add(kwdict) self.handle_skip() return @@ -383,15 +405,17 @@ class DownloadJob(Job): pathfmt.finalize() self.out.success(pathfmt.path) self._skipcnt = 0 - if archive and self._archive_write_file: + if archive is not None and self._archive_write_file: archive.add(kwdict) if "after" in hooks: for callback in hooks["after"]: callback(pathfmt) + if archive is not None and self._archive_write_after: + archive.add(kwdict) def handle_directory(self, kwdict): """Set and create the target directory for downloads""" - if not self.pathfmt: + if self.pathfmt is None: self.initialize(kwdict) else: if "post-after" in self.hooks: @@ -428,7 +452,8 @@ class DownloadJob(Job): else: extr._parentdir = pextr._parentdir - if pmeta := pextr.config2("parent-metadata", "metadata-parent"): + if pmeta := pextr.config2( + "parent-metadata", "metadata-parent", pextr.parent): if isinstance(pmeta, str): data = self.kwdict.copy() if kwdict: @@ -509,7 +534,7 @@ class DownloadJob(Job): self.out.skip(pathfmt.path) if self._skipexc: - if not self._skipftr or self._skipftr(pathfmt.kwdict): + if self._skipftr is None or self._skipftr(pathfmt.kwdict): self._skipcnt += 1 if self._skipcnt >= self._skipmax: raise self._skipexc @@ -553,7 +578,7 @@ class DownloadJob(Job): cfg = extr.config pathfmt = self.pathfmt = path.PathFormat(extr) - if kwdict: + if kwdict is not None: pathfmt.set_directory(kwdict) self.sleep = util.build_duration_func(cfg("sleep")) @@ -593,11 +618,13 @@ class DownloadJob(Job): if events is None: self._archive_write_file = True self._archive_write_skip = False + self._archive_write_after = False else: if isinstance(events, str): events = events.split(",") self._archive_write_file = ("file" in events) self._archive_write_skip = ("skip" in events) + self._archive_write_after = ("after" in events) if skip := cfg("skip", True): self._skipexc = None @@ -621,7 +648,7 @@ class DownloadJob(Job): else: # monkey-patch methods to always return False pathfmt.exists = lambda x=None: False - if self.archive: + if self.archive is not None: self.archive.check = pathfmt.exists if not cfg("postprocess", True): @@ -681,15 +708,15 @@ class DownloadJob(Job): pp_dict["__init__"] = None pp_cls = postprocessor.find(name) - if not pp_cls: + if pp_cls is None: pp_log.warning("module '%s' not found", name) continue try: pp_obj = pp_cls(self, pp_dict) except Exception as exc: + pp_log.traceback(exc) pp_log.error("'%s' initialization failed: %s: %s", name, exc.__class__.__name__, exc) - pp_log.debug("", exc_info=exc) else: pp_list.append(pp_obj) @@ -706,15 +733,11 @@ class DownloadJob(Job): condition = util.compile_filter(expr) for hook, callback in hooks.items(): self.hooks[hook].append(functools.partial( - self._call_hook, callback, condition)) + _call_hook_condition, callback, condition)) else: for hook, callback in hooks.items(): self.hooks[hook].append(callback) - def _call_hook(self, callback, condition, pathfmt): - if condition(pathfmt.kwdict): - callback(pathfmt) - def _build_extractor_filter(self): clist = self.extractor.config("whitelist") if clist is not None: @@ -730,20 +753,25 @@ class DownloadJob(Job): return util.build_extractor_filter(clist, negate, special) +def _call_hook_condition(callback, condition, pathfmt): + if condition(pathfmt.kwdict): + callback(pathfmt) + + class SimulationJob(DownloadJob): """Simulate the extraction process without downloading anything""" def handle_url(self, url, kwdict): ext = kwdict["extension"] or "jpg" kwdict["extension"] = self.pathfmt.extension_map(ext, ext) - if self.sleep: + if self.sleep is not None: self.extractor.sleep(self.sleep(), "download") - if self.archive and self._archive_write_skip: + if self.archive is not None and self._archive_write_skip: self.archive.add(kwdict) self.out.skip(self.pathfmt.build_filename(kwdict)) def handle_directory(self, kwdict): - if not self.pathfmt: + if self.pathfmt is None: self.initialize() @@ -931,13 +959,12 @@ class DataJob(Job): extractor = self.extractor sleep = util.build_duration_func( extractor.config("sleep-extractor")) - if sleep: + if sleep is not None: extractor.sleep(sleep(), "extractor") # collect data try: - for msg in extractor: - self.dispatch(msg) + self.dispatch(extractor) except exception.StopExtraction: pass except Exception as exc: |
