diff options
Diffstat (limited to 'gallery_dl/postprocessor')
| -rw-r--r-- | gallery_dl/postprocessor/__init__.py | 2 | ||||
| -rw-r--r-- | gallery_dl/postprocessor/hash.py | 71 | ||||
| -rw-r--r-- | gallery_dl/postprocessor/metadata.py | 34 | ||||
| -rw-r--r-- | gallery_dl/postprocessor/rename.py | 91 | ||||
| -rw-r--r-- | gallery_dl/postprocessor/ugoira.py | 169 |
5 files changed, 304 insertions, 63 deletions
diff --git a/gallery_dl/postprocessor/__init__.py b/gallery_dl/postprocessor/__init__.py index 4690554..7837b06 100644 --- a/gallery_dl/postprocessor/__init__.py +++ b/gallery_dl/postprocessor/__init__.py @@ -12,9 +12,11 @@ modules = [ "classify", "compare", "exec", + "hash", "metadata", "mtime", "python", + "rename", "ugoira", "zip", ] diff --git a/gallery_dl/postprocessor/hash.py b/gallery_dl/postprocessor/hash.py new file mode 100644 index 0000000..92a7477 --- /dev/null +++ b/gallery_dl/postprocessor/hash.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- + +# Copyright 2024 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Compute file hash digests""" + +from .common import PostProcessor +import hashlib + + +class HashPP(PostProcessor): + + def __init__(self, job, options): + PostProcessor.__init__(self, job) + + self.chunk_size = options.get("chunk-size", 32768) + self.filename = options.get("filename") + + hashes = options.get("hashes") + if isinstance(hashes, dict): + self.hashes = list(hashes.items()) + elif isinstance(hashes, str): + self.hashes = [] + for h in hashes.split(","): + name, sep, key = h.partition(":") + self.hashes.append((key if sep else name, name)) + elif hashes: + self.hashes = hashes + else: + self.hashes = (("md5", "md5"), ("sha1", "sha1")) + + events = options.get("event") + if events is None: + events = ("file",) + elif isinstance(events, str): + events = events.split(",") + job.register_hooks({event: self.run for event in events}, options) + + def run(self, pathfmt): + hashes = [ + (key, hashlib.new(name)) + for key, name in self.hashes + ] + + size = self.chunk_size + with self._open(pathfmt) as fp: + while True: + data = fp.read(size) + if not data: + break + for _, h in hashes: + h.update(data) + + for key, h in hashes: + pathfmt.kwdict[key] = h.hexdigest() + + if self.filename: + pathfmt.build_path() + + def _open(self, pathfmt): + try: + return open(pathfmt.temppath, "rb") + except OSError: + return open(pathfmt.realpath, "rb") + + +__postprocessor__ = HashPP diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py index e89b170..3ef9fbc 100644 --- a/gallery_dl/postprocessor/metadata.py +++ b/gallery_dl/postprocessor/metadata.py @@ -103,10 +103,10 @@ class MetadataPP(PostProcessor): job.register_hooks({event: self.run for event in events}, options) self._init_archive(job, options, "_MD_") + self.filter = self._make_filter(options) self.mtime = options.get("mtime") self.omode = options.get("open", omode) self.encoding = options.get("encoding", "utf-8") - self.private = options.get("private", False) self.skip = options.get("skip", False) def run(self, pathfmt): @@ -114,7 +114,10 @@ class MetadataPP(PostProcessor): if archive and archive.check(pathfmt.kwdict): return - directory = self._directory(pathfmt) + if util.WINDOWS and pathfmt.extended: + directory = pathfmt._extended_path(self._directory(pathfmt)) + else: + directory = self._directory(pathfmt) path = directory + self._filename(pathfmt) if self.skip and os.path.exists(path): @@ -231,10 +234,33 @@ class MetadataPP(PostProcessor): fp.write("\n".join(tags) + "\n") def _write_json(self, fp, kwdict): - if not self.private: - kwdict = util.filter_dict(kwdict) + if self.filter: + kwdict = self.filter(kwdict) fp.write(self._json_encode(kwdict) + "\n") + def _make_filter(self, options): + include = options.get("include") + if include: + if isinstance(include, str): + include = include.split(",") + return lambda d: {k: d[k] for k in include if k in d} + + exclude = options.get("exclude") + private = options.get("private") + if exclude: + if isinstance(exclude, str): + exclude = exclude.split(",") + exclude = set(exclude) + + if private: + return lambda d: {k: v for k, v in d.items() + if k not in exclude} + return lambda d: {k: v for k, v in util.filter_dict(d).items() + if k not in exclude} + + if not private: + return util.filter_dict + @staticmethod def _make_encoder(options, indent=None): return json.JSONEncoder( diff --git a/gallery_dl/postprocessor/rename.py b/gallery_dl/postprocessor/rename.py new file mode 100644 index 0000000..f71738d --- /dev/null +++ b/gallery_dl/postprocessor/rename.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- + +# Copyright 2024 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Rename files""" + +from .common import PostProcessor +from .. import formatter +import os + + +class RenamePP(PostProcessor): + + def __init__(self, job, options): + PostProcessor.__init__(self, job) + + self.skip = options.get("skip", True) + old = options.get("from") + new = options.get("to") + + if old: + self._old = self._apply_format(old) + self._new = (self._apply_format(new) if new else + self._apply_pathfmt) + job.register_hooks({ + "prepare": self.rename_from, + }, options) + + elif new: + self._old = self._apply_pathfmt + self._new = self._apply_format(new) + job.register_hooks({ + "skip" : self.rename_to_skip, + "prepare-after": self.rename_to_pafter, + }, options) + + else: + raise ValueError("Option 'from' or 'to' is required") + + def rename_from(self, pathfmt): + name_old = self._old(pathfmt) + path_old = pathfmt.realdirectory + name_old + + if os.path.exists(path_old): + name_new = self._new(pathfmt) + path_new = pathfmt.realdirectory + name_new + self._rename(path_old, name_old, path_new, name_new) + + def rename_to_skip(self, pathfmt): + name_old = self._old(pathfmt) + path_old = pathfmt.realdirectory + name_old + + if os.path.exists(path_old): + pathfmt.filename = name_new = self._new(pathfmt) + pathfmt.path = pathfmt.directory + name_new + pathfmt.realpath = path_new = pathfmt.realdirectory + name_new + self._rename(path_old, name_old, path_new, name_new) + + def rename_to_pafter(self, pathfmt): + pathfmt.filename = name_new = self._new(pathfmt) + pathfmt.path = pathfmt.directory + name_new + pathfmt.realpath = pathfmt.realdirectory + name_new + pathfmt.kwdict["_file_recheck"] = True + + def _rename(self, path_old, name_old, path_new, name_new): + if self.skip and os.path.exists(path_new): + return self.log.warning( + "Not renaming '%s' to '%s' since another file with the " + "same name exists", name_old, name_new) + + self.log.info("'%s' -> '%s'", name_old, name_new) + os.replace(path_old, path_new) + + def _apply_pathfmt(self, pathfmt): + return pathfmt.build_filename(pathfmt.kwdict) + + def _apply_format(self, format_string): + fmt = formatter.parse(format_string).format_map + + def apply(pathfmt): + return pathfmt.clean_path(pathfmt.clean_segment(fmt( + pathfmt.kwdict))) + + return apply + + +__postprocessor__ = RenamePP diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py index 9e60ce2..f053afa 100644 --- a/gallery_dl/postprocessor/ugoira.py +++ b/gallery_dl/postprocessor/ugoira.py @@ -36,7 +36,8 @@ class UgoiraPP(PostProcessor): self.delete = not options.get("keep-files", False) self.repeat = options.get("repeat-last-frame", True) self.mtime = options.get("mtime", True) - self.uniform = False + self.skip = options.get("skip", True) + self.uniform = self._convert_zip = self._convert_files = False ffmpeg = options.get("ffmpeg-location") self.ffmpeg = util.expand_path(ffmpeg) if ffmpeg else "ffmpeg" @@ -90,33 +91,44 @@ class UgoiraPP(PostProcessor): if self.prevent_odd: args += ("-vf", "crop=iw-mod(iw\\,2):ih-mod(ih\\,2)") - job.register_hooks( - {"prepare": self.prepare, "file": self.convert}, options) + job.register_hooks({ + "prepare": self.prepare, + "file" : self.convert_zip, + "after" : self.convert_files, + }, options) def prepare(self, pathfmt): - self._frames = None - - if pathfmt.extension != "zip": + if "frames" not in pathfmt.kwdict: + self._frames = None return - kwdict = pathfmt.kwdict - if "frames" in kwdict: - self._frames = kwdict["frames"] - elif "pixiv_ugoira_frame_data" in kwdict: - self._frames = kwdict["pixiv_ugoira_frame_data"]["data"] + self._frames = pathfmt.kwdict["frames"] + if pathfmt.extension == "zip": + self._convert_zip = True + if self.delete: + pathfmt.set_extension(self.extension) + pathfmt.build_path() else: - return - - if self.delete: - pathfmt.set_extension(self.extension) pathfmt.build_path() + index = pathfmt.kwdict["_ugoira_frame_index"] + frame = self._frames[index].copy() + frame["index"] = index + frame["path"] = pathfmt.realpath + frame["ext"] = pathfmt.kwdict["extension"] + + if not index: + self._files = [frame] + else: + self._files.append(frame) + if len(self._files) >= len(self._frames): + self._convert_files = True - def convert(self, pathfmt): - if not self._frames: + def convert_zip(self, pathfmt): + if not self._convert_zip: return + self._convert_zip = False with tempfile.TemporaryDirectory() as tempdir: - # extract frames try: with zipfile.ZipFile(pathfmt.temppath) as zfile: zfile.extractall(tempdir) @@ -124,53 +136,89 @@ class UgoiraPP(PostProcessor): pathfmt.realpath = pathfmt.temppath return - # process frames and collect command-line arguments - pathfmt.set_extension(self.extension) - pathfmt.build_path() - - args = self._process(pathfmt, tempdir) - if self.args_pp: - args += self.args_pp - if self.args: - args += self.args - - # ensure target directory exists - os.makedirs(pathfmt.realdirectory, exist_ok=True) - - # invoke ffmpeg - try: - if self.twopass: - if "-f" not in self.args: - args += ("-f", self.extension) - args += ("-passlogfile", tempdir + "/ffmpeg2pass", "-pass") - self._exec(args + ["1", "-y", os.devnull]) - self._exec(args + ["2", pathfmt.realpath]) - else: - args.append(pathfmt.realpath) - self._exec(args) - if self._finalize: - self._finalize(pathfmt, tempdir) - except OSError as exc: - print() - self.log.error("Unable to invoke FFmpeg (%s: %s)", - exc.__class__.__name__, exc) - pathfmt.realpath = pathfmt.temppath - except Exception as exc: - print() - self.log.error("%s: %s", exc.__class__.__name__, exc) - self.log.debug("", exc_info=True) - pathfmt.realpath = pathfmt.temppath - else: - if self.mtime: - mtime = pathfmt.kwdict.get("_mtime") - if mtime: - util.set_mtime(pathfmt.realpath, mtime) + if self.convert(pathfmt, tempdir): if self.delete: pathfmt.delete = True else: + self.log.info(pathfmt.filename) pathfmt.set_extension("zip") pathfmt.build_path() + def convert_files(self, pathfmt): + if not self._convert_files: + return + self._convert_files = False + + with tempfile.TemporaryDirectory() as tempdir: + for frame in self._files: + + # update frame filename extension + frame["file"] = name = "{}.{}".format( + frame["file"].partition(".")[0], frame["ext"]) + + # move frame into tempdir + try: + self._copy_file(frame["path"], tempdir + "/" + name) + except OSError as exc: + self.log.debug("Unable to copy frame %s (%s: %s)", + name, exc.__class__.__name__, exc) + return + + pathfmt.kwdict["num"] = 0 + self._frames = self._files + if self.convert(pathfmt, tempdir): + self.log.info(pathfmt.filename) + if self.delete: + self.log.debug("Deleting frames") + for frame in self._files: + util.remove_file(frame["path"]) + + def convert(self, pathfmt, tempdir): + pathfmt.set_extension(self.extension) + pathfmt.build_path() + if self.skip and pathfmt.exists(): + return True + + # process frames and collect command-line arguments + args = self._process(pathfmt, tempdir) + if self.args_pp: + args += self.args_pp + if self.args: + args += self.args + + # ensure target directory exists + os.makedirs(pathfmt.realdirectory, exist_ok=True) + + # invoke ffmpeg + try: + if self.twopass: + if "-f" not in self.args: + args += ("-f", self.extension) + args += ("-passlogfile", tempdir + "/ffmpeg2pass", "-pass") + self._exec(args + ["1", "-y", os.devnull]) + self._exec(args + ["2", pathfmt.realpath]) + else: + args.append(pathfmt.realpath) + self._exec(args) + if self._finalize: + self._finalize(pathfmt, tempdir) + except OSError as exc: + print() + self.log.error("Unable to invoke FFmpeg (%s: %s)", + exc.__class__.__name__, exc) + pathfmt.realpath = pathfmt.temppath + except Exception as exc: + print() + self.log.error("%s: %s", exc.__class__.__name__, exc) + self.log.debug("", exc_info=True) + pathfmt.realpath = pathfmt.temppath + else: + if self.mtime: + mtime = pathfmt.kwdict.get("_mtime") + if mtime: + util.set_mtime(pathfmt.realpath, mtime) + return True + def _exec(self, args): self.log.debug(args) out = None if self.output else subprocess.DEVNULL @@ -182,6 +230,9 @@ class UgoiraPP(PostProcessor): raise ValueError() return retcode + def _copy_file(self, src, dst): + shutil.copyfile(src, dst) + def _process_concat(self, pathfmt, tempdir): rate_in, rate_out = self.calculate_framerate(self._frames) args = [self.ffmpeg, "-f", "concat"] |
