about | summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/postprocessor
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/postprocessor')
-rw-r--r-- gallery_dl/postprocessor/__init__.py | 2
-rw-r--r-- gallery_dl/postprocessor/hash.py | 71
-rw-r--r-- gallery_dl/postprocessor/metadata.py | 34
-rw-r--r-- gallery_dl/postprocessor/rename.py | 91
-rw-r--r-- gallery_dl/postprocessor/ugoira.py | 169
5 files changed, 304 insertions, 63 deletions
diff --git a/gallery_dl/postprocessor/__init__.py b/gallery_dl/postprocessor/__init__.py
index 4690554..7837b06 100644
--- a/gallery_dl/postprocessor/__init__.py
+++ b/gallery_dl/postprocessor/__init__.py
@@ -12,9 +12,11 @@ modules = [
"classify",
"compare",
"exec",
+ "hash",
"metadata",
"mtime",
"python",
+ "rename",
"ugoira",
"zip",
]
diff --git a/gallery_dl/postprocessor/hash.py b/gallery_dl/postprocessor/hash.py
new file mode 100644
index 0000000..92a7477
--- /dev/null
+++ b/gallery_dl/postprocessor/hash.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Compute file hash digests"""
+
+from .common import PostProcessor
+import hashlib
+
+
+class HashPP(PostProcessor):
+
+ def __init__(self, job, options):
+ PostProcessor.__init__(self, job)
+
+ self.chunk_size = options.get("chunk-size", 32768)
+ self.filename = options.get("filename")
+
+ hashes = options.get("hashes")
+ if isinstance(hashes, dict):
+ self.hashes = list(hashes.items())
+ elif isinstance(hashes, str):
+ self.hashes = []
+ for h in hashes.split(","):
+ name, sep, key = h.partition(":")
+ self.hashes.append((key if sep else name, name))
+ elif hashes:
+ self.hashes = hashes
+ else:
+ self.hashes = (("md5", "md5"), ("sha1", "sha1"))
+
+ events = options.get("event")
+ if events is None:
+ events = ("file",)
+ elif isinstance(events, str):
+ events = events.split(",")
+ job.register_hooks({event: self.run for event in events}, options)
+
+ def run(self, pathfmt):
+ hashes = [
+ (key, hashlib.new(name))
+ for key, name in self.hashes
+ ]
+
+ size = self.chunk_size
+ with self._open(pathfmt) as fp:
+ while True:
+ data = fp.read(size)
+ if not data:
+ break
+ for _, h in hashes:
+ h.update(data)
+
+ for key, h in hashes:
+ pathfmt.kwdict[key] = h.hexdigest()
+
+ if self.filename:
+ pathfmt.build_path()
+
+ def _open(self, pathfmt):
+ try:
+ return open(pathfmt.temppath, "rb")
+ except OSError:
+ return open(pathfmt.realpath, "rb")
+
+
+__postprocessor__ = HashPP
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index e89b170..3ef9fbc 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -103,10 +103,10 @@ class MetadataPP(PostProcessor):
job.register_hooks({event: self.run for event in events}, options)
self._init_archive(job, options, "_MD_")
+ self.filter = self._make_filter(options)
self.mtime = options.get("mtime")
self.omode = options.get("open", omode)
self.encoding = options.get("encoding", "utf-8")
- self.private = options.get("private", False)
self.skip = options.get("skip", False)
def run(self, pathfmt):
@@ -114,7 +114,10 @@ class MetadataPP(PostProcessor):
if archive and archive.check(pathfmt.kwdict):
return
- directory = self._directory(pathfmt)
+ if util.WINDOWS and pathfmt.extended:
+ directory = pathfmt._extended_path(self._directory(pathfmt))
+ else:
+ directory = self._directory(pathfmt)
path = directory + self._filename(pathfmt)
if self.skip and os.path.exists(path):
@@ -231,10 +234,33 @@ class MetadataPP(PostProcessor):
fp.write("\n".join(tags) + "\n")
def _write_json(self, fp, kwdict):
- if not self.private:
- kwdict = util.filter_dict(kwdict)
+ if self.filter:
+ kwdict = self.filter(kwdict)
fp.write(self._json_encode(kwdict) + "\n")
+ def _make_filter(self, options):
+ include = options.get("include")
+ if include:
+ if isinstance(include, str):
+ include = include.split(",")
+ return lambda d: {k: d[k] for k in include if k in d}
+
+ exclude = options.get("exclude")
+ private = options.get("private")
+ if exclude:
+ if isinstance(exclude, str):
+ exclude = exclude.split(",")
+ exclude = set(exclude)
+
+ if private:
+ return lambda d: {k: v for k, v in d.items()
+ if k not in exclude}
+ return lambda d: {k: v for k, v in util.filter_dict(d).items()
+ if k not in exclude}
+
+ if not private:
+ return util.filter_dict
+
@staticmethod
def _make_encoder(options, indent=None):
return json.JSONEncoder(
diff --git a/gallery_dl/postprocessor/rename.py b/gallery_dl/postprocessor/rename.py
new file mode 100644
index 0000000..f71738d
--- /dev/null
+++ b/gallery_dl/postprocessor/rename.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Rename files"""
+
+from .common import PostProcessor
+from .. import formatter
+import os
+
+
+class RenamePP(PostProcessor):
+
+ def __init__(self, job, options):
+ PostProcessor.__init__(self, job)
+
+ self.skip = options.get("skip", True)
+ old = options.get("from")
+ new = options.get("to")
+
+ if old:
+ self._old = self._apply_format(old)
+ self._new = (self._apply_format(new) if new else
+ self._apply_pathfmt)
+ job.register_hooks({
+ "prepare": self.rename_from,
+ }, options)
+
+ elif new:
+ self._old = self._apply_pathfmt
+ self._new = self._apply_format(new)
+ job.register_hooks({
+ "skip" : self.rename_to_skip,
+ "prepare-after": self.rename_to_pafter,
+ }, options)
+
+ else:
+ raise ValueError("Option 'from' or 'to' is required")
+
+ def rename_from(self, pathfmt):
+ name_old = self._old(pathfmt)
+ path_old = pathfmt.realdirectory + name_old
+
+ if os.path.exists(path_old):
+ name_new = self._new(pathfmt)
+ path_new = pathfmt.realdirectory + name_new
+ self._rename(path_old, name_old, path_new, name_new)
+
+ def rename_to_skip(self, pathfmt):
+ name_old = self._old(pathfmt)
+ path_old = pathfmt.realdirectory + name_old
+
+ if os.path.exists(path_old):
+ pathfmt.filename = name_new = self._new(pathfmt)
+ pathfmt.path = pathfmt.directory + name_new
+ pathfmt.realpath = path_new = pathfmt.realdirectory + name_new
+ self._rename(path_old, name_old, path_new, name_new)
+
+ def rename_to_pafter(self, pathfmt):
+ pathfmt.filename = name_new = self._new(pathfmt)
+ pathfmt.path = pathfmt.directory + name_new
+ pathfmt.realpath = pathfmt.realdirectory + name_new
+ pathfmt.kwdict["_file_recheck"] = True
+
+ def _rename(self, path_old, name_old, path_new, name_new):
+ if self.skip and os.path.exists(path_new):
+ return self.log.warning(
+ "Not renaming '%s' to '%s' since another file with the "
+ "same name exists", name_old, name_new)
+
+ self.log.info("'%s' -> '%s'", name_old, name_new)
+ os.replace(path_old, path_new)
+
+ def _apply_pathfmt(self, pathfmt):
+ return pathfmt.build_filename(pathfmt.kwdict)
+
+ def _apply_format(self, format_string):
+ fmt = formatter.parse(format_string).format_map
+
+ def apply(pathfmt):
+ return pathfmt.clean_path(pathfmt.clean_segment(fmt(
+ pathfmt.kwdict)))
+
+ return apply
+
+
+__postprocessor__ = RenamePP
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index 9e60ce2..f053afa 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -36,7 +36,8 @@ class UgoiraPP(PostProcessor):
self.delete = not options.get("keep-files", False)
self.repeat = options.get("repeat-last-frame", True)
self.mtime = options.get("mtime", True)
- self.uniform = False
+ self.skip = options.get("skip", True)
+ self.uniform = self._convert_zip = self._convert_files = False
ffmpeg = options.get("ffmpeg-location")
self.ffmpeg = util.expand_path(ffmpeg) if ffmpeg else "ffmpeg"
@@ -90,33 +91,44 @@ class UgoiraPP(PostProcessor):
if self.prevent_odd:
args += ("-vf", "crop=iw-mod(iw\\,2):ih-mod(ih\\,2)")
- job.register_hooks(
- {"prepare": self.prepare, "file": self.convert}, options)
+ job.register_hooks({
+ "prepare": self.prepare,
+ "file" : self.convert_zip,
+ "after" : self.convert_files,
+ }, options)
def prepare(self, pathfmt):
- self._frames = None
-
- if pathfmt.extension != "zip":
+ if "frames" not in pathfmt.kwdict:
+ self._frames = None
return
- kwdict = pathfmt.kwdict
- if "frames" in kwdict:
- self._frames = kwdict["frames"]
- elif "pixiv_ugoira_frame_data" in kwdict:
- self._frames = kwdict["pixiv_ugoira_frame_data"]["data"]
+ self._frames = pathfmt.kwdict["frames"]
+ if pathfmt.extension == "zip":
+ self._convert_zip = True
+ if self.delete:
+ pathfmt.set_extension(self.extension)
+ pathfmt.build_path()
else:
- return
-
- if self.delete:
- pathfmt.set_extension(self.extension)
pathfmt.build_path()
+ index = pathfmt.kwdict["_ugoira_frame_index"]
+ frame = self._frames[index].copy()
+ frame["index"] = index
+ frame["path"] = pathfmt.realpath
+ frame["ext"] = pathfmt.kwdict["extension"]
+
+ if not index:
+ self._files = [frame]
+ else:
+ self._files.append(frame)
+ if len(self._files) >= len(self._frames):
+ self._convert_files = True
- def convert(self, pathfmt):
- if not self._frames:
+ def convert_zip(self, pathfmt):
+ if not self._convert_zip:
return
+ self._convert_zip = False
with tempfile.TemporaryDirectory() as tempdir:
- # extract frames
try:
with zipfile.ZipFile(pathfmt.temppath) as zfile:
zfile.extractall(tempdir)
@@ -124,53 +136,89 @@ class UgoiraPP(PostProcessor):
pathfmt.realpath = pathfmt.temppath
return
- # process frames and collect command-line arguments
- pathfmt.set_extension(self.extension)
- pathfmt.build_path()
-
- args = self._process(pathfmt, tempdir)
- if self.args_pp:
- args += self.args_pp
- if self.args:
- args += self.args
-
- # ensure target directory exists
- os.makedirs(pathfmt.realdirectory, exist_ok=True)
-
- # invoke ffmpeg
- try:
- if self.twopass:
- if "-f" not in self.args:
- args += ("-f", self.extension)
- args += ("-passlogfile", tempdir + "/ffmpeg2pass", "-pass")
- self._exec(args + ["1", "-y", os.devnull])
- self._exec(args + ["2", pathfmt.realpath])
- else:
- args.append(pathfmt.realpath)
- self._exec(args)
- if self._finalize:
- self._finalize(pathfmt, tempdir)
- except OSError as exc:
- print()
- self.log.error("Unable to invoke FFmpeg (%s: %s)",
- exc.__class__.__name__, exc)
- pathfmt.realpath = pathfmt.temppath
- except Exception as exc:
- print()
- self.log.error("%s: %s", exc.__class__.__name__, exc)
- self.log.debug("", exc_info=True)
- pathfmt.realpath = pathfmt.temppath
- else:
- if self.mtime:
- mtime = pathfmt.kwdict.get("_mtime")
- if mtime:
- util.set_mtime(pathfmt.realpath, mtime)
+ if self.convert(pathfmt, tempdir):
if self.delete:
pathfmt.delete = True
else:
+ self.log.info(pathfmt.filename)
pathfmt.set_extension("zip")
pathfmt.build_path()
+ def convert_files(self, pathfmt):
+ if not self._convert_files:
+ return
+ self._convert_files = False
+
+ with tempfile.TemporaryDirectory() as tempdir:
+ for frame in self._files:
+
+ # update frame filename extension
+ frame["file"] = name = "{}.{}".format(
+ frame["file"].partition(".")[0], frame["ext"])
+
+ # move frame into tempdir
+ try:
+ self._copy_file(frame["path"], tempdir + "/" + name)
+ except OSError as exc:
+ self.log.debug("Unable to copy frame %s (%s: %s)",
+ name, exc.__class__.__name__, exc)
+ return
+
+ pathfmt.kwdict["num"] = 0
+ self._frames = self._files
+ if self.convert(pathfmt, tempdir):
+ self.log.info(pathfmt.filename)
+ if self.delete:
+ self.log.debug("Deleting frames")
+ for frame in self._files:
+ util.remove_file(frame["path"])
+
+ def convert(self, pathfmt, tempdir):
+ pathfmt.set_extension(self.extension)
+ pathfmt.build_path()
+ if self.skip and pathfmt.exists():
+ return True
+
+ # process frames and collect command-line arguments
+ args = self._process(pathfmt, tempdir)
+ if self.args_pp:
+ args += self.args_pp
+ if self.args:
+ args += self.args
+
+ # ensure target directory exists
+ os.makedirs(pathfmt.realdirectory, exist_ok=True)
+
+ # invoke ffmpeg
+ try:
+ if self.twopass:
+ if "-f" not in self.args:
+ args += ("-f", self.extension)
+ args += ("-passlogfile", tempdir + "/ffmpeg2pass", "-pass")
+ self._exec(args + ["1", "-y", os.devnull])
+ self._exec(args + ["2", pathfmt.realpath])
+ else:
+ args.append(pathfmt.realpath)
+ self._exec(args)
+ if self._finalize:
+ self._finalize(pathfmt, tempdir)
+ except OSError as exc:
+ print()
+ self.log.error("Unable to invoke FFmpeg (%s: %s)",
+ exc.__class__.__name__, exc)
+ pathfmt.realpath = pathfmt.temppath
+ except Exception as exc:
+ print()
+ self.log.error("%s: %s", exc.__class__.__name__, exc)
+ self.log.debug("", exc_info=True)
+ pathfmt.realpath = pathfmt.temppath
+ else:
+ if self.mtime:
+ mtime = pathfmt.kwdict.get("_mtime")
+ if mtime:
+ util.set_mtime(pathfmt.realpath, mtime)
+ return True
+
def _exec(self, args):
self.log.debug(args)
out = None if self.output else subprocess.DEVNULL
@@ -182,6 +230,9 @@ class UgoiraPP(PostProcessor):
raise ValueError()
return retcode
+ def _copy_file(self, src, dst):
+ shutil.copyfile(src, dst)
+
def _process_concat(self, pathfmt, tempdir):
rate_in, rate_out = self.calculate_framerate(self._frames)
args = [self.ffmpeg, "-f", "concat"]