| author | 2023-11-27 17:57:07 -0500 |
|---|---|
| committer | 2023-11-27 17:57:07 -0500 |
| commit | 34eea9832b1b50aa9214481363cb3ca5c81bb2d8 (patch) |
| tree | dbf7dc5cc36fbad64ac83e38d0ec0fec90f9f109 /gallery_dl/__init__.py |
| parent | ecd8aa4d85bd87ed65abe0f22ba058bda95d699c (diff) |
| parent | 2a817af4fe41289fa705bdc5ee61372333f43996 (diff) |
Update upstream source from tag 'upstream/1.26.3'
Update to upstream version '1.26.3'
with Debian dir f685c0316a7fc7f226907bb4cb034eab193d2945
Diffstat (limited to 'gallery_dl/__init__.py')
| -rw-r--r-- | gallery_dl/__init__.py | 277 |
|---|---|---|

1 files changed, 235 insertions, 42 deletions
```diff
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index d3a0f58..287faf1 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -18,19 +18,6 @@ __email__ = "mike_faehrmann@web.de"
 __version__ = version.__version__
 
 
-def progress(urls, pformat):
-    """Wrapper around urls to output a simple progress indicator"""
-    if pformat is True:
-        pformat = "[{current}/{total}] {url}\n"
-    else:
-        pformat += "\n"
-
-    pinfo = {"total": len(urls)}
-    for pinfo["current"], pinfo["url"] in enumerate(urls, 1):
-        output.stderr_write(pformat.format_map(pinfo))
-        yield pinfo["url"]
-
-
 def main():
     try:
         parser = option.build_parser()
@@ -128,6 +115,7 @@ def main():
         output.configure_logging(args.loglevel)
         if args.loglevel >= logging.ERROR:
             config.set(("output",), "mode", "null")
+            config.set(("downloader",), "progress", None)
         elif args.loglevel <= logging.DEBUG:
             import platform
             import requests
@@ -224,7 +212,7 @@ def main():
             return config.initialize()
 
         else:
-            if not args.urls and not args.inputfiles:
+            if not args.urls and not args.input_files:
                 parser.error(
                     "The following arguments are required: URL\n"
                     "Use 'gallery-dl --help' to get a list of all options.")
@@ -238,22 +226,6 @@ def main():
             else:
                 jobtype = args.jobtype or job.DownloadJob
 
-            urls = args.urls
-            if args.inputfiles:
-                for inputfile in args.inputfiles:
-                    try:
-                        if inputfile == "-":
-                            if sys.stdin:
-                                urls += util.parse_inputfile(sys.stdin, log)
-                            else:
-                                log.warning(
-                                    "input file: stdin is not readable")
-                        else:
-                            with open(inputfile, encoding="utf-8") as file:
-                                urls += util.parse_inputfile(file, log)
-                    except OSError as exc:
-                        log.warning("input file: %s", exc)
-
             # unsupported file logging handler
             handler = output.setup_logging_handler(
                 "unsupportedfile", fmt="{message}")
@@ -263,25 +235,44 @@ def main():
                 ulog.propagate = False
                 job.Job.ulog = ulog
 
+            # collect input URLs
+            input_manager = InputManager()
+            input_manager.log = input_log = logging.getLogger("inputfile")
+            input_manager.add_list(args.urls)
+
+            if args.input_files:
+                for input_file, action in args.input_files:
+                    try:
+                        path = util.expand_path(input_file)
+                        input_manager.add_file(path, action)
+                    except Exception as exc:
+                        input_log.error(exc)
+                        return getattr(exc, "code", 128)
+
             pformat = config.get(("output",), "progress", True)
-            if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:
-                urls = progress(urls, pformat)
-            else:
-                urls = iter(urls)
+            if pformat and len(input_manager.urls) > 1 and \
+                    args.loglevel < logging.ERROR:
+                input_manager.progress(pformat)
 
+            # process input URLs
             retval = 0
-            url = next(urls, None)
-
-            while url is not None:
+            for url in input_manager:
                 try:
                     log.debug("Starting %s for '%s'", jobtype.__name__, url)
-                    if isinstance(url, util.ExtendedUrl):
+
+                    if isinstance(url, ExtendedUrl):
                         for opts in url.gconfig:
                             config.set(*opts)
                         with config.apply(url.lconfig):
-                            retval |= jobtype(url.value).run()
+                            status = jobtype(url.value).run()
+                    else:
+                        status = jobtype(url).run()
+
+                    if status:
+                        retval |= status
                     else:
-                        retval |= jobtype(url).run()
+                        input_manager.success()
+
                 except exception.TerminateExtraction:
                     pass
                 except exception.RestartExtraction:
@@ -291,8 +282,7 @@ def main():
                     log.error("Unsupported URL '%s'", url)
                     retval |= 64
 
-                url = next(urls, None)
-
+                input_manager.next()
             return retval
 
     except KeyboardInterrupt:
@@ -304,3 +294,206 @@ def main():
         if exc.errno != errno.EPIPE:
             raise
         return 1
+
+
+class InputManager():
+
+    def __init__(self):
+        self.urls = []
+        self.files = ()
+        self._index = 0
+        self._current = None
+        self._pformat = None
+
+    def add_url(self, url):
+        self.urls.append(url)
+
+    def add_list(self, urls):
+        self.urls += urls
+
+    def add_file(self, path, action=None):
+        """Process an input file.
+
+        Lines starting with '#' and empty lines will be ignored.
+        Lines starting with '-' will be interpreted as a key-value pair
+        separated by an '='. where
+        'key' is a dot-separated option name and
+        'value' is a JSON-parsable string.
+        These configuration options will be applied
+        while processing the next URL only.
+        Lines starting with '-G' are the same as above, except these options
+        will be applied for *all* following URLs, i.e. they are Global.
+        Everything else will be used as a potential URL.
+
+        Example input file:
+
+        # settings global options
+        -G base-directory = "/tmp/"
+        -G skip = false
+
+        # setting local options for the next URL
+        -filename="spaces_are_optional.jpg"
+        -skip = true
+
+        https://example.org/
+
+        # next URL uses default filename and 'skip' is false.
+        https://example.com/index.htm # comment1
+        https://example.com/404.htm # comment2
+        """
+        if path == "-" and not action:
+            try:
+                lines = sys.stdin.readlines()
+            except Exception:
+                raise exception.InputFileError("stdin is not readable")
+            path = None
+        else:
+            try:
+                with open(path, encoding="utf-8") as fp:
+                    lines = fp.readlines()
+            except Exception as exc:
+                raise exception.InputFileError(str(exc))
+
+        if self.files:
+            self.files[path] = lines
+        else:
+            self.files = {path: lines}
+
+        if action == "c":
+            action = self._action_comment
+        elif action == "d":
+            action = self._action_delete
+        else:
+            action = None
+
+        gconf = []
+        lconf = []
+        indicies = []
+        strip_comment = None
+        append = self.urls.append
+
+        for n, line in enumerate(lines):
+            line = line.strip()
+
+            if not line or line[0] == "#":
+                # empty line or comment
+                continue
+
+            elif line[0] == "-":
+                # config spec
+                if len(line) >= 2 and line[1] == "G":
+                    conf = gconf
+                    line = line[2:]
+                else:
+                    conf = lconf
+                    line = line[1:]
+                    if action:
+                        indicies.append(n)
+
+                key, sep, value = line.partition("=")
+                if not sep:
+                    raise exception.InputFileError(
+                        "Invalid KEY=VALUE pair '%s' on line %s in %s",
+                        line, n+1, path)
+
+                try:
+                    value = util.json_loads(value.strip())
+                except ValueError as exc:
+                    self.log.debug("%s: %s", exc.__class__.__name__, exc)
+                    raise exception.InputFileError(
+                        "Unable to parse '%s' on line %s in %s",
+                        value, n+1, path)
+
+                key = key.strip().split(".")
+                conf.append((key[:-1], key[-1], value))
+
+            else:
+                # url
+                if " #" in line or "\t#" in line:
+                    if strip_comment is None:
+                        import re
+                        strip_comment = re.compile(r"\s+#.*").sub
+                    line = strip_comment("", line)
+                if gconf or lconf:
+                    url = ExtendedUrl(line, gconf, lconf)
+                    gconf = []
+                    lconf = []
+                else:
+                    url = line
+
+                if action:
+                    indicies.append(n)
+                    append((url, path, action, indicies))
+                    indicies = []
+                else:
+                    append(url)
+
+    def progress(self, pformat=True):
+        if pformat is True:
+            pformat = "[{current}/{total}] {url}\n"
+        else:
+            pformat += "\n"
+        self._pformat = pformat.format_map
+
+    def next(self):
+        self._index += 1
+
+    def success(self):
+        if self._current:
+            url, path, action, indicies = self._current
+            lines = self.files[path]
+            action(lines, indicies)
+            try:
+                with open(path, "w", encoding="utf-8") as fp:
+                    fp.writelines(lines)
+            except Exception as exc:
+                self.log.warning(
+                    "Unable to update '%s' (%s: %s)",
+                    path, exc.__class__.__name__, exc)
+
+    @staticmethod
+    def _action_comment(lines, indicies):
+        for i in indicies:
+            lines[i] = "# " + lines[i]
+
+    @staticmethod
+    def _action_delete(lines, indicies):
+        for i in indicies:
+            lines[i] = ""
+
+    def __iter__(self):
+        self._index = 0
+        return self
+
+    def __next__(self):
+        try:
+            item = self.urls[self._index]
+        except IndexError:
+            raise StopIteration
+
+        if isinstance(item, tuple):
+            self._current = item
+            item = item[0]
+        else:
+            self._current = None
+
+        if self._pformat:
+            output.stderr_write(self._pformat({
+                "total"  : len(self.urls),
+                "current": self._index + 1,
+                "url"    : item,
+            }))
+        return item
+
+
+class ExtendedUrl():
+    """URL with attached config key-value pairs"""
+    __slots__ = ("value", "gconfig", "lconfig")
+
+    def __init__(self, url, gconf, lconf):
+        self.value = url
+        self.gconfig = gconf
+        self.lconfig = lconf
+
+    def __str__(self):
+        return self.value
```
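
As a minimal usage sketch (not part of this commit), the new `InputManager` can be driven directly the same way `main()` does: collect URLs, parse an input file, then iterate. The input-file lines below follow the syntax documented in `InputManager.add_file()`; the temporary file, the example URLs, and the `print()` calls are illustrative assumptions, not anything the upstream code ships.

```python
# Usage sketch only: assumes gallery-dl 1.26.3 is importable; the temporary
# file and print() calls are illustrative, not part of the commit.
import logging
import tempfile

from gallery_dl import InputManager, ExtendedUrl

# Input-file syntax as documented in InputManager.add_file():
# '#' starts a comment, '-key = value' applies to the next URL only,
# '-G key = value' applies to all following URLs.
content = """\
-G base-directory = "/tmp/"
-skip = true
https://example.org/
https://example.com/index.htm  # trailing comments are stripped
"""

with tempfile.NamedTemporaryFile("w", suffix=".txt",
                                 encoding="utf-8", delete=False) as fp:
    fp.write(content)

manager = InputManager()
manager.log = logging.getLogger("inputfile")  # main() attaches this logger too
manager.add_list(["https://example.net/"])    # URLs from the command line
manager.add_file(fp.name)                     # default action=None: file is left untouched

for url in manager:
    if isinstance(url, ExtendedUrl):
        # per-URL options attached by the '-'/'-G' lines above
        print(url.value, url.gconfig, url.lconfig)
    else:
        print(url)
    manager.next()  # __next__() does not advance by itself; main() calls this as well
```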
