path: root/gallery_dl/__init__.py
author    Unit 193 <unit193@unit193.net>    2023-11-27 17:57:07 -0500
committer Unit 193 <unit193@unit193.net>    2023-11-27 17:57:07 -0500
commit 34eea9832b1b50aa9214481363cb3ca5c81bb2d8 (patch)
tree   dbf7dc5cc36fbad64ac83e38d0ec0fec90f9f109 /gallery_dl/__init__.py
parent ecd8aa4d85bd87ed65abe0f22ba058bda95d699c (diff)
parent 2a817af4fe41289fa705bdc5ee61372333f43996 (diff)
Update upstream source from tag 'upstream/1.26.3'
Update to upstream version '1.26.3' with Debian dir f685c0316a7fc7f226907bb4cb034eab193d2945
Diffstat (limited to 'gallery_dl/__init__.py')
-rw-r--r--    gallery_dl/__init__.py    277
1 file changed, 235 insertions(+), 42 deletions(-)
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index d3a0f58..287faf1 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -18,19 +18,6 @@ __email__ = "mike_faehrmann@web.de"
__version__ = version.__version__
-def progress(urls, pformat):
- """Wrapper around urls to output a simple progress indicator"""
- if pformat is True:
- pformat = "[{current}/{total}] {url}\n"
- else:
- pformat += "\n"
-
- pinfo = {"total": len(urls)}
- for pinfo["current"], pinfo["url"] in enumerate(urls, 1):
- output.stderr_write(pformat.format_map(pinfo))
- yield pinfo["url"]
-
-
def main():
try:
parser = option.build_parser()
@@ -128,6 +115,7 @@ def main():
output.configure_logging(args.loglevel)
if args.loglevel >= logging.ERROR:
config.set(("output",), "mode", "null")
+ config.set(("downloader",), "progress", None)
elif args.loglevel <= logging.DEBUG:
import platform
import requests
@@ -224,7 +212,7 @@ def main():
return config.initialize()
else:
- if not args.urls and not args.inputfiles:
+ if not args.urls and not args.input_files:
parser.error(
"The following arguments are required: URL\n"
"Use 'gallery-dl --help' to get a list of all options.")
@@ -238,22 +226,6 @@ def main():
else:
jobtype = args.jobtype or job.DownloadJob
- urls = args.urls
- if args.inputfiles:
- for inputfile in args.inputfiles:
- try:
- if inputfile == "-":
- if sys.stdin:
- urls += util.parse_inputfile(sys.stdin, log)
- else:
- log.warning(
- "input file: stdin is not readable")
- else:
- with open(inputfile, encoding="utf-8") as file:
- urls += util.parse_inputfile(file, log)
- except OSError as exc:
- log.warning("input file: %s", exc)
-
# unsupported file logging handler
handler = output.setup_logging_handler(
"unsupportedfile", fmt="{message}")
@@ -263,25 +235,44 @@ def main():
ulog.propagate = False
job.Job.ulog = ulog
+ # collect input URLs
+ input_manager = InputManager()
+ input_manager.log = input_log = logging.getLogger("inputfile")
+ input_manager.add_list(args.urls)
+
+ if args.input_files:
+ for input_file, action in args.input_files:
+ try:
+ path = util.expand_path(input_file)
+ input_manager.add_file(path, action)
+ except Exception as exc:
+ input_log.error(exc)
+ return getattr(exc, "code", 128)
+
pformat = config.get(("output",), "progress", True)
- if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:
- urls = progress(urls, pformat)
- else:
- urls = iter(urls)
+ if pformat and len(input_manager.urls) > 1 and \
+ args.loglevel < logging.ERROR:
+ input_manager.progress(pformat)
+ # process input URLs
retval = 0
- url = next(urls, None)
-
- while url is not None:
+ for url in input_manager:
try:
log.debug("Starting %s for '%s'", jobtype.__name__, url)
- if isinstance(url, util.ExtendedUrl):
+
+ if isinstance(url, ExtendedUrl):
for opts in url.gconfig:
config.set(*opts)
with config.apply(url.lconfig):
- retval |= jobtype(url.value).run()
+ status = jobtype(url.value).run()
+ else:
+ status = jobtype(url).run()
+
+ if status:
+ retval |= status
else:
- retval |= jobtype(url).run()
+ input_manager.success()
+
except exception.TerminateExtraction:
pass
except exception.RestartExtraction:
@@ -291,8 +282,7 @@ def main():
log.error("Unsupported URL '%s'", url)
retval |= 64
- url = next(urls, None)
-
+ input_manager.next()
return retval
except KeyboardInterrupt:
@@ -304,3 +294,206 @@ def main():
if exc.errno != errno.EPIPE:
raise
return 1
+
+
+class InputManager():
+
+ def __init__(self):
+ self.urls = []
+ self.files = ()
+ self._index = 0
+ self._current = None
+ self._pformat = None
+
+ def add_url(self, url):
+ self.urls.append(url)
+
+ def add_list(self, urls):
+ self.urls += urls
+
+ def add_file(self, path, action=None):
+ """Process an input file.
+
+ Lines starting with '#' and empty lines will be ignored.
+ Lines starting with '-' will be interpreted as a key-value pair
+ separated by an '='. where
+ 'key' is a dot-separated option name and
+ 'value' is a JSON-parsable string.
+ These configuration options will be applied
+ while processing the next URL only.
+ Lines starting with '-G' are the same as above, except these options
+ will be applied for *all* following URLs, i.e. they are Global.
+ Everything else will be used as a potential URL.
+
+ Example input file:
+
+ # settings global options
+ -G base-directory = "/tmp/"
+ -G skip = false
+
+ # setting local options for the next URL
+ -filename="spaces_are_optional.jpg"
+ -skip = true
+
+ https://example.org/
+
+ # next URL uses default filename and 'skip' is false.
+ https://example.com/index.htm # comment1
+ https://example.com/404.htm # comment2
+ """
+ if path == "-" and not action:
+ try:
+ lines = sys.stdin.readlines()
+ except Exception:
+ raise exception.InputFileError("stdin is not readable")
+ path = None
+ else:
+ try:
+ with open(path, encoding="utf-8") as fp:
+ lines = fp.readlines()
+ except Exception as exc:
+ raise exception.InputFileError(str(exc))
+
+ if self.files:
+ self.files[path] = lines
+ else:
+ self.files = {path: lines}
+
+ if action == "c":
+ action = self._action_comment
+ elif action == "d":
+ action = self._action_delete
+ else:
+ action = None
+
+ gconf = []
+ lconf = []
+ indicies = []
+ strip_comment = None
+ append = self.urls.append
+
+ for n, line in enumerate(lines):
+ line = line.strip()
+
+ if not line or line[0] == "#":
+ # empty line or comment
+ continue
+
+ elif line[0] == "-":
+ # config spec
+ if len(line) >= 2 and line[1] == "G":
+ conf = gconf
+ line = line[2:]
+ else:
+ conf = lconf
+ line = line[1:]
+ if action:
+ indicies.append(n)
+
+ key, sep, value = line.partition("=")
+ if not sep:
+ raise exception.InputFileError(
+ "Invalid KEY=VALUE pair '%s' on line %s in %s",
+ line, n+1, path)
+
+ try:
+ value = util.json_loads(value.strip())
+ except ValueError as exc:
+ self.log.debug("%s: %s", exc.__class__.__name__, exc)
+ raise exception.InputFileError(
+ "Unable to parse '%s' on line %s in %s",
+ value, n+1, path)
+
+ key = key.strip().split(".")
+ conf.append((key[:-1], key[-1], value))
+
+ else:
+ # url
+ if " #" in line or "\t#" in line:
+ if strip_comment is None:
+ import re
+ strip_comment = re.compile(r"\s+#.*").sub
+ line = strip_comment("", line)
+ if gconf or lconf:
+ url = ExtendedUrl(line, gconf, lconf)
+ gconf = []
+ lconf = []
+ else:
+ url = line
+
+ if action:
+ indicies.append(n)
+ append((url, path, action, indicies))
+ indicies = []
+ else:
+ append(url)
+
+ def progress(self, pformat=True):
+ if pformat is True:
+ pformat = "[{current}/{total}] {url}\n"
+ else:
+ pformat += "\n"
+ self._pformat = pformat.format_map
+
+ def next(self):
+ self._index += 1
+
+ def success(self):
+ if self._current:
+ url, path, action, indicies = self._current
+ lines = self.files[path]
+ action(lines, indicies)
+ try:
+ with open(path, "w", encoding="utf-8") as fp:
+ fp.writelines(lines)
+ except Exception as exc:
+ self.log.warning(
+ "Unable to update '%s' (%s: %s)",
+ path, exc.__class__.__name__, exc)
+
+ @staticmethod
+ def _action_comment(lines, indicies):
+ for i in indicies:
+ lines[i] = "# " + lines[i]
+
+ @staticmethod
+ def _action_delete(lines, indicies):
+ for i in indicies:
+ lines[i] = ""
+
+ def __iter__(self):
+ self._index = 0
+ return self
+
+ def __next__(self):
+ try:
+ item = self.urls[self._index]
+ except IndexError:
+ raise StopIteration
+
+ if isinstance(item, tuple):
+ self._current = item
+ item = item[0]
+ else:
+ self._current = None
+
+ if self._pformat:
+ output.stderr_write(self._pformat({
+ "total" : len(self.urls),
+ "current": self._index + 1,
+ "url" : item,
+ }))
+ return item
+
+
+class ExtendedUrl():
+ """URL with attached config key-value pairs"""
+ __slots__ = ("value", "gconfig", "lconfig")
+
+ def __init__(self, url, gconf, lconf):
+ self.value = url
+ self.gconfig = gconf
+ self.lconfig = lconf
+
+ def __str__(self):
+ return self.value
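

The InputManager and ExtendedUrl classes added above replace the old progress() generator and the inline input-file parsing removed earlier in this diff. A minimal usage sketch, not part of this commit: it assumes gallery_dl 1.26.3 or later is importable and that a hypothetical "urls.txt" follows the format documented in InputManager.add_file().

# Minimal sketch, not part of this commit: drive the new InputManager
# directly. Assumes gallery_dl >= 1.26.3 and a hypothetical "urls.txt"
# written in the format documented in InputManager.add_file().
import logging
from gallery_dl import InputManager, ExtendedUrl

input_manager = InputManager()
input_manager.log = logging.getLogger("inputfile")  # add_file()/success() log through this

input_manager.add_list(["https://example.org/"])  # e.g. URLs given on the command line
input_manager.add_file("urls.txt")                # raises exception.InputFileError on bad lines
input_manager.progress(True)                      # "[current/total] url" indicator on stderr

for url in input_manager:
    if isinstance(url, ExtendedUrl):
        # URLs preceded by '-' / '-G' option lines in the input file
        print(url.value, url.gconfig, url.lconfig)
    else:
        print(url)
    input_manager.next()  # advance manually; __next__() does not increment the index

As in main() above, next() is called explicitly at the end of each iteration rather than inside __next__(), presumably so the loop can repeat the same URL (e.g. after a RestartExtraction) by skipping that call.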