| author | 2025-04-15 05:25:37 -0400 |
|---|---|
| committer | 2025-04-15 05:25:37 -0400 |
| commit | b830dc03b3b7c9dd119648e1be9c1145d56e096c (patch) |
| tree | e9d03b6b4ab93990243c0038c20ada2464fa4072 /gallery_dl |
| parent | 662e5ac868a5c1a3e7bc95b37054b3a0ca4db74f (diff) |
New upstream version 1.29.4 (upstream/1.29.4)
Diffstat (limited to 'gallery_dl')
24 files changed, 213 insertions, 141 deletions
```diff
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 9d653b3..7a20dc2 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -200,6 +200,7 @@ class YoutubeDLDownloader(DownloaderBase):
             return None
 
         info_dict = {
+            "extractor": "",
             "id"       : video_id,
             "title"    : video_id,
             "formats"  : fmts,
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index c9ccb7d..600d231 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -18,19 +18,23 @@ class CheveretoExtractor(BaseExtractor):
     directory_fmt = ("{category}", "{user}", "{album}",)
     archive_fmt = "{id}"
 
-    def __init__(self, match):
-        BaseExtractor.__init__(self, match)
-        self.path = match.group(match.lastindex)
+    def _init(self):
+        self.path = self.groups[-1]
 
     def _pagination(self, url):
-        while url:
+        while True:
             page = self.request(url).text
 
             for item in text.extract_iter(
                     page, '<div class="list-item-image ', 'image-container'):
-                yield text.extr(item, '<a href="', '"')
+                yield text.urljoin(self.root, text.extr(
+                    item, '<a href="', '"'))
 
-            url = text.extr(page, '<a data-pagination="next" href="', '" ><')
+            url = text.extr(page, 'data-pagination="next" href="', '"')
+            if not url:
+                return
+            if url[0] == "/":
+                url = self.root + url
 
 
 BASE_PATTERN = CheveretoExtractor.update({
@@ -42,6 +46,10 @@ BASE_PATTERN = CheveretoExtractor.update({
         "root": "https://img.kiwi",
         "pattern": r"img\.kiwi",
     },
+    "imagepond": {
+        "root": "https://imagepond.net",
+        "pattern": r"imagepond\.net",
+    },
 })
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 741800c..06c31b9 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -282,10 +282,11 @@ class DanbooruPoolExtractor(DanbooruExtractor):
     example = "https://danbooru.donmai.us/pools/12345"
 
     def metadata(self):
-        return self._collection_metadata(self.groups[-1], "pool")
+        self.pool_id = self.groups[-1]
+        return self._collection_metadata(self.pool_id, "pool")
 
     def posts(self):
-        return self._collection_posts(self.groups[-1], "pool")
+        return self._collection_posts(self.pool_id, "pool")
 
 
 class DanbooruFavgroupExtractor(DanbooruExtractor):
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 3a862c1..378c7ec 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -687,7 +687,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
         for folder in folders:
             if match(folder["name"]):
                 return folder
-            elif folder["has_subfolders"]:
+            elif folder.get("has_subfolders"):
                 for subfolder in folder["subfolders"]:
                     if match(subfolder["name"]):
                         return subfolder
@@ -695,7 +695,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
         for folder in folders:
             if folder["folderid"] == uuid:
                 return folder
-            elif folder["has_subfolders"]:
+            elif folder.get("has_subfolders"):
                 for subfolder in folder["subfolders"]:
                     if subfolder["folderid"] == uuid:
                         return subfolder
diff --git a/gallery_dl/extractor/discord.py b/gallery_dl/extractor/discord.py
index 6a5fcc9..ac21fec 100644
--- a/gallery_dl/extractor/discord.py
+++ b/gallery_dl/extractor/discord.py
@@ -49,7 +49,10 @@ class DiscordExtractor(Extractor):
                     text_content.append(field.get("name", ""))
                     text_content.append(field.get("value", ""))
 
-            text_content.append(embed.get("footer", {}).get("text", ""))
+            try:
+                text_content.append(embed["footer"]["text"])
+            except Exception:
+                pass
 
         if message.get("poll"):
             text_content.append(message["poll"]["question"]["text"])
@@ -224,10 +227,12 @@ class DiscordExtractor(Extractor):
         return self.server_metadata
 
     def build_server_and_channels(self, server_id):
-        server = self.api.get_server(server_id)
-        self.parse_server(server)
+        self.parse_server(self.api.get_server(server_id))
 
-        for channel in self.api.get_server_channels(server_id):
+        for channel in sorted(
+            self.api.get_server_channels(server_id),
+            key=lambda ch: ch["type"] != 4
+        ):
             self.parse_channel(channel)
@@ -353,7 +358,8 @@ class DiscordAPI():
                 "limit": MESSAGES_BATCH,
                 "before": before
             })
-            before = messages[-1]["id"]
+            if messages:
+                before = messages[-1]["id"]
             return messages
 
         return self._pagination(_method, MESSAGES_BATCH)
diff --git a/gallery_dl/extractor/everia.py b/gallery_dl/extractor/everia.py
index 94444ff..e41f6f6 100644
--- a/gallery_dl/extractor/everia.py
+++ b/gallery_dl/extractor/everia.py
@@ -52,7 +52,7 @@ class EveriaPostExtractor(EveriaExtractor):
     def items(self):
         url = self.root + self.groups[0]
         page = self.request(url).text
-        content = text.extr(page, 'itemprop="text">', "</div>")
+        content = text.extr(page, 'itemprop="text">', "<h3")
         urls = re.findall(r'img.*?src="([^"]+)', content)
 
         data = {
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 37c776e..eb07739 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -114,11 +114,12 @@ class GelbooruBase():
             md5 = post["md5"]
             path = "/images/{}/{}/{}.webm".format(md5[0:2], md5[2:4], md5)
             post["_fallback"] = GelbooruBase._video_fallback(path)
-            url = "https://img3.gelbooru.com" + path
+            url = "https://img4.gelbooru.com" + path
 
         return url
 
     @staticmethod
     def _video_fallback(path):
+        yield "https://img3.gelbooru.com" + path
         yield "https://img2.gelbooru.com" + path
         yield "https://img1.gelbooru.com" + path
diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py
index 9ab1411..1317ce9 100644
--- a/gallery_dl/extractor/hentai2read.py
+++ b/gallery_dl/extractor/hentai2read.py
@@ -25,26 +25,30 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
     pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+/([^/?#]+))"
     example = "https://hentai2read.com/TITLE/1/"
 
-    def __init__(self, match):
-        self.chapter = match.group(2)
-        ChapterExtractor.__init__(self, match)
-
     def metadata(self, page):
         title, pos = text.extract(page, "<title>", "</title>")
         manga_id, pos = text.extract(page, 'data-mid="', '"', pos)
         chapter_id, pos = text.extract(page, 'data-cid="', '"', pos)
-        chapter, sep, minor = self.chapter.partition(".")
-        match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.+))? - "
+        chapter, sep, minor = self.groups[1].partition(".")
+
+        match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.*))? - "
                          r"([^:]+): (.+) . Page 1 ", title)
+        if match:
+            manga, type, author, _, title = match.groups()
+        else:
+            self.log.warning("Failed to extract 'manga', 'type', 'author', "
+                             "and 'title' metadata")
+            manga = type = author = title = ""
+
         return {
-            "manga": match.group(1),
+            "manga": manga,
             "manga_id": text.parse_int(manga_id),
             "chapter": text.parse_int(chapter),
             "chapter_minor": sep + minor,
             "chapter_id": text.parse_int(chapter_id),
-            "type": match.group(2),
-            "author": match.group(3),
-            "title": match.group(5),
+            "type": type,
+            "author": author,
+            "title": title,
             "lang": "en",
             "language": "English",
         }
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index aa26408..432a7ad 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -29,6 +29,7 @@ class InstagramExtractor(Extractor):
     root = "https://www.instagram.com"
     cookies_domain = ".instagram.com"
     cookies_names = ("sessionid",)
+    useragent = util.USERAGENT_CHROME
     request_interval = (6.0, 12.0)
 
     def __init__(self, match):
diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py
index 65717b4..abbdfd5 100644
--- a/gallery_dl/extractor/issuu.py
+++ b/gallery_dl/extractor/issuu.py
@@ -29,9 +29,11 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
     example = "https://issuu.com/issuu/docs/TITLE/"
 
     def metadata(self, page):
-        pos = page.rindex('id="initial-data"')
-        data = util.json_loads(text.unescape(text.rextract(
-            page, '<script data-json="', '"', pos)[0]))
+
+        data = text.extr(
+            page, '{\\"documentTextVersion\\":', ']\\n"])</script>')
+        data = util.json_loads(text.unescape(
+            '{"":' + data.replace('\\"', '"')))
 
         doc = data["initialDocumentData"]["document"]
         doc["date"] = text.parse_datetime(
@@ -39,7 +41,7 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
 
         self._cnt = text.parse_int(doc["pageCount"])
         self._tpl = "https://{}/{}-{}/jpg/page_{{}}.jpg".format(
-            data["config"]["hosts"]["image"],
+            "image.isu.pub",  # data["config"]["hosts"]["image"],
             doc["revisionId"], doc["publicationId"],
         )
@@ -66,9 +68,8 @@ class IssuuUserExtractor(IssuuBase, Extractor):
             url = base + "/" + str(pnum) if pnum > 1 else base
 
             try:
                 html = self.request(url).text
-                data = util.json_loads(text.unescape(text.extr(
-                    html, '</main></div><script data-json="', '" id="')))
-                docs = data["docs"]
+                data = text.extr(html, '\\"docs\\":', '}]\\n"]')
+                docs = util.json_loads(data.replace('\\"', '"'))
             except Exception as exc:
                 self.log.debug("", exc_info=exc)
                 return
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 860e771..de7d040 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -123,6 +123,9 @@ class KemonopartyExtractor(Extractor):
                        g(post) for g in generators):
                 url = file["path"]
 
+                if "\\" in url:
+                    file["path"] = url = url.replace("\\", "/")
+
                 match = find_hash(url)
                 if match:
                     file["hash"] = hash = match.group(1)
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 8a4905d..e8050b3 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -15,7 +15,7 @@ from datetime import datetime, timedelta
 import itertools
 import hashlib
 
-BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
+BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?ph?ixiv\.net"
 USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)"
 
@@ -531,7 +531,7 @@ class PixivMeExtractor(PixivExtractor):
 class PixivWorkExtractor(PixivExtractor):
     """Extractor for a single pixiv work/illustration"""
     subcategory = "work"
-    pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?pixiv\.net"
+    pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?ph?ixiv\.net"
                r"/(?:(?:en/)?artworks/"
                r"|member_illust\.php\?(?:[^&]+&)*illust_id=)(\d+)"
                r"|(?:i(?:\d+\.pixiv|\.pximg)\.net"
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index c0374eb..2f2daca 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -85,7 +85,7 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
         replacements = re.findall(
             r"l = l\.replace\(/([^/]+)/g, [\"']([^\"']*)", page)
 
-        for block in page.split(" pth = '")[1:]:
+        for block in page.split("\t\tpht = '")[1:]:
             pth = text.extr(block, "", "'")
 
             for needle, repl in re.findall(
@@ -129,7 +129,7 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
 
 
 def baeu(url, root="", root_blogspot="https://2.bp.blogspot.com"):
-    """https://readcomiconline.li/Scripts/rguard.min.js"""
+    """https://readcomiconline.li/Scripts/rguard.min.js?v=1.5.4"""
     if not root:
         root = root_blogspot
diff --git a/gallery_dl/extractor/rule34xyz.py b/gallery_dl/extractor/rule34xyz.py
index 3b8d344..411a71a 100644
--- a/gallery_dl/extractor/rule34xyz.py
+++ b/gallery_dl/extractor/rule34xyz.py
@@ -23,10 +23,18 @@ class Rule34xyzExtractor(BooruExtractor):
     per_page = 60
 
     TAG_TYPES = {
-        0: "general",
-        1: "copyright",
-        2: "character",
-        3: "artist",
+        None: "general",
+        0   : "general",
+        1   : "general",
+        2   : "copyright",
+        4   : "character",
+        8   : "artist",
+    }
+    FORMATS = {
+        "10" : "pic.jpg",
+        "100": "mov.mp4",
+        "101": "mov720.mp4",
+        "102": "mov480.mp4",
     }
 
     def _init(self):
@@ -36,49 +44,49 @@ class Rule34xyzExtractor(BooruExtractor):
             formats = formats.split(",")
             self.formats = formats
         else:
-            self.formats = ("10", "40", "41", "2")
+            self.formats = ("100", "101", "102", "10")
 
     def _file_url(self, post):
-        post["files"] = files = {
-            str(link["type"]): link["url"]
-            for link in post.pop("imageLinks")
-        }
+        files = post["files"]
 
         for fmt in self.formats:
             if fmt in files:
+                extension = self.FORMATS.get(fmt)
                 break
         else:
-            fmt = "2"
             self.log.warning("%s: Requested format not available",
                              post["id"])
+            fmt = next(iter(files))
 
-        post["file_url"] = url = files[fmt]
+        post_id = post["id"]
+        root = self.root_cdn if files[fmt][0] else self.root
+        post["file_url"] = url = "{}/posts/{}/{}/{}.{}".format(
+            root, post_id // 1000, post_id, post_id, extension)
         post["format_id"] = fmt
-        post["format"] = url.rsplit(".", 2)[1]
+        post["format"] = extension.partition(".")[0]
+
         return url
 
     def _prepare(self, post):
-        post.pop("filesPreview", None)
-        post.pop("tagsWithType", None)
+        post.pop("files", None)
         post["date"] = text.parse_datetime(
-            post["created"][:19], "%Y-%m-%dT%H:%M:%S")
+            post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
+        post["filename"], _, post["format"] = post["filename"].rpartition(".")
+        if "tags" in post:
+            post["tags"] = [t["value"] for t in post["tags"]]
 
     def _tags(self, post, _):
-        if post.get("tagsWithType") is None:
+        if "tags" not in post:
             post.update(self._fetch_post(post["id"]))
 
         tags = collections.defaultdict(list)
-        tagslist = []
-        for tag in post["tagsWithType"]:
-            value = tag["value"]
-            tagslist.append(value)
-            tags[tag["type"]].append(value)
+        for tag in post["tags"]:
+            tags[tag["type"]].append(tag["value"])
 
         types = self.TAG_TYPES
         for type, values in tags.items():
             post["tags_" + types[type]] = values
-        post["tags"] = tagslist
 
     def _fetch_post(self, post_id):
-        url = "{}/api/post/{}".format(self.root, post_id)
+        url = "{}/api/v2/post/{}".format(self.root, post_id)
         return self.request(url).json()
 
     def _pagination(self, endpoint, params=None):
@@ -86,22 +94,22 @@ class Rule34xyzExtractor(BooruExtractor):
         if params is None:
             params = {}
 
-        params["IncludeLinks"] = "true"
-        params["IncludeTags"] = "true"
-        params["OrderBy"] = "0"
         params["Skip"] = self.page_start * self.per_page
-        params["Take"] = self.per_page
-        params["DisableTotal"] = "true"
+        params["take"] = self.per_page
+        params["CountTotal"] = False
+        params["IncludeLinks"] = True
+        params["OrderBy"] = 0
 
         threshold = self.per_page
 
         while True:
-            data = self.request(url, params=params).json()
+            data = self.request(url, method="POST", json=params).json()
 
             yield from data["items"]
 
             if len(data["items"]) < threshold:
                 return
-            params["Skip"] += params["Take"]
+            params["Skip"] += self.per_page
+            params["cursor"] = data["cursor"]
 
 
 class Rule34xyzPostExtractor(Rule34xyzExtractor):
@@ -125,9 +133,8 @@ class Rule34xyzPlaylistExtractor(Rule34xyzExtractor):
         return {"playlist_id": self.groups[0]}
 
     def posts(self):
-        endpoint = "/playlist-item"
-        params = {"PlaylistId": self.groups[0]}
-        return self._pagination(endpoint, params)
+        endpoint = "/v2/post/search/playlist/" + self.groups[0]
+        return self._pagination(endpoint)
 
 
 class Rule34xyzTagExtractor(Rule34xyzExtractor):
@@ -138,10 +145,11 @@ class Rule34xyzTagExtractor(Rule34xyzExtractor):
     example = "https://rule34.xyz/TAG"
 
     def metadata(self):
-        self.tags = text.unquote(self.groups[0]).replace("_", " ")
-        return {"search_tags": self.tags}
+        self.tags = text.unquote(text.unquote(
+            self.groups[0]).replace("_", " ")).split("|")
+        return {"search_tags": ", ".join(self.tags)}
 
     def posts(self):
-        endpoint = "/post/search"
-        params = {"Tag": self.tags}
+        endpoint = "/v2/post/search/root"
+        params = {"includeTags": self.tags}
         return self._pagination(endpoint, params)
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 8d1fcde..6f2114e 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -17,7 +17,7 @@ import re
 BASE_PATTERN = (
     r"(?:tumblr:(?:https?://)?([^/]+)|"
     r"(?:https?://)?"
-    r"(?:www\.tumblr\.com/(?:blog/(?:view/)?)?([\w-]+)|"
+    r"(?:(?:www\.)?tumblr\.com/(?:blog/(?:view/)?)?([\w-]+)|"
    r"([\w-]+\.tumblr\.com)))"
 )
 
@@ -357,7 +357,7 @@ class TumblrLikesExtractor(TumblrExtractor):
 class TumblrSearchExtractor(TumblrExtractor):
     """Extractor for a Tumblr search"""
     subcategory = "search"
-    pattern = (BASE_PATTERN + r"/search/([^/?#]+)"
+    pattern = (r"(?:https?://)?(?:www\.)?tumblr\.com/search/([^/?#]+)"
               r"(?:/([^/?#]+)(?:/([^/?#]+))?)?(?:/?\?([^#]+))?")
     example = "https://www.tumblr.com/search/QUERY"
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index 008ae6e..8ff32af 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -12,13 +12,15 @@ from .common import GalleryExtractor, Extractor, Message
 from .. import exception, text, util
 
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com/(([^/?#]+)"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com"
+LANG_PATTERN = BASE_PATTERN + r"/(([^/?#]+)"
 
 
 class WebtoonsBase():
     category = "webtoons"
     root = "https://www.webtoons.com"
     cookies_domain = ".webtoons.com"
+    request_interval = (0.5, 1.5)
 
     def setup_agegate_cookies(self):
         self.cookies_update({
@@ -34,7 +36,7 @@ class WebtoonsBase():
         response = Extractor.request(self, url, **kwargs)
         if response.history and "/ageGate" in response.url:
             raise exception.StopExtraction(
-                "HTTP redirect to age gate check ('%s')", response.request.url)
+                "HTTP redirect to age gate check ('%s')", response.url)
         return response
 
@@ -44,47 +46,19 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
     directory_fmt = ("{category}", "{comic}")
     filename_fmt = "{episode_no}-{num:>02}.{extension}"
     archive_fmt = "{title_no}_{episode_no}_{num}"
-    pattern = (BASE_PATTERN + r"/([^/?#]+)/([^/?#]+)/(?:[^/?#]+))"
-               r"/viewer(?:\?([^#'\"]+))")
+    pattern = (LANG_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)"
+               r"/viewer\?([^#'\"]+)")
     example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer"
                "?title_no=123&episode_no=12345")
-    test = (
-        (("https://www.webtoons.com/en/comedy/safely-endangered"
-          "/ep-572-earth/viewer?title_no=352&episode_no=572"), {
-            "url": "55bec5d7c42aba19e3d0d56db25fdf0b0b13be38",
-            "content": ("1748c7e82b6db910fa179f6dc7c4281b0f680fa7",
-                        "42055e44659f6ffc410b3fb6557346dfbb993df3",
-                        "49e1f2def04c6f7a6a3dacf245a1cd9abe77a6a9"),
-            "count": 5,
-        }),
-        (("https://www.webtoons.com/en/challenge/punderworld"
-          "/happy-earth-day-/viewer?title_no=312584&episode_no=40"), {
-            "exception": exception.NotFoundError,
-            "keyword": {
-                "comic": "punderworld",
-                "description": str,
-                "episode": "36",
-                "episode_no": "40",
-                "genre": "challenge",
-                "title": r"re:^Punderworld - .+",
-                "title_no": "312584",
-            },
-        }),
-    )
-
-    def __init__(self, match):
-        self.path, self.lang, self.genre, self.comic, self.query = \
-            match.groups()
-
-        url = "{}/{}/viewer?{}".format(self.root, self.path, self.query)
-        GalleryExtractor.__init__(self, match, url)
 
     def _init(self):
         self.setup_agegate_cookies()
 
-        params = text.parse_query(self.query)
+        path, self.lang, self.genre, self.comic, query = self.groups
+        params = text.parse_query(query)
         self.title_no = params.get("title_no")
         self.episode_no = params.get("episode_no")
+        self.gallery_url = "{}/{}/viewer?{}".format(self.root, path, query)
 
     def metadata(self, page):
         extr = text.extract_from(page)
@@ -124,32 +98,49 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
             "language" : util.code_to_language(self.lang),
         }
 
-    @staticmethod
-    def images(page):
-        return [
-            (url.replace("://webtoon-phinf.", "://swebtoon-phinf."), None)
-            for url in text.extract_iter(
-                page, 'class="_images" data-url="', '"')
-        ]
+    def images(self, page):
+        quality = self.config("quality")
+        if quality is None or quality == "original":
+            quality = {"jpg": False, "jpeg": False, "webp": False}
+        elif not quality:
+            quality = None
+        elif isinstance(quality, str):
+            quality = {"jpg": quality, "jpeg": quality}
+        elif isinstance(quality, int):
+            quality = "q" + str(quality)
+            quality = {"jpg": quality, "jpeg": quality}
+        elif not isinstance(quality, dict):
+            quality = None
+
+        results = []
+        for url in text.extract_iter(
+                page, 'class="_images" data-url="', '"'):
+
+            if quality is not None:
+                path, _, query = url.rpartition("?")
+                type = quality.get(path.rpartition(".")[2].lower())
+                if type is False:
+                    url = path
+                elif type:
+                    url = "{}?type={}".format(path, type)
+
+            url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
+            results.append((url, None))
+        return results
 
 
 class WebtoonsComicExtractor(WebtoonsBase, Extractor):
     """Extractor for an entire comic on webtoons.com"""
     subcategory = "comic"
     categorytransfer = True
-    pattern = (BASE_PATTERN + r"/([^/?#]+)/([^/?#]+))"
-               r"/list(?:\?([^#]+))")
+    pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)"
     example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123"
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
-        self.path, self.lang, self.genre, self.comic, self.query = \
-            match.groups()
-
     def _init(self):
         self.setup_agegate_cookies()
 
-        params = text.parse_query(self.query)
+        self.path, self.lang, self.genre, self.comic, query = self.groups
+        params = text.parse_query(query)
         self.title_no = params.get("title_no")
         self.page_no = text.parse_int(params.get("page"), 1)
 
@@ -164,7 +155,7 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
             path = "/{}/list?title_no={}&page={}".format(
                 self.path, self.title_no, self.page_no)
 
-            if page and path not in page:
+            if page is not None and path not in page:
                 return
 
             response = self.request(self.root + path)
@@ -182,11 +173,47 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
 
             self.page_no += 1
 
-    @staticmethod
-    def get_episode_urls(page):
+    def get_episode_urls(self, page):
         """Extract and return all episode urls in 'page'"""
         page = text.extr(page, 'id="_listUl"', '</ul>')
         return [
             match.group(0)
             for match in WebtoonsEpisodeExtractor.pattern.finditer(page)
         ]
+
+
+class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
+    """Extractor for webtoons.com artists"""
+    subcategory = "artist"
+    pattern = BASE_PATTERN + r"/p/community/([^/?#]+)/u/([^/?#]+)"
+    example = "https://www.webtoons.com/p/community/LANG/u/ARTIST"
+
+    def items(self):
+        self.setup_agegate_cookies()
+
+        for comic in self.comics():
+            comic["_extractor"] = WebtoonsComicExtractor
+            comic_url = self.root + comic["extra"]["episodeListPath"]
+            yield Message.Queue, comic_url, comic
+
+    def comics(self):
+        lang, artist = self.groups
+        language = util.code_to_language(lang).upper()
+
+        url = "{}/p/community/{}/u/{}".format(
+            self.root, lang, artist)
+        page = self.request(url).text
+        creator_id = text.extr(page, '\\"creatorId\\":\\"', '\\')
+
+        url = "{}/p/community/api/v1/creator/{}/titles".format(
+            self.root, creator_id)
+        params = {
+            "language": language,
+            "nextSize": "50",
+        }
+        headers = {
+            "language": language,
+        }
+        data = self.request(url, params=params, headers=headers).json()
+
+        return data["result"]["titles"]
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index ac1400e..0ad73c0 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -74,7 +74,6 @@ class ZerochanExtractor(BooruExtractor):
         extr = text.extract_from(page)
         data = {
             "id"      : text.parse_int(entry_id),
-            "author"  : jsonld["author"]["name"],
             "file_url": jsonld["contentUrl"],
             "date"    : text.parse_datetime(jsonld["datePublished"]),
             "width"   : text.parse_int(jsonld["width"][:-3]),
@@ -88,6 +87,11 @@ class ZerochanExtractor(BooruExtractor):
                 'id="source-url"', '</p>').rpartition("</s>")[2])),
         }
 
+        try:
+            data["author"] = jsonld["author"]["name"]
+        except Exception:
+            data["author"] = ""
+
         html = data["tags"]
         tags = data["tags"] = []
         for tag in html.split("<li class=")[1:]:
diff --git a/gallery_dl/extractor/zzup.py b/gallery_dl/extractor/zzup.py
index 05b12b4..20454b4 100644
--- a/gallery_dl/extractor/zzup.py
+++ b/gallery_dl/extractor/zzup.py
@@ -16,7 +16,7 @@ class ZzupGalleryExtractor(GalleryExtractor):
     filename_fmt = "{num:>03}.{extension}"
     archive_fmt = "{slug}_{num}"
     root = "https://zzup.com"
-    pattern = (r"(?:https?://)?(up\.|www\.)?zzup\.com(/(?:viewalbum|content)"
+    pattern = (r"(?:https?://)?(up\.|w+\.)?zzup\.com(/(?:viewalbum|content)"
               r"/[\w=]+/([^/?#]+)/[\w=]+)/(?:index|page-\d+)\.html")
     example = "https://zzup.com/content/xyz=/12345_TITLE/123=/index.html"
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index e662c34..6affc3e 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -495,6 +495,8 @@ _CONVERSIONS = {
     "s": str,
     "r": repr,
     "a": ascii,
+    "i": int,
+    "f": float,
 }
 _FORMAT_SPECIFIERS = {
     "?": _parse_optional,
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 21e1aa0..54cf126 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -269,7 +269,7 @@ class PathFormat():
         try:
             for fmt in self.directory_formatters:
                 segment = fmt(kwdict).strip()
-                if strip and segment != "..":
+                if strip and segment not in {".", ".."}:
                     # remove trailing dots and spaces (#647)
                     segment = segment.rstrip(strip)
                 if segment:
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 3ef9fbc..fbb3fb8 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -108,6 +108,7 @@ class MetadataPP(PostProcessor):
         self.omode = options.get("open", omode)
         self.encoding = options.get("encoding", "utf-8")
         self.skip = options.get("skip", False)
+        self.meta_path = options.get("metadata-path")
 
     def run(self, pathfmt):
         archive = self.archive
@@ -120,6 +121,9 @@ class MetadataPP(PostProcessor):
         directory = self._directory(pathfmt)
         path = directory + self._filename(pathfmt)
 
+        if self.meta_path is not None:
+            pathfmt.kwdict[self.meta_path] = path
+
         if self.skip and os.path.exists(path):
             return
 
@@ -180,7 +184,10 @@ class MetadataPP(PostProcessor):
             pathfmt.directory_formatters = self._directory_formatters
             pathfmt.directory_conditions = ()
             segments = pathfmt.build_directory(pathfmt.kwdict)
-            directory = pathfmt.clean_path(os.sep.join(segments) + os.sep)
+            if segments:
+                directory = pathfmt.clean_path(os.sep.join(segments) + os.sep)
+            else:
+                directory = "." + os.sep
             return os.path.join(self._base(pathfmt), directory)
         finally:
             pathfmt.directory_conditions = conditions
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index 3a32b39..c1bfc20 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -156,12 +156,7 @@ class UgoiraPP(PostProcessor):
             return self.log.debug("", exc_info=exc)
 
         if self.convert(pathfmt, tempdir):
-            if self.delete:
-                pathfmt.delete = True
-            elif pathfmt.extension != "zip":
-                self.log.info(pathfmt.filename)
-                pathfmt.set_extension("zip")
-                pathfmt.build_path()
+            pathfmt.delete = self.delete
 
     def convert_from_files(self, pathfmt):
         if not self._convert_files:
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 76e6517..eabd4ab 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -700,6 +700,9 @@ EXECUTABLE = getattr(sys, "frozen", False)
 USERAGENT = "gallery-dl/" + version.__version__
 USERAGENT_FIREFOX = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:{}.0) "
                      "Gecko/20100101 Firefox/{}.0").format(_ff_ver, _ff_ver)
+USERAGENT_CHROME = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+                    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 "
+                    "Safari/537.36")
 SPECIAL_EXTRACTORS = {"oauth", "recursive", "generic"}
 GLOBALS = {
     "contains" : contains,
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 43b234d..87169e2 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.29.3"
+__version__ = "1.29.4"
 __variant__ = None
```
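
The `discord.py` change to `build_server_and_channels` relies on a small sorting trick: the key function maps category channels (Discord channel type 4) to `False` and everything else to `True`. Since `False < True` and Python's sort is stable, categories are processed first while the relative order inside each group is preserved. A minimal sketch with made-up channel data:

```python
# Sketch of the sort key used in the discord.py hunk; the channel dicts
# below are illustrative, not real API responses.
channels = [
    {"id": "100", "type": 0},  # text channel
    {"id": "200", "type": 4},  # category
    {"id": "300", "type": 2},  # voice channel
    {"id": "400", "type": 4},  # category
]

# type 4 maps to False and sorts first; the stable sort keeps the
# original order within each group.
ordered = sorted(channels, key=lambda ch: ch["type"] != 4)
print([ch["id"] for ch in ordered])  # ['200', '400', '100', '300']
```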
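
In the `gelbooru.py` hunk, `img4.gelbooru.com` becomes the primary video host and `_video_fallback` now also yields `img3` ahead of the older mirrors. Because `post["_fallback"]` is a generator, the alternative hosts are only produced if the primary URL fails. A rough sketch of the consumption pattern; the `try_download` loop is an illustrative stand-in for gallery-dl's downloader:

```python
def _video_fallback(path):
    # Mirrors are yielded lazily, newest first, as in the hunk above.
    yield "https://img3.gelbooru.com" + path
    yield "https://img2.gelbooru.com" + path
    yield "https://img1.gelbooru.com" + path

def try_download(url, fallback):
    # Illustrative stand-in: attempt the primary URL, then each mirror.
    for candidate in (url, *fallback):
        print("trying", candidate)

path = "/images/ab/cd/abcd1234.webm"  # placeholder md5-based path
try_download("https://img4.gelbooru.com" + path, _video_fallback(path))
```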
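
The `issuu.py` rewrite stops looking for a `data-json` script tag and instead pulls a fragment of backslash-escaped JSON straight out of the page; prepending `'{"":'` turns the extracted tail (which starts just after a key) back into a parseable object. A self-contained sketch of the unescaping trick, with an illustrative fragment in place of real page content:

```python
import json

# The page embeds JSON with escaped quotes; extraction starts right after
# the '{\"documentTextVersion\":' marker, so '{"":' re-wraps the remainder
# into a valid object. The fragment below is made up for demonstration.
fragment = ('\\"abc\\",\\"initialDocumentData\\":'
            '{\\"document\\":{\\"pageCount\\":\\"12\\"}}}')
data = json.loads('{"":' + fragment.replace('\\"', '"'))
print(data["initialDocumentData"]["document"]["pageCount"])  # 12
```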
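
The rewritten `_file_url` in `rule34xyz.py` no longer takes file URLs from the API response; it rebuilds them from the post id, sharded by `post_id // 1000`, with the new `FORMATS` table mapping a format id to a file suffix. A worked example with an illustrative post id:

```python
# Worked example of the URL layout from the rule34xyz.py hunk.
FORMATS = {
    "10" : "pic.jpg",
    "100": "mov.mp4",
    "101": "mov720.mp4",
    "102": "mov480.mp4",
}

root = "https://rule34.xyz"  # or the CDN root, depending on files[fmt][0]
post_id = 123456             # illustrative
extension = FORMATS["100"]

url = "{}/posts/{}/{}/{}.{}".format(
    root, post_id // 1000, post_id, post_id, extension)
print(url)  # https://rule34.xyz/posts/123/123456/123456.mov.mp4

# "format" becomes the part of the suffix before its first dot:
print(extension.partition(".")[0])  # mov
```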
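
The new `images()` in `webtoons.py` first normalizes the `quality` option into a per-extension mapping: `False` strips the `?type=...` query entirely, a string or int applies a single `type` value to jpg/jpeg, and a dict passes through unchanged. Pulled out as a standalone function for clarity, with the same branch order as the hunk:

```python
def normalize_quality(quality):
    # Mirrors the branch order of the images() hunk above.
    if quality is None or quality == "original":
        return {"jpg": False, "jpeg": False, "webp": False}  # strip ?type=...
    if not quality:
        return None                          # leave URLs unmodified
    if isinstance(quality, str):
        return {"jpg": quality, "jpeg": quality}
    if isinstance(quality, int):
        q = "q" + str(quality)
        return {"jpg": q, "jpeg": q}
    if isinstance(quality, dict):
        return quality
    return None

print(normalize_quality(90))          # {'jpg': 'q90', 'jpeg': 'q90'}
print(normalize_quality("original"))  # {'jpg': False, 'jpeg': False, 'webp': False}
```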
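
`formatter.py` gains two conversion characters, `i` and `f`. Conversions in gallery-dl format strings are applied with `!` after a field name, so something like `{episode_no!i}` should now coerce a string value to an integer before formatting. A minimal sketch of the lookup; the `convert` helper is illustrative, not gallery-dl's internal API:

```python
_CONVERSIONS = {
    "s": str,
    "r": repr,
    "a": ascii,
    "i": int,    # new: "{value!i}" -> int(value)
    "f": float,  # new: "{value!f}" -> float(value)
}

def convert(value, conversion):
    # Illustrative helper: look up the conversion character and apply it.
    return _CONVERSIONS[conversion](value)

print(convert("0042", "i"))  # 42
print(convert("2.50", "f"))  # 2.5
```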
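
The `path.py` fix widens an existing guard: directory segments are normally stripped of trailing dots and spaces (#647), but a segment that is exactly `.` would be stripped down to an empty string and silently vanish from the built path, just as `..` would have before the earlier fix. A quick demonstration:

```python
strip = ". "  # trailing characters gallery-dl removes from segments (#647)

for segment in ("photos.", ".", ".."):
    if strip and segment not in {".", ".."}:  # the widened guard
        segment = segment.rstrip(strip)
    print(repr(segment))
# 'photos'  - trailing dot removed, as before
# '.'       - kept; the old code reduced it to ''
# '..'      - kept; already special-cased previously
```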
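
The metadata post processor's new `metadata-path` option stores the path of the written metadata file in the metadata dict under a key of your choosing, so later steps can reference it in their own format strings. A sketch of the effect reduced to plain dicts; the key name `json_path` and the paths are made up:

```python
# What the metadata.py hunk does, with stand-ins for gallery-dl internals.
options = {"metadata-path": "json_path"}   # post processor configuration
meta_path = options.get("metadata-path")   # -> self.meta_path

kwdict = {"title": "example"}              # stands in for pathfmt.kwdict
path = "/downloads/example.json"           # metadata file about to be written

if meta_path is not None:
    kwdict[meta_path] = path               # usable later as {json_path}

print(kwdict["json_path"])  # /downloads/example.json
```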
