|           |                                                      |
|-----------|------------------------------------------------------|
| author    | 2025-09-23 07:44:37 -0400                            |
| committer | 2025-09-23 07:44:37 -0400                            |
| commit    | 42b62671fabfdcf983a9575221420d85f7fbcac1 (patch)     |
| tree      | fa6b2af249a7216aae5c70a926c6d08be1ac55a6 /gallery_dl |
| parent    | 3b7f8716690b7aa1994a9cb387bbc7215e01a4ed (diff)      |
New upstream version 1.30.8 (tag: upstream/1.30.8)
Diffstat (limited to 'gallery_dl')
30 files changed, 933 insertions, 236 deletions
diff --git a/gallery_dl/extractor/2ch.py b/gallery_dl/extractor/2ch.py
index f5bb7b7..912a251 100644
--- a/gallery_dl/extractor/2ch.py
+++ b/gallery_dl/extractor/2ch.py
@@ -4,37 +4,41 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-"""Extractors for https://2ch.hk/"""
+"""Extractors for https://2ch.su/"""

 from .common import Extractor, Message
 from .. import text, util

+BASE_PATTERN = r"(?:https?://)?2ch\.(su|life|hk)"
+

 class _2chThreadExtractor(Extractor):
     """Extractor for 2ch threads"""
     category = "2ch"
     subcategory = "thread"
-    root = "https://2ch.hk"
+    root = "https://2ch.su"
     directory_fmt = ("{category}", "{board}", "{thread} {title}")
     filename_fmt = "{tim}{filename:? //}.{extension}"
     archive_fmt = "{board}_{thread}_{tim}"
-    pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/res/(\d+)"
-    example = "https://2ch.hk/a/res/12345.html"
+    pattern = rf"{BASE_PATTERN}/([^/?#]+)/res/(\d+)"
+    example = "https://2ch.su/a/res/12345.html"

     def __init__(self, match):
+        tld = match[1]
+        self.root = f"https://2ch.{'su' if tld == 'hk' else tld}"
         Extractor.__init__(self, match)
-        self.board, self.thread = match.groups()

     def items(self):
-        url = f"{self.root}/{self.board}/res/{self.thread}.json"
+        _, board, thread = self.groups
+        url = f"{self.root}/{board}/res/{thread}.json"
         posts = self.request_json(url)["threads"][0]["posts"]

         op = posts[0]
         title = op.get("subject") or text.remove_html(op["comment"])

         thread = {
-            "board" : self.board,
-            "thread": self.thread,
+            "board" : board,
+            "thread": thread,
             "title" : text.unescape(title)[:50],
         }

@@ -61,16 +65,17 @@ class _2chBoardExtractor(Extractor):
     """Extractor for 2ch boards"""
     category = "2ch"
     subcategory = "board"
-    root = "https://2ch.hk"
-    pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/?$"
-    example = "https://2ch.hk/a/"
+    root = "https://2ch.su"
+    pattern = rf"{BASE_PATTERN}/([^/?#]+)/?$"
+    example = "https://2ch.su/a/"

     def __init__(self, match):
+        tld = match[1]
+        self.root = f"https://2ch.{'su' if tld == 'hk' else tld}"
         Extractor.__init__(self, match)
-        self.board = match[1]

     def items(self):
-        base = f"{self.root}/{self.board}"
+        base = f"{self.root}/{self.groups[1]}"

         # index page
         url = f"{base}/index.json"
diff --git a/gallery_dl/extractor/4archive.py b/gallery_dl/extractor/4archive.py
index c9be2a4..4c43464 100644
--- a/gallery_dl/extractor/4archive.py
+++ b/gallery_dl/extractor/4archive.py
@@ -62,7 +62,8 @@ class _4archiveThreadExtractor(Extractor):
         data = {
             "name": extr('class="name">', "</span>"),
             "date": text.parse_datetime(
-                extr('class="dateTime postNum" >', "<").strip(),
+                (extr('class="dateTime">', "<") or
+                 extr('class="dateTime postNum" >', "<")).strip(),
                 "%Y-%m-%d %H:%M:%S"),
             "no"  : text.parse_int(extr(">Post No.", "<")),
         }
@@ -70,8 +71,7 @@
         extr('class="fileText"', ">File: <a")
         data.update({
             "url"     : extr('href="', '"'),
-            "filename": extr(
-                'rel="noreferrer noopener"', "</a>").strip()[1:],
+            "filename": extr('alt="Image: ', '"'),
             "size"    : text.parse_bytes(extr(" (", ", ")[:-1]),
             "width"   : text.parse_int(extr("", "x")),
             "height"  : text.parse_int(extr("", "px")),
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index b32fcd1..abdb6cc 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -73,6 +73,7 @@ modules = [
     "girlswithmuscle",
     "gofile",
     "hatenablog",
+    "hdoujin",
     "hentai2read",
     "hentaicosplays",
     "hentaifoundry",
@@ -88,6 +89,7 @@ modules = [
     "imagefap",
     "imgbb",
     "imgbox",
+    "imgpile",
     "imgth",
     "imgur",
     "imhentai",
@@ -118,6 +120,7 @@ modules = [
     "manganelo",
     "mangapark",
     "mangaread",
+    "mangataro",
     "mangoxo",
     "misskey",
     "motherless",
@@ -188,6 +191,7 @@ modules = [
     "tcbscans",
     "telegraph",
     "tenor",
+    "thehentaiworld",
     "tiktok",
     "tmohentai",
     "toyhouse",
diff --git a/gallery_dl/extractor/bellazon.py b/gallery_dl/extractor/bellazon.py
index 5c9b9cd..5dcb6a5 100644
--- a/gallery_dl/extractor/bellazon.py
+++ b/gallery_dl/extractor/bellazon.py
@@ -20,32 +20,61 @@ class BellazonExtractor(Extractor):
     root = "https://www.bellazon.com/main"
     directory_fmt = ("{category}", "{thread[section]}",
                      "{thread[title]} ({thread[id]})")
-    filename_fmt = "{post[id]}_{num:>02}_{id}.{extension}"
-    archive_fmt = "{post[id]}/{filename}"
+    filename_fmt = "{post[id]}_{num:>02}_{id}_{filename}.{extension}"
+    archive_fmt = "{post[id]}/{id}_{filename}"

     def items(self):
-        extract_urls = text.re(r'<a ([^>]*?href="([^"]+)".*?)</a>').findall
-        native = f"{self.root}/"
+        native = (f"{self.root}/", f"{self.root[6:]}/")
+        extract_urls = text.re(
+            r'(?s)<('
+            r'(?:video .*?<source src|a [^>]*?href)="([^"]+).*?</a>'
+            r'|img [^>]*?src="([^"]+)"[^>]*>'
+            r')'
+        ).findall
+
+        if self.config("quoted", False):
+            strip_quoted = None
+        else:
+            strip_quoted = text.re(r"(?s)<blockquote .*?</blockquote>").sub

         for post in self.posts():
-            urls = extract_urls(post["content"])
+            if strip_quoted is None:
+                urls = extract_urls(post["content"])
+            else:
+                urls = extract_urls(strip_quoted("", post["content"]))
+
             data = {"post": post}
             post["count"] = data["count"] = len(urls)
             yield Message.Directory, data
-            for data["num"], (info, url) in enumerate(urls, 1):
-                url = text.unescape(url)
+            data["num"] = 0
+            for info, url, url_img in urls:
+                url = text.unescape(url or url_img)
+
                 if url.startswith(native):
+                    if "/uploads/emoticons/" in url or "/profile/" in url:
+                        continue
+                    data["num"] += 1
+
                     if not (alt := text.extr(info, ' alt="', '"')) or (
                             alt.startswith("post-") and "_thumb." in alt):
                         name = url
                     else:
                         name = text.unescape(alt)
+
                     dc = text.nameext_from_url(name, data.copy())
                     dc["id"] = text.extr(info, 'data-fileid="', '"')
                     if ext := text.extr(info, 'data-fileext="', '"'):
                         dc["extension"] = ext
+                    elif "/core/interface/file/attachment.php" in url:
+                        if not dc["id"]:
+                            dc["id"] = url.rpartition("?id=")[2]
+                        if name := text.extr(info, ">", "<").strip():
+                            text.nameext_from_url(name, dc)
+
+                    if url[0] == "/":
+                        url = f"https:{url}"
                     yield Message.Url, url, dc
+
                 else:
                     yield Message.Queue, url, data
@@ -70,6 +99,28 @@ class BellazonExtractor(Extractor):
             pnum += 1
             url = f"{base}/page/{pnum}/"

+    def _pagination_reverse(self, base, pnum=None):
+        base = f"{self.root}{base}"
+
+        url = f"{base}/page/9999/"  # force redirect to highest page number
+        with self.request(url) as response:
+            parts = response.url.rsplit("/", 3)
+            pnum = text.parse_int(parts[2]) if parts[1] == "page" else 1
+            page = response.text
+
+        while True:
+            yield page
+
+            pnum -= 1
+            if pnum > 1:
+                url = f"{base}/page/{pnum}/"
+            elif pnum == 1:
+                url = f"{base}/"
+            else:
+                return
+
+            page = self.request(url).text
+
     def _parse_thread(self, page):
         schema = self._extract_jsonld(page)
         author = schema["author"]
@@ -88,7 +139,7 @@ class BellazonExtractor(Extractor):
             "posts": stats[1]["userInteractionCount"],
             "date" : text.parse_datetime(schema["datePublished"]),
             "date_updated": text.parse_datetime(schema["dateModified"]),
-            "description" : text.unescape(schema["text"]),
+            "description" : text.unescape(schema["text"]).strip(),
             "section" : path[-2],
             "author"  : author["name"],
             "author_url" : url_a,
@@ -123,7 +174,7 @@ class BellazonExtractor(Extractor):
 class BellazonPostExtractor(BellazonExtractor):
     subcategory = "post"
     pattern = (rf"{BASE_PATTERN}(/topic/\d+-[\w-]+(?:/page/\d+)?)"
-               rf"/?#findComment-(\d+)")
+               rf"/?#(?:findC|c)omment-(\d+)")
     example = "https://www.bellazon.com/main/topic/123-SLUG/#findComment-12345"

     def posts(self):
@@ -145,10 +196,22 @@ class BellazonThreadExtractor(BellazonExtractor):
     example = "https://www.bellazon.com/main/topic/123-SLUG/"

     def posts(self):
-        for page in self._pagination(*self.groups):
+        if (order := self.config("order-posts")) and \
+                order[0] not in ("d", "r"):
+            pages = self._pagination(*self.groups)
+            reverse = False
+        else:
+            pages = self._pagination_reverse(*self.groups)
+            reverse = True
+
+        for page in pages:
             if "thread" not in self.kwdict:
                 self.kwdict["thread"] = self._parse_thread(page)
-            for html in text.extract_iter(page, "<article ", "</article>"):
+            posts = text.extract_iter(page, "<article ", "</article>")
+            if reverse:
+                posts = list(posts)
+                posts.reverse()
+            for html in posts:
                 yield self._parse_post(html)
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index cf5bce1..14ebc48 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -162,7 +162,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
                 file["name"] = util.json_loads(text.extr(
                     item, 'original:', ',\n').replace("\\'", "'"))
                 file["slug"] = util.json_loads(text.extr(
-                    item, 'slug: ', ',\n'))
+                    item, 'slug: ', ',\n').replace("\\'", "'"))
                 file["uuid"] = text.extr(
                     item, 'name: "', ".")
                 file["size"] = text.parse_int(text.extr(
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index 6ba4d08..67fdb39 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -50,6 +50,10 @@ BASE_PATTERN = CheveretoExtractor.update({
         "root": "https://imagepond.net",
         "pattern": r"imagepond\.net",
     },
+    "imglike": {
+        "root": "https://imglike.com",
+        "pattern": r"imglike\.com",
+    },
 })


@@ -152,6 +156,18 @@ class CheveretoAlbumExtractor(CheveretoExtractor):
             yield Message.Queue, image, data


+class CheveretoCategoryExtractor(CheveretoExtractor):
+    """Extractor for chevereto galleries"""
+    subcategory = "category"
+    pattern = BASE_PATTERN + r"(/category/[^/?#]+)"
+    example = "https://imglike.com/category/TITLE"
+
+    def items(self):
+        data = {"_extractor": CheveretoImageExtractor}
+        for image in self._pagination(self.root + self.path):
+            yield Message.Queue, image, data
+
+
 class CheveretoUserExtractor(CheveretoExtractor):
     """Extractor for chevereto users"""
     subcategory = "user"
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index f8ad07a..29c7763 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -278,6 +278,23 @@ class DanbooruTagExtractor(DanbooruExtractor):
         return self._pagination("/posts.json", {"tags": self.tags}, prefix)


+class DanbooruRandomExtractor(DanbooruTagExtractor):
+    """Extractor for a random danbooru post"""
+    subcategory = "random"
+    pattern = BASE_PATTERN + r"/posts/random(?:\?(?:[^&#]*&)*tags=([^&#]*))?"
+    example = "https://danbooru.donmai.us/posts/random?tags=TAG"
+
+    def metadata(self):
+        tags = self.groups[-1] or ""
+        self.tags = text.unquote(tags.replace("+", " "))
+        return {"search_tags": self.tags}
+
+    def posts(self):
+        posts = self.request_json(self.root + "/posts/random.json",
+                                  params={"tags": self.tags or None})
+        return (posts,) if isinstance(posts, dict) else posts
+
+
 class DanbooruPoolExtractor(DanbooruExtractor):
     """Extractor for Danbooru pools"""
     subcategory = "pool"
diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py
index bf24941..6061737 100644
--- a/gallery_dl/extractor/facebook.py
+++ b/gallery_dl/extractor/facebook.py
@@ -369,6 +369,16 @@ class FacebookExtractor(Extractor):
                 for edge in (user["profile_tabs"]["profile_user"]
                              ["timeline_nav_app_sections"]["edges"])
             ]
+
+            if bio := text.extr(page, '"best_description":{"text":"', '"'):
+                user["biography"] = self.decode_all(bio)
+            elif (pos := page.find(
+                    '"__module_operation_ProfileCometTileView_profileT')) >= 0:
+                user["biography"] = self.decode_all(text.rextr(
+                    page, '"text":"', '"', pos))
+            else:
+                user["biography"] = text.unescape(text.remove_html(text.extr(
+                    page, "</span></span></h2>", "<ul>")))
         except Exception:
             if user is None:
                 self.log.debug("Failed to extract user data: %s", data)
diff --git a/gallery_dl/extractor/hdoujin.py b/gallery_dl/extractor/hdoujin.py
new file mode 100644
index 0000000..080b899
--- /dev/null
+++ b/gallery_dl/extractor/hdoujin.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://hdoujin.org/"""
+
+from . import schalenetwork
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?(hdoujin\.(?:org|net))"
+
+
+class HdoujinBase():
+    """Base class for hdoujin extractors"""
+    category = "hdoujin"
+    root = "https://hdoujin.org"
+    root_api = "https://api.hdoujin.org"
+    root_auth = "https://auth.hdoujin.org"
+
+
+class HdoujinGalleryExtractor(
+        HdoujinBase, schalenetwork.SchalenetworkGalleryExtractor):
+    pattern = rf"{BASE_PATTERN}/(?:g|reader)/(\d+)/(\w+)"
+    example = "https://hdoujin.org/g/12345/67890abcdef/"
+
+
+class HdoujinSearchExtractor(
+        HdoujinBase, schalenetwork.SchalenetworkSearchExtractor):
+    pattern = rf"{BASE_PATTERN}/(?:tag/([^/?#]+)|browse)?(?:/?\?([^#]*))?$"
+    example = "https://hdoujin.org/browse?s=QUERY"
+
+
+class HdoujinFavoriteExtractor(
+        HdoujinBase, schalenetwork.SchalenetworkFavoriteExtractor):
+    pattern = rf"{BASE_PATTERN}/favorites(?:\?([^#]*))?"
+    example = "https://hdoujin.org/favorites"
+
+
+HdoujinBase.extr_class = HdoujinGalleryExtractor
diff --git a/gallery_dl/extractor/imgpile.py b/gallery_dl/extractor/imgpile.py
new file mode 100644
index 0000000..9fc3a9c
--- /dev/null
+++ b/gallery_dl/extractor/imgpile.py
@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://imgpile.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?imgpile\.com"
+
+
+class ImgpileExtractor(Extractor):
+    """Base class for imgpile extractors"""
+    category = "imgpile"
+    root = "https://imgpile.com"
+    directory_fmt = ("{category}", "{post[author]}",
+                     "{post[title]} ({post[id_slug]})")
+    archive_fmt = "{post[id_slug]}_{id}"
+
+    def items(self):
+        pass
+
+
+class ImgpilePostExtractor(ImgpileExtractor):
+    subcategory = "post"
+    pattern = rf"{BASE_PATTERN}/p/(\w+)"
+    example = "https://imgpile.com/p/AbCdEfG"
+
+    def items(self):
+        post_id = self.groups[0]
+        url = f"{self.root}/p/{post_id}"
+        page = self.request(url).text
+        extr = text.extract_from(page)
+
+        post = {
+            "id_slug": post_id,
+            "title"  : text.unescape(extr("<title>", " - imgpile<")),
+            "id"     : text.parse_int(extr('data-post-id="', '"')),
+            "author" : extr('/u/', '"'),
+            "score"  : text.parse_int(text.remove_html(extr(
+                'class="post-score">', "</"))),
+            "views"  : text.parse_int(extr(
+                'class="meta-value">', "<").replace(",", "")),
+            "tags"   : text.split_html(extr(
+                " <!-- Tags -->", '<!-- "')),
+        }
+
+        files = self._extract_files(extr)
+        data = {"post": post}
+        data["count"] = post["count"] = len(files)
+
+        yield Message.Directory, data
+        for data["num"], file in enumerate(files, 1):
+            data.update(file)
+            url = file["url"]
+            yield Message.Url, url, text.nameext_from_url(url, data)
+
+    def _extract_files(self, extr):
+        files = []
+
+        while True:
+            media = extr('lass="post-media', '</div>')
+            if not media:
+                break
+            files.append({
+                "id_slug": text.extr(media, 'data-id="', '"'),
+                "id"     : text.parse_int(text.extr(
+                    media, 'data-media-id="', '"')),
+                "url": f"""http{text.extr(media, '<a href="http', '"')}""",
+            })
+        return files
+
+
+class ImgpileUserExtractor(ImgpileExtractor):
+    subcategory = "user"
+    pattern = rf"{BASE_PATTERN}/u/([^/?#]+)"
+    example = "https://imgpile.com/u/USER"
+
+    def items(self):
+        url = f"{self.root}/api/v1/posts"
+        params = {
+            "limit"     : "100",
+            "sort"      : "latest",
+            "period"    : "all",
+            "visibility": "public",
+            #  "moderation_status": "approved",
+            "username"  : self.groups[0],
+        }
+        headers = {
+            "Accept"        : "application/json",
+            #  "Referer"     : "https://imgpile.com/u/USER",
+            "Content-Type"  : "application/json",
+            #  "X-CSRF-TOKEN": "",
+            "Sec-Fetch-Dest": "empty",
+            "Sec-Fetch-Mode": "cors",
+            "Sec-Fetch-Site": "same-origin",
+        }
+
+        base = f"{self.root}/p/"
+        while True:
+            data = self.request_json(url, params=params, headers=headers)
+
+            if params is not None:
+                params = None
+                self.kwdict["total"] = data["meta"]["total"]
+
+            for item in data["data"]:
+                item["_extractor"] = ImgpilePostExtractor
+                url = f"{base}{item['slug']}"
+                yield Message.Queue, url, item
+
+            url = data["links"].get("next")
+            if not url:
+                return
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 9b8f8c9..00e06b5 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -95,7 +95,7 @@ class InstagramExtractor(Extractor):
             if videos:
                 file["_http_headers"] = videos_headers
             text.nameext_from_url(url, file)
-            if videos_dash:
+            if videos_dash and "_ytdl_manifest_data" in post:
                 file["_fallback"] = (url,)
                 file["_ytdl_manifest"] = "dash"
                 url = f"ytdl:{post['post_url']}{file['num']}.mp4"
@@ -505,10 +505,12 @@ class InstagramTaggedExtractor(InstagramExtractor):
     def metadata(self):
         if self.item.startswith("id:"):
             self.user_id = self.item[3:]
-            return {"tagged_owner_id": self.user_id}
-
-        self.user_id = self.api.user_id(self.item)
-        user = self.api.user_by_name(self.item)
+            if not self.config("metadata"):
+                return {"tagged_owner_id": self.user_id}
+            user = self.api.user_by_id(self.user_id)
+        else:
+            self.user_id = self.api.user_id(self.item)
+            user = self.api.user_by_name(self.item)

         return {
             "tagged_owner_id" : user["id"],
diff --git a/gallery_dl/extractor/iwara.py b/gallery_dl/extractor/iwara.py
index 179909b..8af2f42 100644
--- a/gallery_dl/extractor/iwara.py
+++ b/gallery_dl/extractor/iwara.py
@@ -45,6 +45,7 @@ class IwaraExtractor(Extractor):
                         image["id"], exc.__class__.__name__, exc)
                     continue

+            group_info["type"] = "image"
             group_info["count"] = len(files)
             yield Message.Directory, group_info
             for num, file in enumerate(files, 1):
@@ -102,34 +103,37 @@ class IwaraExtractor(Extractor):
         raise exception.AbortExtraction(f"Unsupported result type '{type}'")

     def extract_media_info(self, item, key, include_file_info=True):
-        title = t.strip() if (t := item.get("title")) else ""
+        info = {
+            "id"      : item["id"],
+            "slug"    : item.get("slug"),
+            "rating"  : item.get("rating"),
+            "likes"   : item.get("numLikes"),
+            "views"   : item.get("numViews"),
+            "comments": item.get("numComments"),
+            "tags"    : [t["id"] for t in item.get("tags") or ()],
+            "title"   : t.strip() if (t := item.get("title")) else "",
+            "description": t.strip() if (t := item.get("body")) else "",
+        }

         if include_file_info:
             file_info = item if key is None else item.get(key) or {}
             filename, _, extension = file_info.get("name", "").rpartition(".")

-            return {
-                "id"       : item["id"],
-                "file_id"  : file_info.get("id"),
-                "title"    : title,
-                "filename" : filename,
-                "extension": extension,
-                "date"     : text.parse_datetime(
-                    file_info.get("createdAt"), "%Y-%m-%dT%H:%M:%S.%fZ"),
-                "date_updated": text.parse_datetime(
-                    file_info.get("updatedAt"), "%Y-%m-%dT%H:%M:%S.%fZ"),
-                "mime"     : file_info.get("mime"),
-                "size"     : file_info.get("size"),
-                "width"    : file_info.get("width"),
-                "height"   : file_info.get("height"),
-                "duration" : file_info.get("duration"),
-                "type"     : file_info.get("type"),
-            }
-        else:
-            return {
-                "id"   : item["id"],
-                "title": title,
-            }
+            info["file_id"] = file_info.get("id")
+            info["filename"] = filename
+            info["extension"] = extension
+            info["date"] = text.parse_datetime(
+                file_info.get("createdAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
+            info["date_updated"] = text.parse_datetime(
+                file_info.get("updatedAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
+            info["mime"] = file_info.get("mime")
+            info["size"] = file_info.get("size")
+            info["width"] = file_info.get("width")
+            info["height"] = file_info.get("height")
+            info["duration"] = file_info.get("duration")
+            info["type"] = file_info.get("type")
+
+        return info

     def extract_user_info(self, profile):
         user = profile.get("user") or {}
diff --git a/gallery_dl/extractor/kemono.py b/gallery_dl/extractor/kemono.py
index fc5972c..1f70031 100644
--- a/gallery_dl/extractor/kemono.py
+++ b/gallery_dl/extractor/kemono.py
@@ -407,7 +407,11 @@ class KemonoDiscordExtractor(KemonoExtractor):
             r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
         find_hash = util.re(HASH_PATTERN).match

-        posts = self.api.discord_channel(channel_id)
+        if (order := self.config("order-posts")) and order[0] in ("r", "d"):
+            posts = self.api.discord_channel(channel_id, channel["post_count"])
+        else:
+            posts = self.api.discord_channel(channel_id)
+
         if max_posts := self.config("max-posts"):
             posts = itertools.islice(posts, max_posts)

@@ -627,9 +631,12 @@ class KemonoAPI():
         endpoint = f"/{service}/user/{creator_id}/tags"
         return self._call(endpoint)

-    def discord_channel(self, channel_id):
+    def discord_channel(self, channel_id, post_count=None):
         endpoint = f"/discord/channel/{channel_id}"
-        return self._pagination(endpoint, {}, 150)
+        if post_count is None:
+            return self._pagination(endpoint, {}, 150)
+        else:
+            return self._pagination_reverse(endpoint, {}, 150, post_count)

     def discord_channel_lookup(self, server_id):
         endpoint = f"/discord/channel/lookup/{server_id}"
@@ -670,3 +677,18 @@ class KemonoAPI():
             if len(data) < batch:
                 return
             params["o"] += batch
+
+    def _pagination_reverse(self, endpoint, params, batch, count):
+        params["o"] = count // batch * batch
+
+        while True:
+            data = self._call(endpoint, params)
+
+            if not data:
+                return
+            data.reverse()
+            yield from data
+
+            if not params["o"]:
+                return
+            params["o"] -= batch
diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py
index c700a29..b0198d5 100644
--- a/gallery_dl/extractor/lensdump.py
+++ b/gallery_dl/extractor/lensdump.py
@@ -100,7 +100,8 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
     filename_fmt = "{category}_{id}{title:?_//}.{extension}"
     directory_fmt = ("{category}",)
     archive_fmt = "{id}"
-    pattern = r"(?:https?://)?(?:(?:i\d?\.)?lensdump\.com|\w\.l3n\.co)/i/(\w+)"
+    pattern = (r"(?:https?://)?(?:(?:i\d?\.)?lensdump\.com|\w\.l3n\.co)"
+               r"/(?:i/)?(\w+)")
     example = "https://lensdump.com/i/ID"

     def items(self):
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 225560d..fbed328 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -96,6 +96,57 @@ class MangadexExtractor(Extractor):
         return data


+class MangadexCoversExtractor(MangadexExtractor):
+    """Extractor for mangadex manga covers"""
+    subcategory = "covers"
+    directory_fmt = ("{category}", "{manga}", "Covers")
+    filename_fmt = "{volume:>02}_{lang}.{extension}"
+    archive_fmt = "c_{cover_id}"
+    pattern = (rf"{BASE_PATTERN}/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)"
+               r"(?:/[^/?#]+)?\?tab=art")
+    example = ("https://mangadex.org/title"
+               "/01234567-89ab-cdef-0123-456789abcdef?tab=art")
+
+    def items(self):
+        base = f"{self.root}/covers/{self.uuid}/"
+        for cover in self.api.covers_manga(self.uuid):
+            data = self._transform_cover(cover)
+            name = data["cover"]
+            text.nameext_from_url(name, data)
+            data["cover_id"] = data["filename"]
+            yield Message.Directory, data
+            yield Message.Url, f"{base}{name}", data
+
+    def _transform_cover(self, cover):
+        relationships = defaultdict(list)
+        for item in cover["relationships"]:
+            relationships[item["type"]].append(item)
+        manga = self.api.manga(relationships["manga"][0]["id"])
+        for item in manga["relationships"]:
+            relationships[item["type"]].append(item)
+
+        cattributes = cover["attributes"]
+        mattributes = manga["attributes"]
+
+        return {
+            "manga"   : (mattributes["title"].get("en") or
+                         next(iter(mattributes["title"].values()))),
+            "manga_id": manga["id"],
+            "status"  : mattributes["status"],
+            "author"  : [author["attributes"]["name"]
+                         for author in relationships["author"]],
+            "artist"  : [artist["attributes"]["name"]
+                         for artist in relationships["artist"]],
+            "tags"    : [tag["attributes"]["name"]["en"]
+                         for tag in mattributes["tags"]],
+            "cover"   : cattributes["fileName"],
+            "lang"    : cattributes.get("locale"),
+            "volume"  : text.parse_int(cattributes["volume"]),
+            "date"    : text.parse_datetime(cattributes["createdAt"]),
+            "date_updated": text.parse_datetime(cattributes["updatedAt"]),
+        }
+
+
 class MangadexChapterExtractor(MangadexExtractor):
     """Extractor for manga-chapters from mangadex.org"""
     subcategory = "chapter"
@@ -239,6 +290,10 @@ class MangadexAPI():
         params = {"includes[]": ("scanlation_group",)}
         return self._call("/chapter/" + uuid, params)["data"]

+    def covers_manga(self, uuid):
+        params = {"manga[]": uuid}
+        return self._pagination_covers("/cover", params)
+
     def list(self, uuid):
         return self._call("/list/" + uuid, None, True)["data"]

@@ -374,6 +429,20 @@ class MangadexAPI():

         return self._pagination(endpoint, params, auth)

+    def _pagination_covers(self, endpoint, params=None, auth=False):
+        if params is None:
+            params = {}
+
+        lang = self.extractor.config("lang")
+        if isinstance(lang, str) and "," in lang:
+            lang = lang.split(",")
+        params["locales"] = lang
+        params["contentRating"] = None
+        params["order[volume]"] = \
+            "desc" if self.extractor.config("chapter-reverse") else "asc"
+
+        return self._pagination(endpoint, params, auth)
+
     def _pagination(self, endpoint, params, auth=False):
         config = self.extractor.config
diff --git a/gallery_dl/extractor/mangataro.py b/gallery_dl/extractor/mangataro.py
new file mode 100644
index 0000000..f4cc058
--- /dev/null
+++ b/gallery_dl/extractor/mangataro.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://mangataro.org/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+from ..cache import memcache
+
+BASE_PATTERN = r"(?:https?://)?mangataro\.org"
+
+
+class MangataroBase():
+    """Base class for mangataro extractors"""
+    category = "mangataro"
+    root = "https://mangataro.org"
+
+
+class MangataroChapterExtractor(MangataroBase, ChapterExtractor):
+    """Extractor for mangataro manga chapters"""
+    pattern = rf"{BASE_PATTERN}(/read/([^/?#]+)/(?:[^/?#]*-)?(\d+))"
+    example = "https://mangataro.org/read/MANGA/ch123-12345"
+
+    def metadata(self, page):
+        _, slug, chapter_id = self.groups
+        comic = self._extract_jsonld(page)["@graph"][0]
+        chapter = comic["position"]
+        minor = chapter - int(chapter)
+        desc = comic["description"].split(" - ", 3)
+
+        return {
+            **_manga_info(self, slug),
+            "title"        : desc[1] if len(desc) > 3 else "",
+            "chapter"      : int(chapter),
+            "chapter_minor": str(round(minor, 5))[1:] if minor else "",
+            "chapter_id"   : text.parse_int(chapter_id),
+            "chapter_url"  : comic["url"],
+            "date"         : text.parse_datetime(
+                comic["datePublished"], "%Y-%m-%dT%H:%M:%S%z"),
+            "date_updated" : text.parse_datetime(
+                comic["dateModified"], "%Y-%m-%dT%H:%M:%S%z"),
+        }
+
+    def images(self, page):
+        pos = page.find('class="comic-image-container')
+        img, pos = text.extract(page, ' src="', '"', pos)
+
+        images = [(img, None)]
+        images.extend(
+            (url, None)
+            for url in text.extract_iter(page, 'data-src="', '"', pos)
+        )
+        return images
+
+
+class MangataroMangaExtractor(MangataroBase, MangaExtractor):
+    """Extractor for mangataro manga"""
+    chapterclass = MangataroChapterExtractor
+    pattern = rf"{BASE_PATTERN}(/manga/([^/?#]+))"
+    example = "https://mangataro.org/manga/MANGA"
+
+    def chapters(self, page):
+        slug = self.groups[1]
+        manga = _manga_info(self, slug)
+
+        results = []
+        for url in text.extract_iter(text.extr(
+                page, '<div class="chapter-list', '<div id="tab-gallery"'),
+                '<a href="', '"'):
+            chapter, _, chapter_id = url[url.rfind("/")+3:].rpartition("-")
+            chapter, sep, minor = chapter.partition("-")
+            results.append((url, {
+                **manga,
+                "chapter"      : text.parse_int(chapter),
+                "chapter_minor": f".{minor}" if sep else "",
+                "chapter_id"   : text.parse_int(chapter_id),
+            }))
+        return results
+
+
+@memcache(keyarg=1)
+def _manga_info(self, slug):
+    url = f"{self.root}/manga/{slug}"
+    page = self.request(url).text
+    manga = self._extract_jsonld(page)
+
+    return {
+        "manga"      : manga["name"].rpartition(" | ")[0].rpartition(" ")[0],
+        "manga_url"  : manga["url"],
+        "cover"      : manga["image"],
+        "author"     : manga["author"]["name"].split(", "),
+        "genre"      : manga["genre"],
+        "status"     : manga["status"],
+        "description": text.unescape(text.extr(
+            page, 'id="description-content-tab">', "</div></div>")),
+        "tags"       : text.split_html(text.extr(
+            page, ">Genres</h4>", "</div>")),
+        "publisher"  : text.remove_html(text.extr(
+            page, '>Serialization</h4>', "</div>")),
+    }
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 9c335ad..ff771fb 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -204,58 +204,6 @@ class PinterestExtractor(Extractor):
         return media


-class PinterestPinExtractor(PinterestExtractor):
-    """Extractor for images from a single pin from pinterest.com"""
-    subcategory = "pin"
-    pattern = BASE_PATTERN + r"/pin/([^/?#]+)(?!.*#related$)"
-    example = "https://www.pinterest.com/pin/12345/"
-
-    def __init__(self, match):
-        PinterestExtractor.__init__(self, match)
-        self.pin_id = match[1]
-        self.pin = None
-
-    def metadata(self):
-        self.pin = self.api.pin(self.pin_id)
-        return self.pin
-
-    def pins(self):
-        return (self.pin,)
-
-
-class PinterestBoardExtractor(PinterestExtractor):
-    """Extractor for images from a board from pinterest.com"""
-    subcategory = "board"
-    directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
-    archive_fmt = "{board[id]}_{id}"
-    pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#]+)"
-               r"/(?!_saved|_created|pins/)([^/?#]+)/?(?:$|\?|#)")
-    example = "https://www.pinterest.com/USER/BOARD/"
-
-    def __init__(self, match):
-        PinterestExtractor.__init__(self, match)
-        self.user = text.unquote(match[1])
-        self.board_name = text.unquote(match[2])
-        self.board = None
-
-    def metadata(self):
-        self.board = self.api.board(self.user, self.board_name)
-        return {"board": self.board}
-
-    def pins(self):
-        board = self.board
-        pins = self.api.board_pins(board["id"])
-
-        if board["section_count"] and self.config("sections", True):
-            base = f"{self.root}{board['url']}id:"
-            data = {"_extractor": PinterestSectionExtractor}
-            sections = [(base + section["id"], data)
-                        for section in self.api.board_sections(board["id"])]
-            pins = itertools.chain(pins, sections)
-
-        return pins
-
-
 class PinterestUserExtractor(PinterestExtractor):
     """Extractor for a user's boards"""
     subcategory = "user"
@@ -357,6 +305,58 @@ class PinterestSearchExtractor(PinterestExtractor):
         return self.api.search(self.search)


+class PinterestPinExtractor(PinterestExtractor):
+    """Extractor for images from a single pin from pinterest.com"""
+    subcategory = "pin"
+    pattern = BASE_PATTERN + r"/pin/([^/?#]+)(?!.*#related$)"
+    example = "https://www.pinterest.com/pin/12345/"
+
+    def __init__(self, match):
+        PinterestExtractor.__init__(self, match)
+        self.pin_id = match[1]
+        self.pin = None
+
+    def metadata(self):
+        self.pin = self.api.pin(self.pin_id)
+        return self.pin
+
+    def pins(self):
+        return (self.pin,)
+
+
+class PinterestBoardExtractor(PinterestExtractor):
+    """Extractor for images from a board from pinterest.com"""
+    subcategory = "board"
+    directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
+    archive_fmt = "{board[id]}_{id}"
+    pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#]+)"
+               r"/([^/?#]+)/?(?!.*#related$)")
+    example = "https://www.pinterest.com/USER/BOARD/"
+
+    def __init__(self, match):
+        PinterestExtractor.__init__(self, match)
+        self.user = text.unquote(match[1])
+        self.board_name = text.unquote(match[2])
+        self.board = None
+
+    def metadata(self):
+        self.board = self.api.board(self.user, self.board_name)
+        return {"board": self.board}
+
+    def pins(self):
+        board = self.board
+        pins = self.api.board_pins(board["id"])
+
+        if board["section_count"] and self.config("sections", True):
+            base = f"{self.root}{board['url']}id:"
+            data = {"_extractor": PinterestSectionExtractor}
+            sections = [(base + section["id"], data)
+                        for section in self.api.board_sections(board["id"])]
+            pins = itertools.chain(pins, sections)
+
+        return pins
+
+
 class PinterestRelatedPinExtractor(PinterestPinExtractor):
     """Extractor for related pins of another pin from pinterest.com"""
     subcategory = "related-pin"
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 9febda9..e20d80e 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -56,6 +56,7 @@ class RedditExtractor(Extractor):
             urls = []

             if submission:
+                submission["comment"] = None
                 submission["date"] = text.parse_timestamp(
                     submission["created_utc"])
                 yield Message.Directory, submission
@@ -99,14 +100,13 @@ class RedditExtractor(Extractor):
                 elif not submission["is_self"]:
                     urls.append((url, submission))

+                if selftext and (txt := submission["selftext_html"]):
+                    for url in text.extract_iter(txt, ' href="', '"'):
+                        urls.append((url, submission))
+
             elif parentdir:
                 yield Message.Directory, comments[0]

-            if selftext and submission:
-                for url in text.extract_iter(
-                        submission["selftext_html"] or "", ' href="', '"'):
-                    urls.append((url, submission))
-
             if self.api.comments:
                 if comments and not submission:
                     submission = comments[0]
@@ -115,24 +115,24 @@ class RedditExtractor(Extractor):
                     yield Message.Directory, submission

                 for comment in comments:
+                    media = (embeds and "media_metadata" in comment)
                     html = comment["body_html"] or ""
                     href = (' href="' in html)
-                    media = (embeds and "media_metadata" in comment)
-                    if media or href:
-                        comment["date"] = text.parse_timestamp(
-                            comment["created_utc"])
-                        if submission:
-                            data = submission.copy()
-                            data["comment"] = comment
-                        else:
-                            data = comment
+                    if not media and not href:
+                        continue
+
+                    data = submission.copy()
+                    data["comment"] = comment
+                    comment["date"] = text.parse_timestamp(
+                        comment["created_utc"])

                     if media:
-                        for embed in self._extract_embed(comment):
-                            submission["num"] += 1
-                            text.nameext_from_url(embed, submission)
-                            yield Message.Url, embed, submission
+                        for url in self._extract_embed(comment):
+                            data["num"] += 1
+                            text.nameext_from_url(url, data)
+                            yield Message.Url, url, data
+                        submission["num"] = data["num"]

                     if href:
                         for url in text.extract_iter(html, ' href="', '"'):
diff --git a/gallery_dl/extractor/schalenetwork.py b/gallery_dl/extractor/schalenetwork.py
index d517287..dc42417 100644
--- a/gallery_dl/extractor/schalenetwork.py
+++ b/gallery_dl/extractor/schalenetwork.py
@@ -10,7 +10,6 @@

 from .common import GalleryExtractor, Extractor, Message
 from .. import text, exception
-from ..cache import cache
 import collections

 BASE_PATTERN = (
@@ -27,6 +26,8 @@ class SchalenetworkExtractor(Extractor):
     category = "schalenetwork"
     root = "https://niyaniya.moe"
     root_api = "https://api.schale.network"
+    root_auth = "https://auth.schale.network"
+    extr_class = None
     request_interval = (0.5, 1.5)

     def _init(self):
@@ -38,6 +39,7 @@ class SchalenetworkExtractor(Extractor):

     def _pagination(self, endpoint, params):
         url_api = self.root_api + endpoint
+        cls = self.extr_class

         while True:
             data = self.request_json(
@@ -49,8 +51,8 @@ class SchalenetworkExtractor(Extractor):
                 return

             for entry in entries:
-                url = f"{self.root}/g/{entry['id']}/{entry['public_key']}"
-                entry["_extractor"] = SchalenetworkGalleryExtractor
+                url = f"{self.root}/g/{entry['id']}/{entry['key']}"
+                entry["_extractor"] = cls
                 yield Message.Queue, url, entry

             try:
@@ -60,6 +62,34 @@ class SchalenetworkExtractor(Extractor):
                 pass
             params["page"] += 1

+    def _token(self):
+        if token := self.config("token"):
+            return f"Bearer {token.rpartition(' ')[2]}"
+        raise exception.AuthRequired("'token'", "your favorites")
+
+    def _crt(self):
+        crt = self.config("crt")
+        if not crt:
+            self._require_auth()
+
+        if not text.re(r"^[0-9a-f-]+$").match(crt):
+            path, _, qs = crt.partition("?")
+            if not qs:
+                qs = path
+            crt = text.parse_query(qs).get("crt")
+            if not crt:
+                self._require_auth()
+
+        return crt
+
+    def _require_auth(self, exc=None):
+        if exc is None:
+            msg = None
+        else:
+            msg = f"{exc.status} {exc.response.reason}"
+        raise exception.AuthRequired(
+            "'crt' query parameter & matching '--user-agent'", None, msg)
+

 class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
     """Extractor for schale.network galleries"""
@@ -67,7 +97,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
     directory_fmt = ("{category}", "{id} {title}")
     archive_fmt = "{id}_{num}"
     request_interval = 0.0
-    pattern = BASE_PATTERN + r"/(?:g|reader)/(\d+)/(\w+)"
+    pattern = rf"{BASE_PATTERN}/(?:g|reader)/(\d+)/(\w+)"
     example = "https://niyaniya.moe/g/12345/67890abcde/"

     TAG_TYPES = {
@@ -86,27 +116,10 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
         12: "other",
     }

-    def __init__(self, match):
-        GalleryExtractor.__init__(self, match)
-        self.page_url = None
-
-    def _init(self):
-        self.headers = {
-            "Accept" : "*/*",
-            "Referer": self.root + "/",
-            "Origin" : self.root,
-        }
-
-        self.fmt = self.config("format")
-        self.cbz = self.config("cbz", True)
-
-        if self.cbz:
-            self.filename_fmt = "{id} {title}.{extension}"
-            self.directory_fmt = ("{category}",)
-
     def metadata(self, _):
-        url = f"{self.root_api}/books/detail/{self.groups[1]}/{self.groups[2]}"
-        self.data = data = self.request_json(url, headers=self.headers)
+        _, gid, gkey = self.groups
+        url = f"{self.root_api}/books/detail/{gid}/{gkey}"
+        data = self.request_json(url, headers=self.headers)
         data["date"] = text.parse_timestamp(data["created_at"] // 1000)

         tags = []
@@ -127,53 +140,42 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
             data["tags_" + types[type]] = values

         try:
-            if self.cbz:
-                data["count"] = len(data["thumbnails"]["entries"])
+            data["count"] = len(data["thumbnails"]["entries"])
             del data["thumbnails"]
-            del data["rels"]
         except Exception:
             pass

         return data

     def images(self, _):
-        data = self.data
-        fmt = self._select_format(data["data"])
+        crt = self._crt()
+        _, gid, gkey = self.groups
+        url = f"{self.root_api}/books/detail/{gid}/{gkey}?crt={crt}"
+        try:
+            data = self.request_json(url, method="POST", headers=self.headers)
+        except exception.HttpError as exc:
+            self._require_auth(exc)

-        url = (f"{self.root_api}/books/data/{data['id']}/"
-               f"{data['public_key']}/{fmt['id']}/{fmt['public_key']}")
-        params = {
-            "v": data["updated_at"],
-            "w": fmt["w"],
-        }
+        fmt = self._select_format(data["data"])

-        if self.cbz:
-            params["action"] = "dl"
-            base = self.request_json(
-                url, method="POST", params=params, headers=self.headers,
-            )["base"]
-            url = f"{base}?v={data['updated_at']}&w={fmt['w']}"
-            info = text.nameext_from_url(base)
-            if not info["extension"]:
-                info["extension"] = "cbz"
-            return ((url, info),)
-
-        data = self.request_json(url, params=params, headers=self.headers)
+        url = (f"{self.root_api}/books/data/{gid}/{gkey}"
+               f"/{fmt['id']}/{fmt['key']}/{fmt['w']}?crt={crt}")
+        data = self.request_json(url, headers=self.headers)
         base = data["base"]

         results = []
         for entry in data["entries"]:
             dimensions = entry["dimensions"]
             info = {
-                "w": dimensions[0],
-                "h": dimensions[1],
+                "width" : dimensions[0],
+                "height": dimensions[1],
                 "_http_headers": self.headers,
             }
             results.append((base + entry["path"], info))
         return results

     def _select_format(self, formats):
-        fmt = self.fmt
+        fmt = self.config("format")

         if not fmt or fmt == "best":
             fmtids = ("0", "1600", "1280", "980", "780")
@@ -182,7 +184,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
         elif isinstance(fmt, list):
             fmtids = fmt
         else:
-            fmtids = (str(self.fmt),)
+            fmtids = (str(fmt),)

         for fmtid in fmtids:
             try:
@@ -203,44 +205,39 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
 class SchalenetworkSearchExtractor(SchalenetworkExtractor):
     """Extractor for schale.network search results"""
     subcategory = "search"
-    pattern = BASE_PATTERN + r"/\?([^#]*)"
-    example = "https://niyaniya.moe/?s=QUERY"
+    pattern = rf"{BASE_PATTERN}/(?:tag/([^/?#]+)|browse)?(?:/?\?([^#]*))?$"
+    example = "https://niyaniya.moe/browse?s=QUERY"

     def items(self):
-        params = text.parse_query(self.groups[1])
+        _, tag, qs = self.groups
+
+        params = text.parse_query(qs)
         params["page"] = text.parse_int(params.get("page"), 1)
+
+        if tag is not None:
+            ns, sep, tag = text.unquote(tag).partition(":")
+            if "+" in tag:
+                tag = tag.replace("+", " ")
+                q = '"'
+            else:
+                q = ""
+            q = '"' if " " in tag else ""
+            params["s"] = f"{ns}{sep}{q}^{tag}${q}"
+
         return self._pagination("/books", params)


 class SchalenetworkFavoriteExtractor(SchalenetworkExtractor):
     """Extractor for schale.network favorites"""
     subcategory = "favorite"
-    pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
+    pattern = rf"{BASE_PATTERN}/favorites(?:\?([^#]*))?"
     example = "https://niyaniya.moe/favorites"

     def items(self):
-        self.login()
-
         params = text.parse_query(self.groups[1])
         params["page"] = text.parse_int(params.get("page"), 1)
-        return self._pagination("/favorites", params)
-
-    def login(self):
-        username, password = self._get_auth_info()
-        if username:
-            self.headers["Authorization"] = \
-                "Bearer " + self._login_impl(username, password)
-            return
-
-        raise exception.AuthenticationError("Username and password required")
-
-    @cache(maxage=86400, keyarg=1)
-    def _login_impl(self, username, password):
-        self.log.info("Logging in as %s", username)
-
-        url = "https://auth.schale.network/login"
-        data = {"uname": username, "passwd": password}
-        response = self.request(
-            url, method="POST", headers=self.headers, data=data)
-        return response.json()["session"]
+        self.headers["Authorization"] = self._token()
+        return self._pagination(f"/books/favorites?crt={self._crt()}", params)
+
+
+SchalenetworkExtractor.extr_class = SchalenetworkGalleryExtractor
diff --git a/gallery_dl/extractor/simpcity.py b/gallery_dl/extractor/simpcity.py
index 8cc7e38..3354289 100644
--- a/gallery_dl/extractor/simpcity.py
+++ b/gallery_dl/extractor/simpcity.py
@@ -20,18 +20,20 @@ class SimpcityExtractor(Extractor):
     root = "https://simpcity.cr"

     def items(self):
-        extract_urls = text.re(r' href="([^"]+)').findall
+        extract_urls = text.re(
+            r'<(?:a [^>]*?href|iframe [^>]*?src)="([^"]+)').findall

         for post in self.posts():
             urls = extract_urls(post["content"])
             data = {"post": post}
             post["count"] = data["count"] = len(urls)
+
             yield Message.Directory, data
             for data["num"], url in enumerate(urls, 1):
                 yield Message.Queue, url, data

     def request_page(self, url):
         try:
-            return self.request(url).text
+            return self.request(url)
         except exception.HttpError as exc:
             if exc.status == 403 and b">Log in<" in exc.response.content:
                 msg = text.extr(exc.response.text, "blockMessage--error", "</")
@@ -44,14 +46,14 @@ class SimpcityExtractor(Extractor):
         base = f"{self.root}{base}"

         if pnum is None:
-            url = base
+            url = f"{base}/"
             pnum = 1
         else:
             url = f"{base}/page-{pnum}"
             pnum = None

         while True:
-            page = self.request_page(url)
+            page = self.request_page(url).text
             yield page

@@ -60,6 +62,31 @@ class SimpcityExtractor(Extractor):
             pnum += 1
             url = f"{base}/page-{pnum}"

+    def _pagination_reverse(self, base, pnum=None):
+        base = f"{self.root}{base}"
+
+        url = f"{base}/page-9999"  # force redirect to last page
+        with self.request_page(url) as response:
+            url = response.url
+            if url[-1] == "/":
+                pnum = 1
+            else:
+                pnum = text.parse_int(url[url.rfind("-")+1:], 1)
+            page = response.text
+
+        while True:
+            yield page
+
+            pnum -= 1
+            if pnum > 1:
+                url = f"{base}/page-{pnum}"
+            elif pnum == 1:
+                url = f"{base}/"
+            else:
+                return
+
+            page = self.request_page(url).text
+
     def _parse_thread(self, page):
         schema = self._extract_jsonld(page)["mainEntity"]
         author = schema["author"]
@@ -92,7 +119,8 @@ class SimpcityExtractor(Extractor):
             "id": extr('data-content="post-', '"'),
             "author_url": extr('itemprop="url" content="', '"'),
             "date": text.parse_datetime(extr('datetime="', '"')),
-            "content": extr('<div itemprop="text">', "\t\t</div>").strip(),
+            "content": extr('<div itemprop="text">',
+                            '<div class="js-selectToQuote').strip(),
         }

         url_a = post["author_url"]
@@ -109,7 +137,7 @@ class SimpcityPostExtractor(SimpcityExtractor):
     def posts(self):
         post_id = self.groups[0]
         url = f"{self.root}/posts/{post_id}/"
-        page = self.request_page(url)
+        page = self.request_page(url).text

         pos = page.find(f'data-content="post-{post_id}"')
         if pos < 0:
@@ -126,10 +154,22 @@ class SimpcityThreadExtractor(SimpcityExtractor):
     example = "https://simpcity.cr/threads/TITLE.12345/"

     def posts(self):
-        for page in self._pagination(*self.groups):
+        if (order := self.config("order-posts")) and \
+                order[0] not in ("d", "r"):
+            pages = self._pagination(*self.groups)
+            reverse = False
+        else:
+            pages = self._pagination_reverse(*self.groups)
+            reverse = True
+
+        for page in pages:
             if "thread" not in self.kwdict:
                 self.kwdict["thread"] = self._parse_thread(page)
-            for html in text.extract_iter(page, "<article ", "</article>"):
+            posts = text.extract_iter(page, "<article ", "</article>")
+            if reverse:
+                posts = list(posts)
+                posts.reverse()
+            for html in posts:
                 yield self._parse_post(html)
diff --git a/gallery_dl/extractor/thehentaiworld.py b/gallery_dl/extractor/thehentaiworld.py
new file mode 100644
index 0000000..055d7d8
--- /dev/null
+++ b/gallery_dl/extractor/thehentaiworld.py
@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://thehentaiworld.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+import collections
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?thehentaiworld\.com"
+
+
+class ThehentaiworldExtractor(Extractor):
+    """Base class for thehentaiworld extractors"""
+    category = "thehentaiworld"
+    root = "https://thehentaiworld.com"
+    filename_fmt = "{title} ({id}{num:?-//}).{extension}"
+    archive_fmt = "{id}_{num}"
+    request_interval = (0.5, 1.5)
+
+    def items(self):
+        for url in self.posts():
+            try:
+                post = self._extract_post(url)
+            except Exception as exc:
+                self.status |= 1
+                self.log.warning("Failed to extract post %s (%s: %s)",
+                                 url, exc.__class__.__name__, exc)
+                continue
+
+            if "file_urls" in post:
+                urls = post["file_urls"]
+                post["count"] = len(urls)
+                yield Message.Directory, post
+                for post["num"], url in enumerate(urls, 1):
+                    text.nameext_from_url(url, post)
+                    yield Message.Url, url, post
+            else:
+                yield Message.Directory, post
+                url = post["file_url"]
+                text.nameext_from_url(url, post)
+                yield Message.Url, url, post
+
+    def _extract_post(self, url):
+        extr = text.extract_from(self.request(url).text)
+
+        post = {
+            "num"  : 0,
+            "count": 1,
+            "title": text.unescape(extr("<title>", "<").strip()),
+            "id"   : text.parse_int(extr(" postid-", " ")),
+            "slug" : extr(" post-", '"'),
+            "tags" : extr('id="tagsHead">', "</ul>"),
+            "date" : text.parse_datetime(extr(
+                "<li>Posted: ", "<"), "%Y-%m-%d"),
+        }
+
+        if "/videos/" in url:
+            post["type"] = "video"
+            post["width"] = post["height"] = 0
+            post["votes"] = text.parse_int(extr("(<strong>", "</strong>"))
+            post["score"] = text.parse_float(extr("<strong>", "<"))
+            post["file_url"] = extr('<source src="', '"')
+        else:
+            post["type"] = "image"
+            post["width"] = text.parse_int(extr("<li>Size: ", " "))
+            post["height"] = text.parse_int(extr("x ", "<"))
+            post["file_url"] = extr('a href="', '"')
+            post["votes"] = text.parse_int(extr("(<strong>", "</strong>"))
+            post["score"] = text.parse_float(extr("<strong>", "<"))
+
+            if doujin := extr('<a id="prev-page"', "</div></div><"):
+                repl = text.re(r"-220x\d+\.").sub
+                post["file_urls"] = [
+                    repl(".", url)
+                    for url in text.extract_iter(
+                        doujin, 'class="border" src="', '"')
+                ]
+
+        tags = collections.defaultdict(list)
+        pattern = text.re(r'<li><a class="([^"]*)" href="[^"]*">([^<]+)')
+        for tag_type, tag_name in pattern.findall(post["tags"]):
+            tags[tag_type].append(tag_name)
+        post["tags"] = tags_list = []
+        for key, value in tags.items():
+            tags_list.extend(value)
+            post[f"tags_{key}" if key else "tags_general"] = value
+
+        return post
+
+    def _pagination(self, endpoint):
+        base = f"{self.root}{endpoint}"
+        pnum = self.page_start
+
+        while True:
+            url = base if pnum < 2 else f"{base}page/{pnum}/"
+            page = self.request(url).text
+
+            yield from text.extract_iter(text.extr(
+                page, 'id="thumbContainer"', "<script"), ' href="', '"')
+
+            if 'class="next"' not in page:
+                return
+            pnum += 1
+
+
+class ThehentaiworldPostExtractor(ThehentaiworldExtractor):
+    subcategory = "post"
+    pattern = (rf"{BASE_PATTERN}"
+               rf"(/(?:(?:3d-cgi-)?hentai-image|video)s/([^/?#]+))")
+    example = "https://thehentaiworld.com/hentai-images/SLUG/"
+
+    def posts(self):
+        return (f"{self.root}{self.groups[0]}/",)
+
+
+class ThehentaiworldTagExtractor(ThehentaiworldExtractor):
+    subcategory = "tag"
+    per_page = 24
+    page_start = 1
+    post_start = 0
+    directory_fmt = ("{category}", "{search_tags}")
+    pattern = rf"{BASE_PATTERN}/tag/([^/?#]+)"
+    example = "https://thehentaiworld.com/tag/TAG/"
+
+    def posts(self):
+        self.kwdict["search_tags"] = tag = self.groups[0]
+        return util.advance(self._pagination(f"/tag/{tag}/"), self.post_start)
+
+    def skip(self, num):
+        pages, posts = divmod(num, self.per_page)
+        self.page_start += pages
+        self.post_start += posts
+        return num
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ed3cfae..e6c84d1 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -2070,7 +2070,7 @@ class TwitterAPI():
             quoted = tweet["quoted_status_result"]["result"]
             quoted["legacy"]["quoted_by"] = (
                 tweet["core"]["user_results"]["result"]
-                ["legacy"]["screen_name"])
+                ["core"]["screen_name"])
             quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"]
             quoted["sortIndex"] = entry.get("sortIndex")
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
index e53ecf4..294fc57 100644
--- a/gallery_dl/extractor/vipergirls.py
+++ b/gallery_dl/extractor/vipergirls.py
@@ -51,8 +51,16 @@ class VipergirlsExtractor(Extractor):
             like = False

         posts = root.iter("post")
-        if self.page:
-            util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15)
+        if (order := self.config("order-posts")) and \
+                order[0] not in ("d", "r"):
+            if self.page:
+                util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15)
+        else:
+            posts = list(posts)
+            if self.page:
+                offset = text.parse_int(self.page[5:]) * 15
+                posts = posts[:offset]
+            posts.reverse()

         for post in posts:
             images = list(post)
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 9d98e68..9369e5d 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -489,9 +489,6 @@ class DownloadJob(Job):

         self.extractor.cookies_store()

-        if "finalize" in hooks:
-            for callback in hooks["finalize"]:
-                callback(pathfmt)
         if self.status:
             if "finalize-error" in hooks:
                 for callback in hooks["finalize-error"]:
@@ -500,6 +497,9 @@ class DownloadJob(Job):
             if "finalize-success" in hooks:
                 for callback in hooks["finalize-success"]:
                     callback(pathfmt)
+        if "finalize" in hooks:
+            for callback in hooks["finalize"]:
+                callback(pathfmt)

     def handle_skip(self):
         pathfmt = self.pathfmt
diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py
index 8da8417..9992c56 100644
--- a/gallery_dl/postprocessor/common.py
+++ b/gallery_dl/postprocessor/common.py
@@ -54,7 +54,11 @@ class PostProcessor():
             else:
                 self.log.debug(
                     "Using %s archive '%s'", self.name, archive_path)
+            job.register_hooks({"finalize": self._close_archive})
             return True

         self.archive = None
         return False
+
+    def _close_archive(self, _):
+        self.archive.close()
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index c74f92f..a6d2b7f 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -45,6 +45,15 @@ class MetadataPP(PostProcessor):
                 cfmt = "\n".join(cfmt) + "\n"
             self._content_fmt = formatter.parse(cfmt).format_map
             ext = "txt"
+        elif mode == "print":
+            nl = "\n"
+            if isinstance(cfmt, list):
+                cfmt = f"{nl.join(cfmt)}{nl}"
+            if cfmt[-1] != nl and (cfmt[0] != "\f" or cfmt[1] == "F"):
+                cfmt = f"{cfmt}{nl}"
+            self.write = self._write_custom
+            self._content_fmt = formatter.parse(cfmt).format_map
+            filename = "-"
         elif mode == "jsonl":
             self.write = self._write_json
             self._json_encode = self._make_encoder(options).encode
diff --git a/gallery_dl/postprocessor/python.py b/gallery_dl/postprocessor/python.py
index db71da2..66d9343 100644
--- a/gallery_dl/postprocessor/python.py
+++ b/gallery_dl/postprocessor/python.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright 2023 Mike Fährmann
+# Copyright 2023-2025 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -17,13 +17,14 @@ class PythonPP(PostProcessor):

     def __init__(self, job, options):
         PostProcessor.__init__(self, job)
-        spec = options["function"]
-        module_name, _, function_name = spec.rpartition(":")
-        module = util.import_file(module_name)
-        self.function = getattr(module, function_name)
-
-        if self._init_archive(job, options):
-            self.run = self.run_archive
+        mode = options.get("mode")
+        if mode == "eval" or not mode and options.get("expression"):
+            self.function = util.compile_expression(options["expression"])
+        else:
+            spec = options["function"]
+            module_name, _, function_name = spec.rpartition(":")
+            module = util.import_file(module_name)
+            self.function = getattr(module, function_name)

         events = options.get("event")
         if events is None:
@@ -32,6 +33,9 @@ class PythonPP(PostProcessor):
             events = events.split(",")
         job.register_hooks({event: self.run for event in events}, options)

+        if self._init_archive(job, options):
+            self.run = self.run_archive
+
     def run(self, pathfmt):
         self.function(pathfmt.kwdict)
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 7b9ce99..49c1ba8 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -542,6 +542,7 @@ def language_to_code(lang, default=None):
 CODES = {
     "ar": "Arabic",
     "bg": "Bulgarian",
+    "bn": "Bengali",
     "ca": "Catalan",
     "cs": "Czech",
     "da": "Danish",
@@ -549,9 +550,11 @@ CODES = {
     "el": "Greek",
     "en": "English",
     "es": "Spanish",
+    "fa": "Persian",
     "fi": "Finnish",
     "fr": "French",
     "he": "Hebrew",
+    "hi": "Hindi",
     "hu": "Hungarian",
     "id": "Indonesian",
     "it": "Italian",
@@ -564,9 +567,13 @@ CODES = {
     "pt": "Portuguese",
     "ro": "Romanian",
     "ru": "Russian",
+    "sk": "Slovak",
+    "sl": "Slovenian",
+    "sr": "Serbian",
     "sv": "Swedish",
     "th": "Thai",
     "tr": "Turkish",
+    "uk": "Ukrainian",
     "vi": "Vietnamese",
     "zh": "Chinese",
 }
@@ -634,6 +641,12 @@ class NullResponse():
         self.url = url
         self.reason = str(reason)

+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        pass
+
     def __str__(self):
         return "900 " + self.reason
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 277d679..4861a9d 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-__version__ = "1.30.7"
+__version__ = "1.30.8"
 __variant__ = None
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index cfc6b50..0296498 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -80,7 +80,10 @@ def parse_command_line(module, argv):
     parser, opts, args = module.parseOpts(argv)
     ytdlp = hasattr(module, "cookies")

-    std_headers = module.std_headers
+    try:
+        std_headers = module.utils.networking.std_headers
+    except AttributeError:
+        std_headers = module.std_headers

     try:
         parse_bytes = module.parse_bytes
@@ -345,7 +348,7 @@ def parse_command_line(module, argv):
         "nopart": opts.nopart,
         "updatetime": opts.updatetime,
         "writedescription": opts.writedescription,
-        "writeannotations": opts.writeannotations,
+        "writeannotations": getattr(opts, "writeannotations", None),
         "writeinfojson": opts.writeinfojson,
         "allow_playlist_files": opts.allow_playlist_files,
         "clean_infojson": opts.clean_infojson,
@@ -378,7 +381,8 @@ def parse_command_line(module, argv):
         "max_views": opts.max_views,
         "daterange": date,
         "cachedir": opts.cachedir,
-        "youtube_print_sig_code": opts.youtube_print_sig_code,
+        "youtube_print_sig_code": getattr(
+            opts, "youtube_print_sig_code", None),
         "age_limit": opts.age_limit,
         "download_archive": download_archive_fn,
         "break_on_existing": getattr(opts, "break_on_existing", None),
@@ -394,8 +398,8 @@ def parse_command_line(module, argv):
         "socket_timeout": opts.socket_timeout,
         "bidi_workaround": opts.bidi_workaround,
         "debug_printtraffic": opts.debug_printtraffic,
-        "prefer_ffmpeg": opts.prefer_ffmpeg,
-        "include_ads": opts.include_ads,
+        "prefer_ffmpeg": getattr(opts, "prefer_ffmpeg", None),
+        "include_ads": getattr(opts, "include_ads", None),
         "default_search": opts.default_search,
         "dynamic_mpd": getattr(opts, "dynamic_mpd", None),
         "extractor_args": getattr(opts, "extractor_args", None),
@@ -420,7 +424,7 @@ def parse_command_line(module, argv):
             opts, "sleep_interval_subtitles", None),
         "external_downloader": opts.external_downloader,
         "playlist_items": opts.playlist_items,
-        "xattr_set_filesize": opts.xattr_set_filesize,
+        "xattr_set_filesize": getattr(opts, "xattr_set_filesize", None),
         "match_filter": match_filter,
         "no_color": getattr(opts, "no_color", None),
         "ffmpeg_location": opts.ffmpeg_location,
@@ -430,7 +434,7 @@ def parse_command_line(module, argv):
             opts, "hls_split_discontinuity", None),
         "external_downloader_args": opts.external_downloader_args,
         "postprocessor_args": opts.postprocessor_args,
-        "cn_verification_proxy": opts.cn_verification_proxy,
+        "cn_verification_proxy": getattr(opts, "cn_verification_proxy", None),
         "geo_verification_proxy": opts.geo_verification_proxy,
         "geo_bypass": getattr(
             opts, "geo_bypass", "default"),
