| author | 2025-10-07 02:11:52 -0400 |
|---|---|
| committer | 2025-10-07 02:11:52 -0400 |
| commit | 83e1e051b8c0e622ef5f61c1955c47b4bde95b57 |
| tree | 544a434cb398d2adb8b8a2d553dc1c9a44b4ee1d /gallery_dl/extractor |
| parent | f1612851ae9fe68c7444fb31e786503868aeaa7c |
| parent | bbe7fac03d881662a458e7fbf870c9d71f5257f4 |
Update upstream source from tag 'upstream/1.30.9'
Update to upstream version '1.30.9'
with Debian dir 46cc56e13f05f4465cc64f67b4d7b775a95bd87a
Diffstat (limited to 'gallery_dl/extractor')
| -rw-r--r-- | gallery_dl/extractor/__init__.py | 3 |
| -rw-r--r-- | gallery_dl/extractor/chevereto.py | 20 |
| -rw-r--r-- | gallery_dl/extractor/imagehosts.py | 14 |
| -rw-r--r-- | gallery_dl/extractor/instagram.py | 75 |
| -rw-r--r-- | gallery_dl/extractor/mangadex.py | 119 |
| -rw-r--r-- | gallery_dl/extractor/mangafire.py | 168 |
| -rw-r--r-- | gallery_dl/extractor/mangareader.py | 173 |
| -rw-r--r-- | gallery_dl/extractor/misskey.py | 6 |
| -rw-r--r-- | gallery_dl/extractor/nozomi.py | 2 |
| -rw-r--r-- | gallery_dl/extractor/paheal.py | 16 |
| -rw-r--r-- | gallery_dl/extractor/patreon.py | 37 |
| -rw-r--r-- | gallery_dl/extractor/pixiv.py | 44 |
| -rw-r--r-- | gallery_dl/extractor/s3ndpics.py | 101 |
| -rw-r--r-- | gallery_dl/extractor/schalenetwork.py | 57 |
| -rw-r--r-- | gallery_dl/extractor/simpcity.py | 7 |
| -rw-r--r-- | gallery_dl/extractor/thehentaiworld.py | 26 |
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 3 |
| -rw-r--r-- | gallery_dl/extractor/weibo.py | 36 |
| -rw-r--r-- | gallery_dl/extractor/wikimedia.py | 34 |
| -rw-r--r-- | gallery_dl/extractor/zerochan.py | 35 |
20 files changed, 801 insertions, 175 deletions
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index abdb6cc..a3df634 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -115,11 +115,13 @@ modules = [
     "lynxchan",
     "madokami",
     "mangadex",
+    "mangafire",
     "mangafox",
     "mangahere",
     "manganelo",
     "mangapark",
     "mangaread",
+    "mangareader",
     "mangataro",
     "mangoxo",
     "misskey",
@@ -166,6 +168,7 @@ modules = [
     "rule34us",
     "rule34vault",
     "rule34xyz",
+    "s3ndpics",
     "saint",
     "sankaku",
     "sankakucomplex",
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index 67fdb39..1552899 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -40,19 +40,15 @@ class CheveretoExtractor(BaseExtractor):
 BASE_PATTERN = CheveretoExtractor.update({
     "jpgfish": {
         "root": "https://jpg6.su",
-        "pattern": r"jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)",
-    },
-    "imgkiwi": {
-        "root": "https://img.kiwi",
-        "pattern": r"img\.kiwi",
+        "pattern": r"(?:www\.)?jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)",
     },
     "imagepond": {
         "root": "https://imagepond.net",
-        "pattern": r"imagepond\.net",
+        "pattern": r"(?:www\.)?imagepond\.net",
     },
     "imglike": {
         "root": "https://imglike.com",
-        "pattern": r"imglike\.com",
+        "pattern": r"(?:www\.)?imglike\.com",
     },
 })

@@ -79,7 +75,7 @@ class CheveretoImageExtractor(CheveretoExtractor):
             fromhex=True)

         file = {
-            "id"   : self.path.rpartition(".")[2],
+            "id"   : self.path.rpartition("/")[2].rpartition(".")[2],
             "url"  : url,
             "album": text.remove_html(extr(
                 "Added to <a", "</a>").rpartition(">")[2]),
@@ -144,7 +140,8 @@ class CheveretoAlbumExtractor(CheveretoExtractor):

     def items(self):
         url = self.root + self.path
-        data = {"_extractor": CheveretoImageExtractor}
+        data_image = {"_extractor": CheveretoImageExtractor}
+        data_video = {"_extractor": CheveretoVideoExtractor}

         if self.path.endswith("/sub"):
             albums = self._pagination(url)
@@ -152,8 +149,9 @@ class CheveretoAlbumExtractor(CheveretoExtractor):
             albums = (url,)

         for album in albums:
-            for image in self._pagination(album):
-                yield Message.Queue, image, data
+            for item_url in self._pagination(album):
+                data = data_video if "/video/" in item_url else data_image
+                yield Message.Queue, item_url, data


 class CheveretoCategoryExtractor(CheveretoExtractor):
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index fccc466..817d2c4 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -125,8 +125,18 @@ class ImxtoGalleryExtractor(ImagehostImageExtractor):
             "title": text.unescape(title.partition(">")[2]).strip(),
         }

-        for url in text.extract_iter(page, "<a href=", " ", pos):
-            yield Message.Queue, url.strip("\"'"), data
+        params = {"page": 1}
+        while True:
+            for url in text.extract_iter(page, "<a href=", " ", pos):
+                if "/i/" in url:
+                    yield Message.Queue, url.strip("\"'"), data
+
+            if 'class="pagination' not in page or \
+                    'class="disabled">Last' in page:
+                return
+
+            params["page"] += 1
+            page = self.request(self.page_url, params=params).text


 class AcidimgImageExtractor(ImagehostImageExtractor):
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 00e06b5..0e6c480 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -39,7 +39,6 @@ class InstagramExtractor(Extractor):
         self.www_claim = "0"
         self.csrf_token = util.generate_token()
         self._find_tags = util.re(r"#\w+").findall
-        self._warn_video_ua = True
         self._logged_in = True
         self._cursor = None
         self._user = None
@@ -52,6 +51,12 @@ class InstagramExtractor(Extractor):
         else:
             self.api = InstagramRestAPI(self)

+        self._warn_video = True if self.config("warn-videos", True) else False
+        self._warn_image = (
+            9 if not (wi := self.config("warn-images", True)) else
+            1 if wi in ("all", "both") else
+            0)
+
     def items(self):
         self.login()

@@ -172,6 +177,7 @@ class InstagramExtractor(Extractor):
             "post_id": reel_id,
             "post_shortcode": shortcode_from_id(reel_id),
             "post_url": post_url,
+            "type": "story" if expires else "highlight",
         }
         if "title" in post:
             data["highlight_title"] = post["title"]
@@ -182,7 +188,6 @@ class InstagramExtractor(Extractor):
         data = {
             "post_id" : post["pk"],
             "post_shortcode": post["code"],
-            "post_url": f"{self.root}/p/{post['code']}/",
             "likes": post.get("like_count", 0),
             "liked": post.get("has_liked", False),
             "pinned": self._extract_pinned(post),
@@ -239,8 +244,8 @@ class InstagramExtractor(Extractor):
             manifest = item.get("video_dash_manifest")
             media = video

-            if self._warn_video_ua:
-                self._warn_video_ua = False
+            if self._warn_video:
+                self._warn_video = False
                 pattern = text.re(
                     r"Chrome/\d{3,}\.\d+\.\d+\.\d+(?!\d* Mobile)")
                 if not pattern.search(self.session.headers["User-Agent"]):
@@ -250,8 +255,9 @@ class InstagramExtractor(Extractor):
                 video = manifest = None
                 media = image

-        if image["width"] < item.get("original_width", 0) or \
-                image["height"] < item.get("original_height", 0):
+        if self._warn_image < (
+                (image["width"] < item.get("original_width", 0)) +
+                (image["height"] < item.get("original_height", 0))):
             self.log.warning(
                 "%s: Available image resolutions lower than the "
                 "original (%sx%s < %sx%s). "
@@ -278,7 +284,7 @@ class InstagramExtractor(Extractor):
             if manifest is not None:
                 media["_ytdl_manifest_data"] = manifest
             if "owner" in item:
-                media["owner2"] = item["owner"]
+                media["owner"] = item["owner"]
             if "reshared_story_media_author" in item:
                 media["author"] = item["reshared_story_media_author"]
             if "expiring_at" in item:
@@ -287,6 +293,14 @@ class InstagramExtractor(Extractor):
                 self._extract_tagged_users(item, media)
             files.append(media)

+        if "type" not in data:
+            if len(files) == 1 and files[0]["video_url"]:
+                data["type"] = "reel"
+                data["post_url"] = f"{self.root}/reel/{post['code']}/"
+            else:
+                data["type"] = "post"
+                data["post_url"] = f"{self.root}/p/{post['code']}/"
+
         return data

     def _parse_post_graphql(self, post):
@@ -443,6 +457,32 @@ class InstagramExtractor(Extractor):
                 user[key] = 0


+class InstagramPostExtractor(InstagramExtractor):
+    """Extractor for an Instagram post"""
+    subcategory = "post"
+    pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
+               r"/(?:share/()|[^/?#]+/)?(?:p|tv|reels?())/([^/?#]+)")
+    example = "https://www.instagram.com/p/abcdefg/"
+
+    def __init__(self, match):
+        if match[2] is not None:
+            self.subcategory = "reel"
+        InstagramExtractor.__init__(self, match)
+
+    def posts(self):
+        share, reel, shortcode = self.groups
+        if share is not None:
+            url = text.ensure_http_scheme(self.url)
+            headers = {
+                "Sec-Fetch-Dest": "empty",
+                "Sec-Fetch-Mode": "navigate",
+                "Sec-Fetch-Site": "same-origin",
+            }
+            location = self.request_location(url, headers=headers)
+            shortcode = location.split("/")[-2]
+        return self.api.media(shortcode)
+
+
 class InstagramUserExtractor(Dispatch, InstagramExtractor):
     """Extractor for an Instagram user profile"""
     pattern = USER_PATTERN + r"/?(?:$|[?#])"
@@ -740,27 +780,6 @@ class InstagramAvatarExtractor(InstagramExtractor):
         },)


-class InstagramPostExtractor(InstagramExtractor):
-    """Extractor for an Instagram post"""
subcategory = "post" - pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" - r"/(?:share/()|[^/?#]+/)?(?:p|tv|reel)/([^/?#]+)") - example = "https://www.instagram.com/p/abcdefg/" - - def posts(self): - share, shortcode = self.groups - if share is not None: - url = text.ensure_http_scheme(self.url) - headers = { - "Sec-Fetch-Dest": "empty", - "Sec-Fetch-Mode": "navigate", - "Sec-Fetch-Site": "same-origin", - } - location = self.request_location(url, headers=headers) - shortcode = location.split("/")[-2] - return self.api.media(shortcode) - - class InstagramRestAPI(): def __init__(self, extractor): diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index fbed328..30d6848 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -39,7 +39,7 @@ class MangadexExtractor(Extractor): data = self._transform(chapter) data["_extractor"] = MangadexChapterExtractor self._cache[uuid] = data - yield Message.Queue, self.root + "/chapter/" + uuid, data + yield Message.Queue, f"{self.root}/chapter/{uuid}", data def _items_manga(self): data = {"_extractor": MangadexMangaExtractor} @@ -51,13 +51,8 @@ class MangadexExtractor(Extractor): relationships = defaultdict(list) for item in chapter["relationships"]: relationships[item["type"]].append(item) - manga = self.api.manga(relationships["manga"][0]["id"]) - for item in manga["relationships"]: - relationships[item["type"]].append(item) cattributes = chapter["attributes"] - mattributes = manga["attributes"] - if lang := cattributes.get("translatedLanguage"): lang = lang.partition("-")[0] @@ -66,35 +61,21 @@ class MangadexExtractor(Extractor): else: chnum, sep, minor = 0, "", "" - data = { - "manga" : (mattributes["title"].get("en") or - next(iter(mattributes["title"].values()))), - "manga_id": manga["id"], + return { + **_manga_info(self, relationships["manga"][0]["id"]), "title" : cattributes["title"], "volume" : text.parse_int(cattributes["volume"]), "chapter" : text.parse_int(chnum), - "chapter_minor": sep + minor, + "chapter_minor": f"{sep}{minor}", "chapter_id": chapter["id"], "date" : text.parse_datetime(cattributes["publishAt"]), + "group" : [group["attributes"]["name"] + for group in relationships["scanlation_group"]], "lang" : lang, - "language": util.code_to_language(lang), "count" : cattributes["pages"], "_external_url": cattributes.get("externalUrl"), } - data["artist"] = [artist["attributes"]["name"] - for artist in relationships["artist"]] - data["author"] = [author["attributes"]["name"] - for author in relationships["author"]] - data["group"] = [group["attributes"]["name"] - for group in relationships["scanlation_group"]] - - data["status"] = mattributes["status"] - data["tags"] = [tag["attributes"]["name"]["en"] - for tag in mattributes["tags"]] - - return data - class MangadexCoversExtractor(MangadexExtractor): """Extractor for mangadex manga covers""" @@ -103,7 +84,7 @@ class MangadexCoversExtractor(MangadexExtractor): filename_fmt = "{volume:>02}_{lang}.{extension}" archive_fmt = "c_{cover_id}" pattern = (rf"{BASE_PATTERN}/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)" - r"(?:/[^/?#]+)?\?tab=art") + rf"(?:/[^/?#]+)?\?tab=art") example = ("https://mangadex.org/title" "/01234567-89ab-cdef-0123-456789abcdef?tab=art") @@ -121,24 +102,10 @@ class MangadexCoversExtractor(MangadexExtractor): relationships = defaultdict(list) for item in cover["relationships"]: relationships[item["type"]].append(item) - manga = self.api.manga(relationships["manga"][0]["id"]) - for item in 
manga["relationships"]: - relationships[item["type"]].append(item) - cattributes = cover["attributes"] - mattributes = manga["attributes"] return { - "manga" : (mattributes["title"].get("en") or - next(iter(mattributes["title"].values()))), - "manga_id": manga["id"], - "status" : mattributes["status"], - "author" : [author["attributes"]["name"] - for author in relationships["author"]], - "artist" : [artist["attributes"]["name"] - for artist in relationships["artist"]], - "tags" : [tag["attributes"]["name"]["en"] - for tag in mattributes["tags"]], + **_manga_info(self, relationships["manga"][0]["id"]), "cover" : cattributes["fileName"], "lang" : cattributes.get("locale"), "volume" : text.parse_int(cattributes["volume"]), @@ -150,7 +117,7 @@ class MangadexCoversExtractor(MangadexExtractor): class MangadexChapterExtractor(MangadexExtractor): """Extractor for manga-chapters from mangadex.org""" subcategory = "chapter" - pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)" + pattern = rf"{BASE_PATTERN}/chapter/([0-9a-f-]+)" example = ("https://mangadex.org/chapter" "/01234567-89ab-cdef-0123-456789abcdef") @@ -177,13 +144,13 @@ class MangadexChapterExtractor(MangadexExtractor): "page-reverse") else enumerate for data["page"], page in enum(chapter["data"], 1): text.nameext_from_url(page, data) - yield Message.Url, base + page, data + yield Message.Url, f"{base}{page}", data class MangadexMangaExtractor(MangadexExtractor): """Extractor for manga from mangadex.org""" subcategory = "manga" - pattern = BASE_PATTERN + r"/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)" + pattern = rf"{BASE_PATTERN}/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)" example = ("https://mangadex.org/title" "/01234567-89ab-cdef-0123-456789abcdef") @@ -194,7 +161,7 @@ class MangadexMangaExtractor(MangadexExtractor): class MangadexFeedExtractor(MangadexExtractor): """Extractor for chapters from your Updates Feed""" subcategory = "feed" - pattern = BASE_PATTERN + r"/titles?/feed$()" + pattern = rf"{BASE_PATTERN}/titles?/feed$()" example = "https://mangadex.org/title/feed" def chapters(self): @@ -204,7 +171,7 @@ class MangadexFeedExtractor(MangadexExtractor): class MangadexFollowingExtractor(MangadexExtractor): """Extractor for followed manga from your Library""" subcategory = "following" - pattern = BASE_PATTERN + r"/titles?/follows(?:\?([^#]+))?$" + pattern = rf"{BASE_PATTERN}/titles?/follows(?:\?([^#]+))?$" example = "https://mangadex.org/title/follows" items = MangadexExtractor._items_manga @@ -216,8 +183,8 @@ class MangadexFollowingExtractor(MangadexExtractor): class MangadexListExtractor(MangadexExtractor): """Extractor for mangadex MDLists""" subcategory = "list" - pattern = (BASE_PATTERN + - r"/list/([0-9a-f-]+)(?:/[^/?#]*)?(?:\?tab=(\w+))?") + pattern = (rf"{BASE_PATTERN}" + rf"/list/([0-9a-f-]+)(?:/[^/?#]*)?(?:\?tab=(\w+))?") example = ("https://mangadex.org/list" "/01234567-89ab-cdef-0123-456789abcdef/NAME") @@ -242,7 +209,7 @@ class MangadexListExtractor(MangadexExtractor): class MangadexAuthorExtractor(MangadexExtractor): """Extractor for mangadex authors""" subcategory = "author" - pattern = BASE_PATTERN + r"/author/([0-9a-f-]+)" + pattern = rf"{BASE_PATTERN}/author/([0-9a-f-]+)" example = ("https://mangadex.org/author" "/01234567-89ab-cdef-0123-456789abcdef/NAME") @@ -280,30 +247,30 @@ class MangadexAPI(): else text.ensure_http_scheme(server).rstrip("/")) def athome_server(self, uuid): - return self._call("/at-home/server/" + uuid) + return self._call(f"/at-home/server/{uuid}") def author(self, uuid, manga=False): 
params = {"includes[]": ("manga",)} if manga else None - return self._call("/author/" + uuid, params)["data"] + return self._call(f"/author/{uuid}", params)["data"] def chapter(self, uuid): params = {"includes[]": ("scanlation_group",)} - return self._call("/chapter/" + uuid, params)["data"] + return self._call(f"/chapter/{uuid}", params)["data"] def covers_manga(self, uuid): params = {"manga[]": uuid} return self._pagination_covers("/cover", params) def list(self, uuid): - return self._call("/list/" + uuid, None, True)["data"] + return self._call(f"/list/{uuid}", None, True)["data"] def list_feed(self, uuid): - return self._pagination_chapters("/list/" + uuid + "/feed", None, True) + return self._pagination_chapters(f"/list/{uuid}/feed", None, True) @memcache(keyarg=1) def manga(self, uuid): params = {"includes[]": ("artist", "author")} - return self._call("/manga/" + uuid, params)["data"] + return self._call(f"/manga/{uuid}", params)["data"] def manga_author(self, uuid_author): params = {"authorOrArtist": uuid_author} @@ -315,7 +282,7 @@ class MangadexAPI(): "order[volume]" : order, "order[chapter]": order, } - return self._pagination_chapters("/manga/" + uuid + "/feed", params) + return self._pagination_chapters(f"/manga/{uuid}/feed", params) def user_follows_manga(self): params = {"contentRating": None} @@ -366,17 +333,17 @@ class MangadexAPI(): _refresh_token_cache.update( (username, "personal"), data["refresh_token"]) - return "Bearer " + access_token + return f"Bearer {access_token}" @cache(maxage=900, keyarg=1) def _authenticate_impl_legacy(self, username, password): if refresh_token := _refresh_token_cache(username): self.extractor.log.info("Refreshing access token") - url = self.root + "/auth/refresh" + url = f"{self.root}/auth/refresh" json = {"token": refresh_token} else: self.extractor.log.info("Logging in as %s", username) - url = self.root + "/auth/login" + url = f"{self.root}/auth/login" json = {"username": username, "password": password} self.extractor.log.debug("Using legacy login method") @@ -387,10 +354,10 @@ class MangadexAPI(): if refresh_token != data["token"]["refresh"]: _refresh_token_cache.update(username, data["token"]["refresh"]) - return "Bearer " + data["token"]["session"] + return f"Bearer {data['token']['session']}" def _call(self, endpoint, params=None, auth=False): - url = self.root + endpoint + url = f"{self.root}{endpoint}" headers = self.headers_auth if auth else self.headers while True: @@ -470,3 +437,33 @@ class MangadexAPI(): @cache(maxage=90*86400, keyarg=0) def _refresh_token_cache(username): return None + + +@memcache(keyarg=1) +def _manga_info(self, uuid): + manga = self.api.manga(uuid) + + rel = defaultdict(list) + for item in manga["relationships"]: + rel[item["type"]].append(item) + mattr = manga["attributes"] + + return { + "manga" : (mattr["title"].get("en") or + next(iter(mattr["title"].values()))), + "manga_id": manga["id"], + "manga_titles": [t.popitem()[1] + for t in mattr.get("altTitles") or ()], + "manga_date" : text.parse_datetime(mattr.get("createdAt")), + "description" : (mattr["description"].get("en") or + next(iter(mattr["description"].values()))), + "demographic": mattr.get("publicationDemographic"), + "origin": mattr.get("originalLanguage"), + "status": mattr.get("status"), + "year" : mattr.get("year"), + "rating": mattr.get("contentRating"), + "links" : mattr.get("links"), + "tags" : [tag["attributes"]["name"]["en"] for tag in mattr["tags"]], + "artist": [artist["attributes"]["name"] for artist in rel["artist"]], + "author": 
[author["attributes"]["name"] for author in rel["author"]], + } diff --git a/gallery_dl/extractor/mangafire.py b/gallery_dl/extractor/mangafire.py new file mode 100644 index 0000000..5ccb732 --- /dev/null +++ b/gallery_dl/extractor/mangafire.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://mangafire.to/""" + +from .common import ChapterExtractor, MangaExtractor +from .. import text, exception +from ..cache import memcache + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangafire\.to" + + +class MangafireBase(): + """Base class for mangafire extractors""" + category = "mangafire" + root = "https://mangafire.to" + + +class MangafireChapterExtractor(MangafireBase, ChapterExtractor): + """Extractor for mangafire manga chapters""" + directory_fmt = ( + "{category}", "{manga}", + "{volume:?v/ />02}{chapter:?c//>03}{chapter_minor:?//}{title:?: //}") + filename_fmt = ( + "{manga}{volume:?_v//>02}{chapter:?_c//>03}{chapter_minor:?//}_" + "{page:>03}.{extension}") + archive_fmt = ( + "{manga_id}_{chapter_id}_{page}") + pattern = (rf"{BASE_PATTERN}/read/([\w-]+\.(\w+))/([\w-]+)" + rf"/((chapter|volume)-\d+(?:\D.*)?)") + example = "https://mangafire.to/read/MANGA.ID/LANG/chapter-123" + + def metadata(self, _): + manga_path, manga_id, lang, chapter_info, self.type = self.groups + + try: + chapters = _manga_chapters(self, (manga_id, self.type, lang)) + anchor = chapters[chapter_info] + except KeyError: + raise exception.NotFoundError("chapter") + self.chapter_id = text.extr(anchor, 'data-id="', '"') + + return { + **_manga_info(self, manga_path), + **_chapter_info(anchor), + } + + def images(self, page): + url = f"{self.root}/ajax/read/{self.type}/{self.chapter_id}" + headers = {"x-requested-with": "XMLHttpRequest"} + data = self.request_json(url, headers=headers) + + return [ + (image[0], None) + for image in data["result"]["images"] + ] + + +class MangafireMangaExtractor(MangafireBase, MangaExtractor): + """Extractor for mangafire manga""" + chapterclass = MangafireChapterExtractor + pattern = rf"{BASE_PATTERN}/manga/([\w-]+)\.(\w+)" + example = "https://mangafire.to/manga/MANGA.ID" + + def chapters(self, page): + manga_slug, manga_id = self.groups + lang = self.config("lang") or "en" + + manga = _manga_info(self, f"{manga_slug}.{manga_id}") + chapters = _manga_chapters(self, (manga_id, "chapter", lang)) + + return [ + (f"""{self.root}{text.extr(anchor, 'href="', '"')}""", { + **manga, + **_chapter_info(anchor), + }) + for anchor in chapters.values() + ] + + +@memcache(keyarg=1) +def _manga_info(self, manga_path, page=None): + if page is None: + url = f"{self.root}/manga/{manga_path}" + page = self.request(url).text + slug, _, mid = manga_path.rpartition(".") + + extr = text.extract_from(page) + manga = { + "cover": text.extr(extr( + 'class="poster">', '</div>'), 'src="', '"'), + "status": extr("<p>", "<").replace("_", " ").title(), + "manga" : text.unescape(extr( + 'itemprop="name">', "<")), + "manga_id": mid, + "manga_slug": slug, + "manga_titles": text.unescape(extr( + "<h6>", "<")).split("; "), + "type": text.remove_html(extr( + 'class="min-info">', "</a>")), + "author": text.unescape(text.remove_html(extr( + "<span>Author:</span>", "</div>"))).split(" , "), + "published": text.remove_html(extr( + "<span>Published:</span>", "</div>")), + "tags": 
+            "<span>Genres:</span>", "</div>"))[::2],
+        "publisher": text.unescape(text.remove_html(extr(
+            "<span>Mangazines:</span>", "</div>"))).split(" , "),
+        "score": text.parse_float(text.remove_html(extr(
+            'class="score">', " / "))),
+        "description": text.remove_html(extr(
+            'id="synopsis">', "<script>")),
+    }
+
+    if len(lst := manga["author"]) == 1 and not lst[0]:
+        manga["author"] = ()
+    if len(lst := manga["publisher"]) == 1 and not lst[0]:
+        manga["publisher"] = ()
+
+    return manga
+
+
+@memcache(keyarg=1)
+def _manga_chapters(self, manga_info):
+    manga_id, type, lang = manga_info
+    url = f"{self.root}/ajax/read/{manga_id}/{type}/{lang}"
+    headers = {"x-requested-with": "XMLHttpRequest"}
+    data = self.request_json(url, headers=headers)
+
+    needle = f"{manga_id}/{lang}/"
+    return {
+        text.extr(anchor, needle, '"'): anchor
+        for anchor in text.extract_iter(data["result"]["html"], "<a ", ">")
+    }
+
+
+@memcache(keyarg=0)
+def _chapter_info(info):
+    _, lang, chapter_info = text.extr(info, 'href="', '"').rsplit("/", 2)
+
+    if chapter_info.startswith("vol"):
+        volume = text.extr(info, 'data-number="', '"')
+        volume_id = text.parse_int(text.extr(info, 'data-id="', '"'))
+        return {
+            "volume"        : text.parse_int(volume),
+            "volume_id"     : volume_id,
+            "chapter"       : 0,
+            "chapter_minor" : "",
+            "chapter_string": chapter_info,
+            "chapter_id"    : volume_id,
+            "title"         : text.unescape(text.extr(info, 'title="', '"')),
+            "lang"          : lang,
+        }
+
+    chapter, sep, minor = text.extr(info, 'data-number="', '"').partition(".")
+    return {
+        "chapter"       : text.parse_int(chapter),
+        "chapter_minor" : f"{sep}{minor}",
+        "chapter_string": chapter_info,
+        "chapter_id"    : text.parse_int(text.extr(info, 'data-id="', '"')),
+        "title"         : text.unescape(text.extr(info, 'title="', '"')),
+        "lang"          : lang,
+    }
diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py
new file mode 100644
index 0000000..eb53998
--- /dev/null
+++ b/gallery_dl/extractor/mangareader.py
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://mangareader.to/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text, util
+from ..cache import memcache
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangareader\.to"
+
+
+class MangareaderBase():
+    """Base class for mangareader extractors"""
+    category = "mangareader"
+    root = "https://mangareader.to"
+
+
+class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
+    """Extractor for mangareader manga chapters"""
+    directory_fmt = (
+        "{category}", "{manga}",
+        "{volume:?v/ />02}{chapter:?c//>03}{chapter_minor:?//}{title:?: //}")
+    filename_fmt = (
+        "{manga}{volume:?_v//>02}{chapter:?_c//>03}{chapter_minor:?//}_"
+        "{page:>03}.{extension}")
+    archive_fmt = (
+        "{manga_id}_{chapter_id}_{page}")
+    pattern = (rf"{BASE_PATTERN}/read/([\w-]+-\d+)/([^/?#]+)"
+               rf"/(chapter|volume)-(\d+[^/?#]*)")
+    example = "https://mangareader.to/read/MANGA-123/LANG/chapter-123"
+
+    def metadata(self, _):
+        path, lang, type, chstr = self.groups
+
+        settings = util.json_dumps({
+            "readingMode"     : "vertical",
+            "readingDirection": "rtl",
+            "quality"         : "high",
+        })
+        self.cookies.set("mr_settings", settings, domain="mangareader.to")
+
+        url = f"{self.root}/read/{path}/{lang}/{type}-{chstr}"
+        page = self.request(url).text
+        self.cid = cid = text.extr(page, 'data-reading-id="', '"')
+
+        manga = _manga_info(self, path)
+        return {
+            **manga,
+            **manga[f"_{type}s"][lang][chstr],
+            "chapter_id": text.parse_int(cid),
+        }
+
+    def images(self, page):
+        key = "chap" if self.groups[2] == "chapter" else "vol"
+        url = f"{self.root}/ajax/image/list/{key}/{self.cid}"
+        params = {
+            "mode"       : "vertical,",
+            "quality"    : "high,",
+            "hozPageSize": "1,",
+        }
+        headers = {
+            "X-Requested-With": "XMLHttpRequest",
+            "Sec-Fetch-Dest"  : "empty",
+            "Sec-Fetch-Mode"  : "cors",
+            "Sec-Fetch-Site"  : "same-origin",
+        }
+        html = self.request_json(url, params=params, headers=headers)["html"]
+
+        return [
+            (url, None)
+            for url in text.extract_iter(html, 'data-url="', '"')
+        ]
+
+
+class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
+    """Extractor for mangareader manga"""
+    chapterclass = MangareaderChapterExtractor
+    pattern = rf"{BASE_PATTERN}/([\w-]+-\d+)"
+    example = "https://mangareader.to/MANGA-123"
+
+    def chapters(self, page):
+        manga = _manga_info(self, self.groups[0])
+        lang = self.config("lang") or "en"
+
+        return [
+            (info["chapter_url"], {**manga, **info})
+            for info in manga["_chapters"][lang].values()
+        ]
+
+
+@memcache(keyarg=1)
+def _manga_info(self, manga_path):
+    url = f"{self.root}/{manga_path}"
+    html = self.request(url).text
+
+    slug, _, mid = manga_path.rpartition("-")
+    extr = text.extract_from(html)
+    url = extr('property="og:url" content="', '"')
+    manga = {
+        "manga_url": url,
+        "manga_slug": url.rpartition("/")[2].rpartition("-")[0],
+        "manga_id": text.parse_int(mid),
+        "manga": text.unescape(extr('class="manga-name">', "<")),
+        "manga_alt": text.unescape(extr('class="manga-name-or">', "<")),
+        "tags": text.split_html(extr('class="genres">', "</div>")),
+        "type": text.remove_html(extr('>Type:', "</div>")),
+        "status": text.remove_html(extr('>Status:', "</div>")),
+        "author": text.split_html(extr('>Authors:', "</div>"))[0::2],
+        "published": text.remove_html(extr('>Published:', "</div>")),
+        "score": text.parse_float(text.remove_html(extr(
+            '>Score:', "</div>"))),
+        "views": text.parse_int(text.remove_html(extr(
+            '>Views:', "</div>")).replace(",", "")),
+    }
+
+    base = self.root
+
+    # extract all chapters
+    html = extr('class="chapters-list-ul">', " </div>")
+    manga["_chapters"] = chapters = {}
+    for group in text.extract_iter(html, "<ul", "</ul>"):
+        lang = text.extr(group, ' id="', '-chapters"')
+
+        chapters[lang] = current = {}
+        lang = lang.partition("-")[0]
+        for ch in text.extract_iter(group, "<li ", "</li>"):
+            path = text.extr(ch, 'href="', '"')
+            chap = text.extr(ch, 'data-number="', '"')
+            name = text.unescape(text.extr(ch, 'class="name">', "<"))
+
+            chapter, sep, minor = chap.partition(".")
+            current[chap] = {
+                "title"         : name.partition(":")[2].strip(),
+                "chapter"       : text.parse_int(chapter),
+                "chapter_minor" : f"{sep}{minor}",
+                "chapter_string": chap,
+                "chapter_url"   : f"{base}{path}",
+                "lang"          : lang,
+            }
+
+    # extract all volumes
+    html = extr('class="volume-list-ul">', "</section>")
+    manga["_volumes"] = volumes = {}
+    for group in html.split('<div class="manga_list-wrap')[1:]:
+        lang = text.extr(group, ' id="', '-volumes"')
+
+        volumes[lang] = current = {}
+        lang = lang.partition("-")[0]
+        for vol in text.extract_iter(group, 'class="item">', "</div>"):
+            path = text.extr(vol, 'href="', '"')
+            voln = text.extr(vol, 'tick-vol">', '<').rpartition(" ")[2]
+
+            current[voln] = {
+                "volume"        : text.parse_int(voln),
+                "volume_cover"  : text.extr(vol, ' src="', '"'),
+                "chapter"       : 0,
+                "chapter_minor" : "",
+                "chapter_string": voln,
+                "chapter_url"   : f"{base}{path}",
+                "lang"          : lang,
+            }
+
+    # extract remaining metadata
+    manga["description"] = text.unescape(extr(
+        'class="description-modal">', "</div>")).strip()
+
+    return manga
diff --git a/gallery_dl/extractor/misskey.py b/gallery_dl/extractor/misskey.py
index 5ff601a..42eaeef 100644
--- a/gallery_dl/extractor/misskey.py
+++ b/gallery_dl/extractor/misskey.py
@@ -25,8 +25,8 @@ class MisskeyExtractor(BaseExtractor):
     def _init(self):
         self.api = MisskeyAPI(self)
         self.instance = self.root.rpartition("://")[2]
-        self.renotes = self.config("renotes", False)
-        self.replies = self.config("replies", True)
+        self.renotes = True if self.config("renotes", False) else False
+        self.replies = True if self.config("replies", True) else False

     def items(self):
         for note in self.notes():
@@ -254,6 +254,8 @@ class MisskeyAPI():

     def _pagination(self, endpoint, data):
         data["limit"] = 100
+        data["withRenotes"] = self.extractor.renotes
+
         while True:
             notes = self._call(endpoint, data)
             if not notes:
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index 21c361c..528aff2 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -173,7 +173,7 @@ class NozomiSearchExtractor(NozomiExtractor):

         for tag in self.tags:
             (negative if tag[0] == "-" else positive).append(
-                tag.replace("/", ""))
+                text.quote(tag.replace("/", "")))

         for tag in positive:
             ids = nozomi("nozomi/" + tag)
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 5245f31..490243a 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -9,7 +9,7 @@
 """Extractors for https://rule34.paheal.net/"""

 from .common import Extractor, Message
-from .. import text
+from .. import text, exception


 class PahealExtractor(Extractor):
@@ -97,7 +97,12 @@ class PahealTagExtractor(PahealExtractor):
         base = f"{self.root}/post/list/{self.groups[0]}/"

         while True:
-            page = self.request(base + str(pnum)).text
+            try:
+                page = self.request(f"{base}{pnum}").text
+            except exception.HttpError as exc:
+                if exc.status == 404:
+                    return
+                raise

             pos = page.find("id='image-list'")
             for post in text.extract_iter(
@@ -146,4 +151,9 @@ class PahealPostExtractor(PahealExtractor):
     example = "https://rule34.paheal.net/post/view/12345"

     def get_posts(self):
-        return (self._extract_post(self.groups[0]),)
+        try:
+            return (self._extract_post(self.groups[0]),)
+        except exception.HttpError as exc:
+            if exc.status == 404:
+                return ()
+            raise
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index fb2f32c..cf1a6d6 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -230,6 +230,16 @@ class PatreonExtractor(Extractor):
                 attr["created"], "%Y-%m-%dT%H:%M:%S.%f%z")
         return attr

+    def _collection(self, collection_id):
+        url = f"{self.root}/api/collection/{collection_id}"
+        data = self.request_json(url)
+        coll = data["data"]
+        attr = coll["attributes"]
+        attr["id"] = coll["id"]
+        attr["date"] = text.parse_datetime(
+            attr["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+        return attr
+
     def _filename(self, url):
         """Fetch filename from an URL's Content-Disposition header"""
         response = self.request(url, method="HEAD", fatal=False)
@@ -333,6 +343,33 @@ class PatreonExtractor(Extractor):
         raise exception.AbortExtraction("Unable to extract bootstrap data")


+class PatreonCollectionExtractor(PatreonExtractor):
+    """Extractor for a patreon collection"""
+    subcategory = "collection"
+    directory_fmt = ("{category}", "{creator[full_name]}",
+                     "Collections", "{collection[title]} ({collection[id]})")
+    pattern = r"(?:https?://)?(?:www\.)?patreon\.com/collection/(\d+)"
+    example = "https://www.patreon.com/collection/12345"
+
+    def posts(self):
+        collection_id = self.groups[0]
+        self.kwdict["collection"] = collection = \
+            self._collection(collection_id)
+        campaign_id = text.extr(
+            collection["thumbnail"]["url"], "/campaign/", "/")
+
+        url = self._build_url("posts", (
+            # patreon returns '400 Bad Request' without campaign_id filter
+            f"&filter[campaign_id]={campaign_id}"
+            "&filter[contains_exclusive_posts]=true"
+            "&filter[is_draft]=false"
+            f"&filter[collection_id]={collection_id}"
+            "&filter[include_drops]=true"
+            "&sort=collection_order"
+        ))
+        return self._pagination(url)
+
+
 class PatreonCreatorExtractor(PatreonExtractor):
     """Extractor for a creator's works"""
     subcategory = "creator"
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index a72042c..6276a2a 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -1232,7 +1232,7 @@ class PixivAppAPI():
         params = {"word": word, "search_target": target,
                   "sort": sort, "duration": duration,
                   "start_date": date_start, "end_date": date_end}
-        return self._pagination("/v1/search/illust", params)
+        return self._pagination_search("/v1/search/illust", params)

     def user_bookmarks_illust(self, user_id, tag=None, restrict="public"):
         """Return illusts bookmarked by a user"""
@@ -1322,6 +1322,48 @@ class PixivAppAPI():
             params = text.parse_query(query)
             data = self._call(endpoint, params)

+    def _pagination_search(self, endpoint, params):
+        sort = params["sort"]
+        if sort == "date_desc":
+            date_key = "end_date"
+            date_off = timedelta(days=1)
+            date_cmp = lambda lhs, rhs: lhs >= rhs  # noqa E731
+        elif sort == "date_asc":
+            date_key = "start_date"
+            date_off = timedelta(days=-1)
+            date_cmp = lambda lhs, rhs: lhs <= rhs  # noqa E731
+        else:
+            date_key = None
+        date_last = None
+
+        while True:
+            data = self._call(endpoint, params)
+
+            if date_last is None:
+                yield from data["illusts"]
+            else:
+                works = data["illusts"]
+                if date_cmp(date_last, works[-1]["create_date"]):
+                    for work in works:
+                        if date_last is None:
+                            yield work
+                        elif date_cmp(date_last, work["create_date"]):
+                            date_last = None
+
+            if not (next_url := data.get("next_url")):
+                return
+            query = next_url.rpartition("?")[2]
+            params = text.parse_query(query)
+
+            if date_key and text.parse_int(params.get("offset")) >= 5000:
+                date_last = data["illusts"][-1]["create_date"]
+                date_val = (text.parse_datetime(
+                    date_last) + date_off).strftime("%Y-%m-%d")
+                self.log.info("Reached 'offset' >= 5000; "
+                              "Updating '%s' to '%s'", date_key, date_val)
+                params[date_key] = date_val
+                params.pop("offset", None)
+

 @cache(maxage=36500*86400, keyarg=0)
 def _refresh_token_cache(username):
diff --git a/gallery_dl/extractor/s3ndpics.py b/gallery_dl/extractor/s3ndpics.py
new file mode 100644
index 0000000..215f160
--- /dev/null
+++ b/gallery_dl/extractor/s3ndpics.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://s3nd.pics/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?s3nd\.pics"
+
+
+class S3ndpicsExtractor(Extractor):
+    """Base class for s3ndpics extractors"""
+    category = "s3ndpics"
+    root = "https://s3nd.pics"
+    root_api = f"{root}/api"
+    directory_fmt = ("{category}", "{user[username]}",
+                     "{date} {title:?/ /}({id})")
+    filename_fmt = "{num:>02}.{extension}"
+    archive_fmt = "{id}_{num}"
+
+    def items(self):
+        base = "https://s3.s3nd.pics/s3nd-pics/"
+
+        for post in self.posts():
+            post["id"] = post.pop("_id", None)
+            post["user"] = post.pop("userId", None)
+            post["date"] = text.parse_datetime(
+                post["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+            post["date_updated"] = text.parse_datetime(
+                post["updatedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+
+            files = post.pop("files", ())
+            post["count"] = len(files)
+
+            yield Message.Directory, post
+            for post["num"], file in enumerate(files, 1):
+                post["type"] = file["type"]
+                path = file["url"]
+                text.nameext_from_url(path, post)
+                yield Message.Url, f"{base}{path}", post
+
+    def _pagination(self, url, params):
+        params["page"] = 1
+
+        while True:
+            data = self.request_json(url, params=params)
+
+            self.kwdict["total"] = data["pagination"]["total"]
+            yield from data["posts"]
+
+            if params["page"] >= data["pagination"]["pages"]:
+                return
+            params["page"] += 1
+
+
+class S3ndpicsPostExtractor(S3ndpicsExtractor):
+    subcategory = "post"
+    pattern = rf"{BASE_PATTERN}/post/([0-9a-f]+)"
+    example = "https://s3nd.pics/post/0123456789abcdef01234567"
+
+    def posts(self):
+        url = f"{self.root_api}/posts/{self.groups[0]}"
+        return (self.request_json(url)["post"],)
+
+
+class S3ndpicsUserExtractor(S3ndpicsExtractor):
+    subcategory = "user"
+    pattern = rf"{BASE_PATTERN}/user/(\w+)"
+    example = "https://s3nd.pics/user/USER"
+
+    def posts(self):
+        url = f"{self.root_api}/users/username/{self.groups[0]}"
+        self.kwdict["user"] = user = self.request_json(url)["user"]
+
+        url = f"{self.root_api}/posts"
+        params = {
"userId": user["_id"], + "limit" : "12", + "sortBy": "newest", + } + return self._pagination(url, params) + + +class S3ndpicsSearchExtractor(S3ndpicsExtractor): + subcategory = "search" + pattern = rf"{BASE_PATTERN}/search/?\?([^#]+)" + example = "https://s3nd.pics/search?QUERY" + + def posts(self): + url = f"{self.root_api}/posts" + params = text.parse_query(self.groups[0]) + params.setdefault("limit", "20") + self.kwdict["search_tags"] = \ + params.get("tag") or params.get("tags") or params.get("q") + return self._pagination(url, params) diff --git a/gallery_dl/extractor/schalenetwork.py b/gallery_dl/extractor/schalenetwork.py index dc42417..a4ef3b0 100644 --- a/gallery_dl/extractor/schalenetwork.py +++ b/gallery_dl/extractor/schalenetwork.py @@ -62,10 +62,11 @@ class SchalenetworkExtractor(Extractor): pass params["page"] += 1 - def _token(self): + def _token(self, required=True): if token := self.config("token"): return f"Bearer {token.rpartition(' ')[2]}" - raise exception.AuthRequired("'token'", "your favorites") + if required: + raise exception.AuthRequired("'token'", "your favorites") def _crt(self): crt = self.config("crt") @@ -88,7 +89,7 @@ class SchalenetworkExtractor(Extractor): else: msg = f"{exc.status} {exc.response.reason}" raise exception.AuthRequired( - "'crt' query parameter & matching '--user-agent'", None, msg) + "'crt' query parameter & matching 'user-agent'", None, msg) class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor): @@ -114,19 +115,26 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor): 10: "mixed", 11: "language", 12: "other", + 13: "reclass", } def metadata(self, _): _, gid, gkey = self.groups + url = f"{self.root_api}/books/detail/{gid}/{gkey}" - data = self.request_json(url, headers=self.headers) - data["date"] = text.parse_timestamp(data["created_at"] // 1000) + headers = self.headers + data = self.request_json(url, headers=headers) + + try: + data["date"] = text.parse_timestamp(data["created_at"] // 1000) + data["count"] = len(data["thumbnails"]["entries"]) + del data["thumbnails"] + except Exception: + pass tags = [] types = self.TAG_TYPES - tags_data = data["tags"] - - for tag in tags_data: + for tag in data["tags"]: name = tag["name"] namespace = tag.get("namespace", 0) tags.append(types[namespace] + ":" + name) @@ -134,33 +142,34 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor): if self.config("tags", False): tags = collections.defaultdict(list) - for tag in tags_data : + for tag in data["tags"]: tags[tag.get("namespace", 0)].append(tag["name"]) for type, values in tags.items(): data["tags_" + types[type]] = values + url = f"{self.root_api}/books/detail/{gid}/{gkey}?crt={self._crt()}" + if token := self._token(False): + headers = headers.copy() + headers["Authorization"] = token try: - data["count"] = len(data["thumbnails"]["entries"]) - del data["thumbnails"] - except Exception: - pass + data_fmt = self.request_json( + url, method="POST", headers=headers) + except exception.HttpError as exc: + self._require_auth(exc) + + self.fmt = self._select_format(data_fmt["data"]) + data["source"] = data_fmt.get("source") return data def images(self, _): - crt = self._crt() _, gid, gkey = self.groups - url = f"{self.root_api}/books/detail/{gid}/{gkey}?crt={crt}" - try: - data = self.request_json(url, method="POST", headers=self.headers) - except exception.HttpError as exc: - self._require_auth(exc) - - fmt = self._select_format(data["data"]) + fmt = self.fmt url = 
(f"{self.root_api}/books/data/{gid}/{gkey}" - f"/{fmt['id']}/{fmt['key']}/{fmt['w']}?crt={crt}") - data = self.request_json(url, headers=self.headers) + f"/{fmt['id']}/{fmt['key']}/{fmt['w']}?crt={self._crt()}") + headers = self.headers + data = self.request_json(url, headers=headers) base = data["base"] results = [] @@ -169,7 +178,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor): info = { "width" : dimensions[0], "height": dimensions[1], - "_http_headers": self.headers, + "_http_headers": headers, } results.append((base + entry["path"], info)) return results diff --git a/gallery_dl/extractor/simpcity.py b/gallery_dl/extractor/simpcity.py index 3354289..d8227fa 100644 --- a/gallery_dl/extractor/simpcity.py +++ b/gallery_dl/extractor/simpcity.py @@ -92,7 +92,7 @@ class SimpcityExtractor(Extractor): author = schema["author"] stats = schema["interactionStatistic"] url_t = schema["url"] - url_a = author["url"] + url_a = author.get("url") or "" thread = { "id" : url_t[url_t.rfind(".")+1:-1], @@ -104,8 +104,9 @@ class SimpcityExtractor(Extractor): "tags" : (schema["keywords"].split(", ") if "keywords" in schema else ()), "section" : schema["articleSection"], - "author" : author["name"], - "author_id" : url_a[url_a.rfind(".")+1:-1], + "author" : author.get("name") or "", + "author_id" : (url_a[url_a.rfind(".")+1:-1] if url_a else + (author.get("name") or "")[15:]), "author_url": url_a, } diff --git a/gallery_dl/extractor/thehentaiworld.py b/gallery_dl/extractor/thehentaiworld.py index 055d7d8..9a30654 100644 --- a/gallery_dl/extractor/thehentaiworld.py +++ b/gallery_dl/extractor/thehentaiworld.py @@ -60,14 +60,16 @@ class ThehentaiworldExtractor(Extractor): "<li>Posted: ", "<"), "%Y-%m-%d"), } - if "/videos/" in url: + if (c := url[27]) == "v": post["type"] = "video" post["width"] = post["height"] = 0 post["votes"] = text.parse_int(extr("(<strong>", "</strong>")) post["score"] = text.parse_float(extr("<strong>", "<")) post["file_url"] = extr('<source src="', '"') else: - post["type"] = "image" + post["type"] = ("animated" if c == "g" else + "3d cgi" if c == "3" else + "image") post["width"] = text.parse_int(extr("<li>Size: ", " ")) post["height"] = text.parse_int(extr("x ", "<")) post["file_url"] = extr('a href="', '"') @@ -109,16 +111,6 @@ class ThehentaiworldExtractor(Extractor): pnum += 1 -class ThehentaiworldPostExtractor(ThehentaiworldExtractor): - subcategory = "post" - pattern = (rf"{BASE_PATTERN}" - rf"(/(?:(?:3d-cgi-)?hentai-image|video)s/([^/?#]+))") - example = "https://thehentaiworld.com/hentai-images/SLUG/" - - def posts(self): - return (f"{self.root}{self.groups[0]}/",) - - class ThehentaiworldTagExtractor(ThehentaiworldExtractor): subcategory = "tag" per_page = 24 @@ -137,3 +129,13 @@ class ThehentaiworldTagExtractor(ThehentaiworldExtractor): self.page_start += pages self.post_start += posts return num + + +class ThehentaiworldPostExtractor(ThehentaiworldExtractor): + subcategory = "post" + pattern = (rf"{BASE_PATTERN}(" + rf"/(?:video|(?:[\w-]+-)?hentai-image)s/([^/?#]+))") + example = "https://thehentaiworld.com/hentai-images/SLUG/" + + def posts(self): + return (f"{self.root}{self.groups[0]}/",) diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index e6c84d1..e7df4a3 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -1026,11 +1026,12 @@ class TwitterTweetExtractor(TwitterExtractor): return while True: + parent_id = tweet["rest_id"] tweet_id = 
tweet["legacy"].get("quoted_status_id_str") if not tweet_id: break tweet = self.api.tweet_result_by_rest_id(tweet_id) - tweet["legacy"]["quoted_by_id_str"] = tweet_id + tweet["legacy"]["quoted_by_id_str"] = parent_id yield tweet def _tweets_detail(self, tweet_id): diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 823e8e0..07bed79 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -86,16 +86,25 @@ class WeiboExtractor(Extractor): status["count"] = len(files) yield Message.Directory, status - for num, file in enumerate(files, 1): - if file["url"].startswith("http:"): - file["url"] = "https:" + file["url"][5:] + num = 0 + for file in files: + url = file["url"] + if not url: + continue + if url.startswith("http:"): + url = f"https:{url[5:]}" if "filename" not in file: - text.nameext_from_url(file["url"], file) + text.nameext_from_url(url, file) if file["extension"] == "json": file["extension"] = "mp4" + if file["extension"] == "m3u8": + url = f"ytdl:{url}" + file["_ytdl_manifest"] = "hls" + file["extension"] = "mp4" + num += 1 file["status"] = status file["num"] = num - yield Message.Url, file["url"], file + yield Message.Url, url, file def _extract_status(self, status, files): if "mix_media_info" in status: @@ -143,10 +152,21 @@ class WeiboExtractor(Extractor): media = max(info["playback_list"], key=lambda m: m["meta"]["quality_index"]) except Exception: - return {"url": (info.get("stream_url_hd") or - info.get("stream_url") or "")} + video = {"url": (info.get("replay_hd") or + info.get("stream_url_hd") or + info.get("stream_url") or "")} else: - return media["play_info"].copy() + video = media["play_info"].copy() + + if "//wblive-out." in video["url"] and \ + not text.ext_from_url(video["url"]): + try: + video["url"] = self.request_location(video["url"]) + except exception.HttpError as exc: + self.log.warning("%s: %s", exc.__class__.__name__, exc) + video["url"] = "" + + return video def _status_by_id(self, status_id): url = f"{self.root}/ajax/statuses/show?id={status_id}" diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py index 00266bd..5ba47d2 100644 --- a/gallery_dl/extractor/wikimedia.py +++ b/gallery_dl/extractor/wikimedia.py @@ -46,6 +46,12 @@ class WikimediaExtractor(BaseExtractor): else: self.api_url = None + # note: image revisions are different from page revisions + # ref: + # https://www.mediawiki.org/wiki/API:Revisions + # https://www.mediawiki.org/wiki/API:Imageinfo + self.image_revisions = self.config("image-revisions", 1) + @cache(maxage=36500*86400, keyarg=1) def _search_api_path(self, root): self.log.debug("Probing possible API endpoints") @@ -56,7 +62,10 @@ class WikimediaExtractor(BaseExtractor): return url raise exception.AbortExtraction("Unable to find API endpoint") - def prepare(self, image): + def prepare_info(self, info): + """Adjust the content of an image info object""" + + def prepare_image(self, image): """Adjust the content of an image object""" image["metadata"] = { m["name"]: m["value"] @@ -74,14 +83,19 @@ class WikimediaExtractor(BaseExtractor): def items(self): for info in self._pagination(self.params): try: - image = info["imageinfo"][0] - except LookupError: + images = info.pop("imageinfo") + except KeyError: self.log.debug("Missing 'imageinfo' for %s", info) - continue + images = () + + info["count"] = len(images) + self.prepare_info(info) + yield Message.Directory, info - self.prepare(image) - yield Message.Directory, image - yield Message.Url, image["url"], 
+            for info["num"], image in enumerate(images, 1):
+                self.prepare_image(image)
+                image.update(info)
+                yield Message.Url, image["url"], image

         if self.subcategories:
             base = self.root + "/wiki/"
@@ -108,6 +122,7 @@ class WikimediaExtractor(BaseExtractor):
             "timestamp|user|userid|comment|canonicaltitle|url|size|"
             "sha1|mime|metadata|commonmetadata|extmetadata|bitdepth"
         )
+        params["iilimit"] = self.image_revisions

         while True:
             data = self.request_json(url, params=params)
@@ -237,9 +252,8 @@ class WikimediaArticleExtractor(WikimediaExtractor):
             "titles" : path,
         }

-    def prepare(self, image):
-        WikimediaExtractor.prepare(self, image)
-        image["page"] = self.title
+    def prepare_info(self, info):
+        info["page"] = self.title


 class WikimediaWikiExtractor(WikimediaExtractor):
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index e1b4897..98c9331 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -26,6 +26,7 @@ class ZerochanExtractor(BooruExtractor):
     per_page = 250
     cookies_domain = ".zerochan.net"
     cookies_names = ("z_id", "z_hash")
+    useragent = util.USERAGENT
     request_interval = (0.5, 1.5)

     def login(self):
@@ -192,7 +193,13 @@ class ZerochanTagExtractor(ZerochanExtractor):
         metadata = self.config("metadata")

         while True:
-            page = self.request(url, params=params, expected=(500,)).text
+            try:
+                page = self.request(
+                    url, params=params, expected=(500,)).text
+            except exception.HttpError as exc:
+                if exc.status == 404:
+                    return
+                raise

             thumbs = text.extr(page, '<ul id="thumbs', '</ul>')
             extr = text.extract_from(thumbs)
@@ -231,7 +238,13 @@ class ZerochanTagExtractor(ZerochanExtractor):
         }

         while True:
-            response = self.request(url, params=params, allow_redirects=False)
+            try:
+                response = self.request(
+                    url, params=params, allow_redirects=False)
+            except exception.HttpError as exc:
+                if exc.status == 404:
+                    return
+                raise

             if response.status_code >= 300:
                 url = text.urljoin(self.root, response.headers["location"])
@@ -275,12 +288,18 @@ class ZerochanImageExtractor(ZerochanExtractor):
     pattern = BASE_PATTERN + r"/(\d+)"
     example = "https://www.zerochan.net/12345"

-    def __init__(self, match):
-        ZerochanExtractor.__init__(self, match)
-        self.image_id = match[1]
-
     def posts(self):
-        post = self._parse_entry_html(self.image_id)
+        image_id = self.groups[0]
+
+        try:
+            post = self._parse_entry_html(image_id)
+        except exception.HttpError as exc:
+            if exc.status in (404, 410):
+                if msg := text.extr(exc.response.text, "<h2>", "<"):
+                    self.log.warning(f"'{msg}'")
+                return ()
+            raise
+
         if self.config("metadata"):
-            post.update(self._parse_entry_api(self.image_id))
+            post.update(self._parse_entry_api(image_id))
+
         return (post,)
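
A note on the caching pattern used above: mangadex.py, mangafire.py, and mangareader.py all move shared per-manga metadata into module-level _manga_info helpers decorated with @memcache(keyarg=1), so every chapter of one manga reuses a single cached lookup instead of refetching it. A minimal stand-in for that keyed memoization (the real decorator lives in gallery_dl.cache and has more features; fetch_manga below is a hypothetical fetcher, not gallery-dl code):

import functools

def memcache(keyarg):
    # memoize a function on the positional argument at index `keyarg`
    def decorator(func):
        results = {}
        @functools.wraps(func)
        def wrapper(*args):
            key = args[keyarg]
            if key not in results:
                results[key] = func(*args)
            return results[key]
        return wrapper
    return decorator

@memcache(keyarg=1)
def fetch_manga(session, uuid):
    # runs once per uuid; later calls with the same uuid hit the cache
    print("fetching", uuid)
    return {"manga_id": uuid}

fetch_manga(None, "abc")  # performs the "request"
fetch_manga(None, "abc")  # served from cache, no second fetch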
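Similarly, the _pagination helper in the new s3ndpics.py walks a page-numbered JSON API whose responses pair a posts list with a pagination object. A self-contained sketch of that loop, assuming only the response shape visible in the diff ({"posts": [...], "pagination": {"total": N, "pages": M}}) and using plain requests instead of gallery-dl's request_json:

import requests

def paginate(url, params):
    # yield every post across all pages of the API response
    params = dict(params, page=1)
    while True:
        data = requests.get(url, params=params, timeout=30).json()
        yield from data["posts"]
        if params["page"] >= data["pagination"]["pages"]:
            return
        params["page"] += 1

for post in paginate("https://s3nd.pics/api/posts",
                     {"limit": "12", "sortBy": "newest"}):
    print(post.get("_id"))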
