| field     | value |
|-----------|-------|
| author    | 2025-09-16 02:12:49 -0400 |
| committer | 2025-09-16 02:12:49 -0400 |
| commit    | 3b7f8716690b7aa1994a9cb387bbc7215e01a4ed (patch) |
| tree      | 1009e66478f4f0a64324acd92e0cc8709eb5f90f /gallery_dl/extractor |
| parent    | 243b2597edb922fe7e0b0d887e80bb7ebbe72ab7 (diff) |

New upstream version 1.30.7 (upstream/1.30.7)
Diffstat (limited to 'gallery_dl/extractor')
| mode | file | lines changed |
|------|------|---------------|
| -rw-r--r-- | gallery_dl/extractor/__init__.py | 3 |
| -rw-r--r-- | gallery_dl/extractor/ao3.py | 5 |
| -rw-r--r-- | gallery_dl/extractor/bellazon.py | 165 |
| -rw-r--r-- | gallery_dl/extractor/boosty.py | 2 |
| -rw-r--r-- | gallery_dl/extractor/comick.py | 32 |
| -rw-r--r-- | gallery_dl/extractor/common.py | 19 |
| -rw-r--r-- | gallery_dl/extractor/cyberfile.py | 125 |
| -rw-r--r-- | gallery_dl/extractor/danbooru.py | 5 |
| -rw-r--r-- | gallery_dl/extractor/facebook.py | 56 |
| -rw-r--r-- | gallery_dl/extractor/fansly.py | 188 |
| -rw-r--r-- | gallery_dl/extractor/imgbb.py | 253 |
| -rw-r--r-- | gallery_dl/extractor/simpcity.py | 145 |
| -rw-r--r-- | gallery_dl/extractor/tiktok.py | 3 |
| -rw-r--r-- | gallery_dl/extractor/tungsten.py | 11 |
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 65 |
15 files changed, 841 insertions, 236 deletions
```diff
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 574d1e2..b32fcd1 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -31,6 +31,7 @@ modules = [
     "batoto",
     "bbc",
     "behance",
+    "bellazon",
     "bilibili",
     "blogger",
     "bluesky",
@@ -44,6 +45,7 @@ modules = [
     "comick",
     "comicvine",
     "cyberdrop",
+    "cyberfile",
     "danbooru",
     "dankefuerslesen",
     "desktopography",
@@ -170,6 +172,7 @@ modules = [
     "senmanga",
     "sexcom",
     "shimmie2",
+    "simpcity",
     "simplyhentai",
     "sizebooru",
     "skeb",
```

```diff
diff --git a/gallery_dl/extractor/ao3.py b/gallery_dl/extractor/ao3.py
index 2652acb..60380c4 100644
--- a/gallery_dl/extractor/ao3.py
+++ b/gallery_dl/extractor/ao3.py
@@ -102,8 +102,11 @@ class Ao3Extractor(Extractor):
     def _pagination(self, path, needle='<li id="work_'):
         while True:
             page = self.request(self.root + path).text
+
             yield from text.extract_iter(page, needle, '"')
-            path = text.extr(page, '<a rel="next" href="', '"')
+
+            path = (text.extr(page, '<a rel="next" href="', '"') or
+                    text.extr(page, '<li class="next"><a href="', '"'))
             if not path:
                 return
             path = text.unescape(path)
```

```diff
diff --git a/gallery_dl/extractor/bellazon.py b/gallery_dl/extractor/bellazon.py
new file mode 100644
index 0000000..5c9b9cd
--- /dev/null
+++ b/gallery_dl/extractor/bellazon.py
@@ -0,0 +1,165 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.bellazon.com/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?bellazon\.com/main"
+
+
+class BellazonExtractor(Extractor):
+    """Base class for bellazon extractors"""
+    category = "bellazon"
+    root = "https://www.bellazon.com/main"
+    directory_fmt = ("{category}", "{thread[section]}",
+                     "{thread[title]} ({thread[id]})")
+    filename_fmt = "{post[id]}_{num:>02}_{id}.{extension}"
+    archive_fmt = "{post[id]}/{filename}"
+
+    def items(self):
+        extract_urls = text.re(r'<a ([^>]*?href="([^"]+)".*?)</a>').findall
+        native = f"{self.root}/"
+
+        for post in self.posts():
+            urls = extract_urls(post["content"])
+            data = {"post": post}
+            post["count"] = data["count"] = len(urls)
+
+            yield Message.Directory, data
+            for data["num"], (info, url) in enumerate(urls, 1):
+                url = text.unescape(url)
+                if url.startswith(native):
+                    if not (alt := text.extr(info, ' alt="', '"')) or (
+                            alt.startswith("post-") and "_thumb." in alt):
+                        name = url
+                    else:
+                        name = text.unescape(alt)
+                    dc = text.nameext_from_url(name, data.copy())
+                    dc["id"] = text.extr(info, 'data-fileid="', '"')
+                    if ext := text.extr(info, 'data-fileext="', '"'):
+                        dc["extension"] = ext
+                    yield Message.Url, url, dc
+                else:
+                    yield Message.Queue, url, data
+
+    def _pagination(self, base, pnum=None):
+        base = f"{self.root}{base}"
+
+        if pnum is None:
+            url = f"{base}/"
+            pnum = 1
+        else:
+            url = f"{base}/page/{pnum}/"
+            pnum = None
+
+        while True:
+            page = self.request(url).text
+
+            yield page
+
+            if pnum is None or ' rel="next" ' not in page or text.extr(
+                    page, " rel=\"next\" data-page='", "'") == str(pnum):
+                return
+            pnum += 1
+            url = f"{base}/page/{pnum}/"
+
+    def _parse_thread(self, page):
+        schema = self._extract_jsonld(page)
+        author = schema["author"]
+        stats = schema["interactionStatistic"]
+        url_t = schema["url"]
+        url_a = author["url"]
+
+        path = text.split_html(text.extr(
+            page, '<nav class="ipsBreadcrumb', "</nav>"))[2:-1]
+
+        thread = {
+            "url"  : url_t,
+            "path" : path,
+            "title": schema["headline"],
+            "views": stats[0]["userInteractionCount"],
+            "posts": stats[1]["userInteractionCount"],
+            "date" : text.parse_datetime(schema["datePublished"]),
+            "date_updated": text.parse_datetime(schema["dateModified"]),
+            "description" : text.unescape(schema["text"]),
+            "section"     : path[-2],
+            "author"      : author["name"],
+            "author_url"  : url_a,
+        }
+
+        thread["id"], _, thread["slug"] = \
+            url_t.rsplit("/", 2)[1].partition("-")
+        thread["author_id"], _, thread["author_slug"] = \
+            url_a.rsplit("/", 2)[1].partition("-")
+
+        return thread
+
+    def _parse_post(self, html):
+        extr = text.extract_from(html)
+
+        post = {
+            "id": extr('id="elComment_', '"'),
+            "author_url": extr(" href='", "'"),
+            "date": text.parse_datetime(extr("datetime='", "'")),
+            "content": extr("<!-- Post content -->", "\n\t\t</div>"),
+        }
+
+        if (pos := post["content"].find(">")) >= 0:
+            post["content"] = post["content"][pos+1:].strip()
+
+        post["author_id"], _, post["author_slug"] = \
+            post["author_url"].rsplit("/", 2)[1].partition("-")
+
+        return post
+
+
+class BellazonPostExtractor(BellazonExtractor):
+    subcategory = "post"
+    pattern = (rf"{BASE_PATTERN}(/topic/\d+-[\w-]+(?:/page/\d+)?)"
+               rf"/?#findComment-(\d+)")
+    example = "https://www.bellazon.com/main/topic/123-SLUG/#findComment-12345"
+
+    def posts(self):
+        path, post_id = self.groups
+        page = self.request(f"{self.root}{path}").text
+
+        pos = page.find(f'id="elComment_{post_id}')
+        if pos < 0:
+            raise exception.NotFoundError("post")
+        html = text.extract(page, "<article ", "</article>", pos-100)[0]
+
+        self.kwdict["thread"] = self._parse_thread(page)
+        return (self._parse_post(html),)
+
+
+class BellazonThreadExtractor(BellazonExtractor):
+    subcategory = "thread"
+    pattern = rf"{BASE_PATTERN}(/topic/\d+-[\w-]+)(?:/page/(\d+))?"
+    example = "https://www.bellazon.com/main/topic/123-SLUG/"
+
+    def posts(self):
+        for page in self._pagination(*self.groups):
+            if "thread" not in self.kwdict:
+                self.kwdict["thread"] = self._parse_thread(page)
+            for html in text.extract_iter(page, "<article ", "</article>"):
+                yield self._parse_post(html)
+
+
+class BellazonForumExtractor(BellazonExtractor):
+    subcategory = "forum"
+    pattern = rf"{BASE_PATTERN}(/forum/\d+-[\w-]+)(?:/page/(\d+))?"
+    example = "https://www.bellazon.com/main/forum/123-SLUG/"
+
+    def items(self):
+        data = {"_extractor": BellazonThreadExtractor}
+        for page in self._pagination(*self.groups):
+            for row in text.extract_iter(
+                    page, '<li data-ips-hook="topicRow"', "</"):
+                yield Message.Queue, text.extr(row, 'href="', '"'), data
```
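For orientation (not part of the upstream diff): the thread pattern above captures the `/topic/…` path plus an optional page number, and `posts()` passes those two groups straight to `_pagination()`. A standalone check with Python's `re`, with the patterns inlined:

```python
import re

# BellazonThreadExtractor's pattern, inlined so this runs on its own
BASE = r"(?:https?://)?(?:www\.)?bellazon\.com/main"
THREAD = re.compile(rf"{BASE}(/topic/\d+-[\w-]+)(?:/page/(\d+))?")

for url in (
    "https://www.bellazon.com/main/topic/123-some-slug/",
    "https://www.bellazon.com/main/topic/123-some-slug/page/4/",
):
    print(THREAD.match(url).groups())
# ('/topic/123-some-slug', None)
# ('/topic/123-some-slug', '4')
```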
```diff
diff --git a/gallery_dl/extractor/boosty.py b/gallery_dl/extractor/boosty.py
index e0383bf..22f3259 100644
--- a/gallery_dl/extractor/boosty.py
+++ b/gallery_dl/extractor/boosty.py
@@ -281,7 +281,7 @@ class BoostyAPI():
         if not access_token:
             if auth := self.extractor.cookies.get("auth", domain=".boosty.to"):
                 access_token = text.extr(
-                    auth, "%22accessToken%22%3A%22", "%22")
+                    text.unquote(auth), '"accessToken":"', '"')
 
         if access_token:
             self.headers["Authorization"] = "Bearer " + access_token
```

```diff
diff --git a/gallery_dl/extractor/comick.py b/gallery_dl/extractor/comick.py
index a6aec38..c76694c 100644
--- a/gallery_dl/extractor/comick.py
+++ b/gallery_dl/extractor/comick.py
@@ -9,7 +9,7 @@
 """Extractors for https://comick.io/"""
 
 from .common import GalleryExtractor, ChapterExtractor, MangaExtractor, Message
-from .. import text
+from .. import text, exception
 from ..cache import memcache
 
 BASE_PATTERN = r"(?:https?://)?(?:www\.)?comick\.io"
@@ -67,9 +67,35 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
     def metadata(self, page):
         slug, chstr = self.groups
         manga = _manga_info(self, slug)
-        props = _chapter_info(self, manga, chstr)
-        ch = props["chapter"]
 
+        while True:
+            try:
+                props = _chapter_info(self, manga, chstr)
+            except exception.HttpError as exc:
+                if exc.response.status_code != 404:
+                    raise
+                if exc.response.headers.get(
+                        "Content-Type", "").startswith("text/html"):
+                    if locals().get("_retry_buildid"):
+                        raise
+                    self.log.debug("Updating Next.js build ID")
+                    _retry_buildid = True
+                    _manga_info.cache.clear()
+                    manga = _manga_info(self, slug)
+                    continue
+                if b'"notFound":true' in exc.response.content:
+                    raise exception.NotFoundError("chapter")
+                raise
+
+            if "__N_REDIRECT" in props:
+                path = props["__N_REDIRECT"]
+                self.log.debug("Following redirect to %s", path)
+                _, slug, chstr = path.rsplit("/", 2)
+                continue
+
+            ch = props["chapter"]
+            break
+
         self._images = ch["md_images"]
         if chapter := ch["chap"]:
```

```diff
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 568f435..01965f3 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -354,12 +354,11 @@ class Extractor():
             raise exception.AbortExtraction(
                 f"User input required ({prompt.strip(' :')})")
 
-    def _get_auth_info(self):
+    def _get_auth_info(self, password=None):
         """Return authentication information as (username, password) tuple"""
         username = self.config("username")
-        password = None
 
-        if username:
+        if username or password:
             password = self.config("password")
             if not password:
                 self._check_input_allowed("password")
@@ -667,12 +666,18 @@ class Extractor():
         return False
 
     def _extract_jsonld(self, page):
-        return util.json_loads(text.extr(
-            page, '<script type="application/ld+json">', "</script>"))
+        return util.json_loads(
+            text.extr(page, '<script type="application/ld+json">',
+                      "</script>") or
+            text.extr(page, "<script type='application/ld+json'>",
+                      "</script>"))
 
     def _extract_nextdata(self, page):
-        return util.json_loads(text.extr(
-            page, ' id="__NEXT_DATA__" type="application/json">', "</script>"))
+        return util.json_loads(
+            text.extr(page, ' id="__NEXT_DATA__" type="application/json">',
+                      "</script>") or
+            text.extr(page, " id='__NEXT_DATA__' type='application/json'>",
+                      "</script>"))
 
     def _cache(self, func, maxage, keyarg=None):
         # return cache.DatabaseCacheDecorator(func, maxage, keyarg)
```
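The `_extract_jsonld()` change above simply tries the double-quoted `<script>` attribute form first and falls back to the single-quoted one. A minimal standalone equivalent, with a toy stand-in for gallery_dl's `text.extr` (text between two markers, empty string on failure):

```python
import json

def extr(txt, begin, end, default=""):
    # toy stand-in for gallery_dl's text.extr
    try:
        first = txt.index(begin) + len(begin)
        return txt[first:txt.index(end, first)]
    except ValueError:
        return default

def extract_jsonld(page):
    return json.loads(
        extr(page, '<script type="application/ld+json">', "</script>") or
        extr(page, "<script type='application/ld+json'>", "</script>"))

page = "<script type='application/ld+json'>{\"headline\": \"T\"}</script>"
print(extract_jsonld(page))  # {'headline': 'T'}
```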
```diff
diff --git a/gallery_dl/extractor/cyberfile.py b/gallery_dl/extractor/cyberfile.py
new file mode 100644
index 0000000..2ea81d6
--- /dev/null
+++ b/gallery_dl/extractor/cyberfile.py
@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://cyberfile.me/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?cyberfile\.me"
+
+
+class CyberfileExtractor(Extractor):
+    """Base class for cyberfile extractors"""
+    category = "cyberfile"
+    root = "https://cyberfile.me"
+
+    def request_api(self, endpoint, data):
+        url = f"{self.root}{endpoint}"
+        headers = {
+            "X-Requested-With": "XMLHttpRequest",
+            "Origin": self.root,
+        }
+        resp = self.request_json(
+            url, method="POST", headers=headers, data=data)
+
+        if "albumPasswordModel" in resp.get("javascript", ""):
+            url_pw = f"{self.root}/ajax/folder_password_process"
+            data_pw = {
+                "folderPassword": self._get_auth_info(password=True)[1],
+                "folderId": text.extr(
+                    resp["html"], '<input type="hidden" value="', '"'),
+                "submitme": "1",
+            }
+            resp = self.request_json(
+                url_pw, method="POST", headers=headers, data=data_pw)
+            if not resp.get("success"):
+                raise exception.AuthorizationError(f"'{resp.get('msg')}'")
+            resp = self.request_json(
+                url, method="POST", headers=headers, data=data)
+
+        return resp
+
+
+class CyberfileFolderExtractor(CyberfileExtractor):
+    subcategory = "folder"
+    pattern = rf"{BASE_PATTERN}/folder/([0-9a-f]+)"
+    example = "https://cyberfile.me/folder/0123456789abcdef/NAME"
+
+    def items(self):
+        folder_hash = self.groups[0]
+        url = f"{self.root}/folder/{folder_hash}"
+        folder_num = text.extr(self.request(url).text, "ages('folder', '", "'")
+
+        extract_urls = text.re(r'dtfullurl="([^"]+)').findall
+        perpage = 600
+
+        data = {
+            "pageType" : "folder",
+            "nodeId"   : folder_num,
+            "pageStart": 1,
+            "perPage"  : perpage,
+            "filterOrderBy": "",
+        }
+        resp = self.request_api("/account/ajax/load_files", data)
+
+        folder = {
+            "_extractor" : CyberfileFileExtractor,
+            "folder_hash": folder_hash,
+            "folder_num" : text.parse_int(folder_num),
+            "folder"     : resp["page_title"],
+        }
+
+        while True:
+            urls = extract_urls(resp["html"])
+            for url in urls:
+                yield Message.Queue, url, folder
+
+            if len(urls) < perpage:
+                return
+            data["pageStart"] += 1
+            resp = self.request_api("/account/ajax/load_files", data)
+
+
+class CyberfileFileExtractor(CyberfileExtractor):
+    subcategory = "file"
+    directory_fmt = ("{category}", "{uploader}", "{folder}")
+    pattern = rf"{BASE_PATTERN}/([a-zA-Z0-9]+)"
+    example = "https://cyberfile.me/AbCdE"
+
+    def items(self):
+        file_id = self.groups[0]
+        url = f"{self.root}/{file_id}"
+        file_num = text.extr(self.request(url).text, "owFileInformation(", ")")
+
+        data = {"u": file_num}
+        resp = self.request_api("/account/ajax/file_details", data)
+        extr = text.extract_from(resp["html"])
+        info = text.split_html(extr('class="text-section">', "</span>"))
+        folder = info[0] if len(info) > 1 else ""
+
+        file = {
+            "file_id" : file_id,
+            "file_num": text.parse_int(file_num),
+            "name"    : resp["page_title"],
+            "folder"  : folder,
+            "uploader": info[-1][2:].strip(),
+            "size"    : text.parse_bytes(text.remove_html(extr(
+                "Filesize:", "</tr>"))[:-1]),
+            "tags"    : text.split_html(extr(
+                "Keywords:", "</tr>")),
+            "date"    : text.parse_datetime(text.remove_html(extr(
+                "Uploaded:", "</tr>")), "%d/%m/%Y %H:%M:%S"),
+            "permissions": text.remove_html(extr(
+                "Permissions:", "</tr>")).split(" & "),
+        }
+
+        file["file_url"] = url = extr("openUrl('", "'")
+        text.nameext_from_url(file["name"] or url, file)
+        yield Message.Directory, file
+        yield Message.Url, url, file
```
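A sketch of the folder-password handshake `request_api()` performs, reduced to plain `requests`. The endpoints and field names are taken from the new file; the session, password, and folder id values are illustrative only (the real code pulls the password from gallery_dl's auth config and the folder id out of `resp["html"]`):

```python
import requests

ROOT = "https://cyberfile.me"
HEADERS = {"X-Requested-With": "XMLHttpRequest", "Origin": ROOT}

def request_api(session, endpoint, data, password="..."):
    url = ROOT + endpoint
    resp = session.post(url, headers=HEADERS, data=data).json()

    if "albumPasswordModel" in resp.get("javascript", ""):
        # folder is password-protected: unlock it once,
        # then repeat the original request
        session.post(ROOT + "/ajax/folder_password_process", headers=HEADERS,
                     data={"folderPassword": password,
                           "folderId": "...",  # parsed from resp["html"]
                           "submitme": "1"})
        resp = session.post(url, headers=HEADERS, data=data).json()
    return resp
```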
```diff
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 019410c..f8ad07a 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -102,7 +102,10 @@ class DanbooruExtractor(BaseExtractor):
                 post["extension"] = "webm"
 
             if url[0] == "/":
-                url = self.root + url
+                if url[1] == "/":
+                    url = "https:" + url
+                else:
+                    url = self.root + url
 
             post.update(data)
             yield Message.Directory, post
```

```diff
diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py
index f9ed1ab..bf24941 100644
--- a/gallery_dl/extractor/facebook.py
+++ b/gallery_dl/extractor/facebook.py
@@ -376,34 +376,6 @@ class FacebookExtractor(Extractor):
         return user
 
 
-class FacebookSetExtractor(FacebookExtractor):
-    """Base class for Facebook Set extractors"""
-    subcategory = "set"
-    pattern = (
-        BASE_PATTERN +
-        r"/(?:(?:media/set|photo)/?\?(?:[^&#]+&)*set=([^&#]+)"
-        r"[^/?#]*(?<!&setextract)$"
-        r"|([^/?#]+/posts/[^/?#]+)"
-        r"|photo/\?(?:[^&#]+&)*fbid=([^/?&#]+)&set=([^/?&#]+)&setextract)"
-    )
-    example = "https://www.facebook.com/media/set/?set=SET_ID"
-
-    def items(self):
-        set_id = self.groups[0] or self.groups[3]
-        if path := self.groups[1]:
-            post_url = self.root + "/" + path
-            post_page = self.request(post_url).text
-            set_id = self.parse_post_page(post_page)["set_id"]
-
-        set_url = f"{self.root}/media/set/?set={set_id}"
-        set_page = self.request(set_url).text
-        set_data = self.parse_set_page(set_page)
-        if self.groups[2]:
-            set_data["first_photo_id"] = self.groups[2]
-
-        return self.extract_set(set_data)
-
-
 class FacebookPhotoExtractor(FacebookExtractor):
     """Base class for Facebook Photo extractors"""
     subcategory = "photo"
@@ -441,6 +413,34 @@ class FacebookPhotoExtractor(FacebookExtractor):
             yield Message.Url, comment_photo["url"], comment_photo
 
 
+class FacebookSetExtractor(FacebookExtractor):
+    """Base class for Facebook Set extractors"""
+    subcategory = "set"
+    pattern = (
+        BASE_PATTERN +
+        r"/(?:(?:media/set|photo)/?\?(?:[^&#]+&)*set=([^&#]+)"
+        r"[^/?#]*(?<!&setextract)$"
+        r"|([^/?#]+/posts/[^/?#]+)"
+        r"|photo/\?(?:[^&#]+&)*fbid=([^/?&#]+)&set=([^/?&#]+)&setextract)"
+    )
+    example = "https://www.facebook.com/media/set/?set=SET_ID"
+
+    def items(self):
+        set_id = self.groups[0] or self.groups[3]
+        if path := self.groups[1]:
+            post_url = self.root + "/" + path
+            post_page = self.request(post_url).text
+            set_id = self.parse_post_page(post_page)["set_id"]
+
+        set_url = f"{self.root}/media/set/?set={set_id}"
+        set_page = self.request(set_url).text
+        set_data = self.parse_set_page(set_page)
+        if self.groups[2]:
+            set_data["first_photo_id"] = self.groups[2]
+
+        return self.extract_set(set_data)
+
+
 class FacebookVideoExtractor(FacebookExtractor):
     """Base class for Facebook Video extractors"""
     subcategory = "video"
```
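The danbooru change distinguishes protocol-relative URLs (`//host/path`) from root-relative ones (`/path`); a tiny standalone version of that branch:

```python
def absolutize(url, root="https://danbooru.donmai.us"):
    if url.startswith("//"):   # protocol-relative: add the scheme only
        return "https:" + url
    if url.startswith("/"):    # root-relative: prepend the site root
        return root + url
    return url

print(absolutize("//cdn.donmai.us/original/d3/4e/file.webm"))
print(absolutize("/data/sample.jpg"))
```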
```diff
diff --git a/gallery_dl/extractor/fansly.py b/gallery_dl/extractor/fansly.py
index 31d242f..8a6dbef 100644
--- a/gallery_dl/extractor/fansly.py
+++ b/gallery_dl/extractor/fansly.py
@@ -25,7 +25,11 @@ class FanslyExtractor(Extractor):
 
     def _init(self):
         self.api = FanslyAPI(self)
-        self.formats = self.config("format") or (303, 302, 1, 2, 4)
+
+        if fmts := self.config("formats"):
+            self.formats = set(fmts)
+        else:
+            self.formats = {1, 2, 3, 4, 302, 303}
 
     def items(self):
         for post in self.posts():
@@ -41,6 +45,19 @@ class FanslyExtractor(Extractor):
 
     def _extract_files(self, post):
         files = []
+
+        if "_extra" in post:
+            extra = post.pop("_extra", ())
+            media = {
+                media["id"]: media
+                for media in self.api.account_media(extra)
+            }
+            post["attachments"].extend(
+                media[mid]
+                for mid in extra
+                if mid in media
+            )
+
         for attachment in post.pop("attachments"):
             try:
                 self._extract_attachment(files, post, attachment)
@@ -54,19 +71,23 @@ class FanslyExtractor(Extractor):
 
     def _extract_attachment(self, files, post, attachment):
         media = attachment["media"]
-        variants = {
-            variant["type"]: variant
-            for variant in media.pop("variants", ())
-        }
-        variants[media["type"]] = media
 
-        for fmt in self.formats:
-            if fmt in variants and (variant := variants[fmt]).get("locations"):
-                break
-        else:
-            return self.log.warning(
-                "%s/%s: Requested format not available",
-                post["id"], attachment["id"])
+        variants = media.pop("variants") or []
+        if media.get("locations"):
+            variants.append(media)
+
+        formats = [
+            (type > 256, variant["width"], type, variant)
+            for variant in variants
+            if variant.get("locations") and
+            (type := variant["type"]) in self.formats
+        ]
+
+        try:
+            variant = max(formats)[-1]
+        except Exception:
+            return self.log.warning("%s/%s: No format available",
+                                    post["id"], attachment["id"])
 
         mime = variant["mimetype"]
         location = variant.pop("locations")[0]
@@ -78,7 +99,7 @@ class FanslyExtractor(Extractor):
 
         file = {
             **variant,
-            "format": fmt,
+            "format": variant["type"],
             "date": text.parse_timestamp(media["createdAt"]),
             "date_updated": text.parse_timestamp(media["updatedAt"]),
         }
@@ -86,12 +107,17 @@ class FanslyExtractor(Extractor):
 
         if "metadata" in location:  # manifest
             meta = location["metadata"]
-            file["type"] = "video"
+
+            try:
+                fallback = (media["locations"][0]["location"],)
+            except Exception:
+                fallback = ()
 
             files.append({
                 "file": file,
                 "url": f"ytdl:{location['location']}",
-                # "_fallback": (media["locations"][0]["location"],),
+                "_fallback": fallback,
                 "_ytdl_manifest":
                     "dash" if mime == "application/dash+xml" else "hls",
                 "_ytdl_manifest_cookies": (
@@ -161,17 +187,26 @@ class FanslyListsExtractor(FanslyExtractor):
 
 class FanslyCreatorPostsExtractor(FanslyExtractor):
     subcategory = "creator-posts"
-    pattern = rf"{BASE_PATTERN}/([^/?#]+)/posts"
+    pattern = rf"{BASE_PATTERN}/([^/?#]+)/posts(?:/wall/(\d+))?"
     example = "https://fansly.com/CREATOR/posts"
 
     def posts(self):
-        creator = self.groups[0]
-        if creator.startswith("id:"):
-            account = self.api.account_by_id(creator[3:])
-        else:
-            account = self.api.account(creator)
-        wall_id = account["walls"][0]["id"]
-        return self.api.timeline_new(account["id"], wall_id)
+        creator, wall_id = self.groups
+        account = self.api.account(creator)
+        return self.api.timeline_new(
+            account["id"], wall_id or account["walls"][0]["id"])
+
+
+class FanslyCreatorMediaExtractor(FanslyExtractor):
+    subcategory = "creator-media"
+    pattern = rf"{BASE_PATTERN}/([^/?#]+)/media(?:/wall/(\d+))?"
+    example = "https://fansly.com/CREATOR/media"
+
+    def posts(self):
+        creator, wall_id = self.groups
+        account = self.api.account(creator)
+        return self.api.mediaoffers_location(
+            account["id"], wall_id or account["walls"][0]["id"])
 
 
 class FanslyAPI():
@@ -179,18 +214,24 @@ class FanslyAPI():
 
     def __init__(self, extractor):
         self.extractor = extractor
-
-        token = extractor.config("token")
-        if not token:
-            self.extractor.log.warning("No 'token' provided")
-
         self.headers = {
             "fansly-client-ts": None,
             "Origin"          : extractor.root,
-            "authorization"   : token,
         }
 
-    def account(self, username):
+        if token := extractor.config("token"):
+            self.headers["authorization"] = token
+            self.extractor.log.debug(
+                "Using authorization 'token' %.5s...", token)
+        else:
+            self.extractor.log.warning("No 'token' provided")
+
+    def account(self, creator):
+        if creator.startswith("id:"):
+            return self.account_by_id(creator[3:])
+        return self.account_by_username(creator)
+
+    def account_by_username(self, username):
         endpoint = "/v1/account"
         params = {"usernames": username}
         return self._call(endpoint, params)[0]
@@ -205,6 +246,11 @@ class FanslyAPI():
         params = {"ids": ",".join(map(str, account_ids))}
         return self._call(endpoint, params)
 
+    def account_media(self, media_ids):
+        endpoint = "/v1/account/media"
+        params = {"ids": ",".join(map(str, media_ids))}
+        return self._call(endpoint, params)
+
     def lists_account(self):
         endpoint = "/v1/lists/account"
         params = {"itemId": ""}
@@ -218,7 +264,21 @@ class FanslyAPI():
             "after"   : None,
             "sortMode": sort,
         }
-        return self._pagination(endpoint, params)
+        return self._pagination_list(endpoint, params)
+
+    def mediaoffers_location(self, account_id, wall_id):
+        endpoint = "/v1/mediaoffers/location"
+        params = {
+            "locationId": wall_id,
+            "locationType": "1002",
+            "accountId": account_id,
+            "mediaType": "",
+            "before": "",
+            "after" : "0",
+            "limit" : "30",
+            "offset": "0",
+        }
+        return self._pagination_media(endpoint, params)
 
     def post(self, post_id):
         endpoint = "/v1/post"
@@ -262,6 +322,7 @@ class FanslyAPI():
         for post in posts:
             post["account"] = accounts[post.pop("accountId")]
+            extra = None
 
             attachments = []
             for attachment in post["attachments"]:
                 cid = attachment["contentId"]
@@ -270,18 +331,35 @@ class FanslyAPI():
                 elif cid in bundles:
                     bundle = bundles[cid]["bundleContent"]
                     bundle.sort(key=lambda c: c["pos"])
-                    attachments.extend(
-                        media[m["accountMediaId"]]
-                        for m in bundle
-                        if m["accountMediaId"] in media
-                    )
+                    for c in bundle:
+                        mid = c["accountMediaId"]
+                        if mid in media:
+                            attachments.append(media[mid])
+                        else:
+                            if extra is None:
+                                post["_extra"] = extra = []
+                            extra.append(mid)
                 else:
                     self.extractor.log.warning(
                         "%s: Unhandled 'contentId' %s",
                         post["id"], cid)
             post["attachments"] = attachments
+
         return posts
 
+    def _update_media(self, items, response):
+        posts = {
+            post["id"]: post
+            for post in response["posts"]
+        }
+
+        response["posts"] = [
+            posts[item["correlationId"]]
+            for item in items
+        ]
+
+        return self._update_posts(response)
+
     def _update_items(self, items):
         ids = [item["id"] for item in items]
         accounts = {
@@ -304,15 +382,27 @@ class FanslyAPI():
         while True:
             response = self._call(endpoint, params)
 
-            if isinstance(response, list):
-                if not response:
-                    return
-                yield from self._update_items(response)
-                params["after"] = response[-1]["sortId"]
-
-            else:
-                if not response.get("posts"):
-                    return
-                posts = self._update_posts(response)
-                yield from posts
-                params["before"] = min(p["id"] for p in posts)
+            if not response.get("posts"):
+                return
+            posts = self._update_posts(response)
+            yield from posts
+            params["before"] = min(p["id"] for p in posts)
+
+    def _pagination_list(self, endpoint, params):
+        while True:
+            response = self._call(endpoint, params)
+
+            if not response:
+                return
+            yield from self._update_items(response)
+            params["after"] = response[-1]["sortId"]
+
+    def _pagination_media(self, endpoint, params):
+        while True:
+            response = self._call(endpoint, params)
+
+            data = response["data"]
+            if not data:
+                return
+            yield from self._update_media(data, response["aggregationData"])
+            params["before"] = data[-1]["id"]
```
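Two details of the fansly rewrite worth spelling out: the option is now read from `formats` (plural) and treated as a set, and the best variant is chosen by ordering tuples, so manifest formats (type > 256) beat static ones and larger widths break ties within each group. A toy run of that selection logic with made-up variant dicts:

```python
variants = [
    {"type": 1,   "width": 640,  "locations": ["https://example.org/a"]},
    {"type": 2,   "width": 1280, "locations": ["https://example.org/b"]},
    {"type": 303, "width": 720,  "locations": ["https://example.org/m3u8"]},
    {"type": 4,   "width": 1920, "locations": []},  # no location: skipped
]
wanted = {1, 2, 3, 4, 302, 303}

candidates = [
    (v["type"] > 256, v["width"], v["type"], v)
    for v in variants
    if v.get("locations") and v["type"] in wanted
]
best = max(candidates)[-1]  # manifests first, then width, then type
print(best["type"])         # 303
```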
```diff
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
index e6abdeb..d9a63c7 100644
--- a/gallery_dl/extractor/imgbb.py
+++ b/gallery_dl/extractor/imgbb.py
@@ -16,63 +16,42 @@ from ..cache import cache
 class ImgbbExtractor(Extractor):
     """Base class for imgbb extractors"""
     category = "imgbb"
-    directory_fmt = ("{category}", "{user}")
-    filename_fmt = "{title} {id}.{extension}"
-    archive_fmt = "{id}"
-    root = "https://imgbb.com"
-
-    def __init__(self, match):
-        Extractor.__init__(self, match)
-        self.page_url = self.sort = None
+    directory_fmt = ("{category}", "{user[name]:?//}{user[id]:? (/)/}",
+                     "{album[title]} ({album[id]})")
+    filename_fmt = "{title} ({id}).{extension}"
+    archive_fmt = "{user[id]} {id}"
+    cookies_domain = ".imgbb.com"
+    cookies_names = ("PHPSESSID", "LID")
+    root = "https://ibb.co"
 
     def items(self):
         self.login()
-        url = self.page_url
-        params = {"sort": self.sort}
-        while True:
-            response = self.request(url, params=params, allow_redirects=False)
-            if response.status_code < 300:
-                break
-            url = response.headers["location"]
-            if url.startswith(self.root):
-                raise exception.NotFoundError(self.subcategory)
-
-        page = response.text
-        data = self.metadata(page)
-        first = True
-
-        for img in self.images(page):
-            image = {
-                "id"       : img["url_viewer"].rpartition("/")[2],
-                "user"     : img["user"]["username"] if "user" in img else "",
-                "title"    : text.unescape(img["title"]),
-                "url"      : img["image"]["url"],
-                "extension": img["image"]["extension"],
-                "size"     : text.parse_int(img["image"]["size"]),
-                "width"    : text.parse_int(img["width"]),
-                "height"   : text.parse_int(img["height"]),
-            }
-            image.update(data)
-            if first:
-                first = False
-                yield Message.Directory, data
-            yield Message.Url, image["url"], image
+
+        for image in self.posts():
+            url = image["url"]
+            text.nameext_from_url(url, image)
+            yield Message.Directory, image
+            yield Message.Url, url, image
 
     def login(self):
+        if self.cookies_check(self.cookies_names):
+            return
+
         username, password = self._get_auth_info()
         if username:
-            self.cookies_update(self._login_impl(username, password))
+            return self.cookies_update(self._login_impl(username, password))
 
     @cache(maxage=365*86400, keyarg=1)
     def _login_impl(self, username, password):
         self.log.info("Logging in as %s", username)
 
-        url = self.root + "/login"
+        url = "https://imgbb.com/login"
         page = self.request(url).text
-        token = text.extr(page, 'PF.obj.config.auth_token="', '"')
+        token = text.extr(page, 'name="auth_token" value="', '"')
 
-        headers = {"Referer": url}
+        headers = {
+            "Referer": url,
+        }
         data = {
             "auth_token"   : token,
             "login-subject": username,
@@ -84,27 +63,26 @@ class ImgbbExtractor(Extractor):
             raise exception.AuthenticationError()
         return self.cookies
 
-    def _extract_resource(self, page):
-        return util.json_loads(text.extr(
-            page, "CHV.obj.resource=", "};") + "}")
-
-    def _extract_user(self, page):
-        return self._extract_resource(page).get("user") or {}
-
-    def _pagination(self, page, endpoint, params):
-        data = None
+    def _pagination(self, page, url, params):
         seek, pos = text.extract(page, 'data-seek="', '"')
         tokn, pos = text.extract(page, 'PF.obj.config.auth_token="', '"', pos)
-
-        params["action"] = "list"
-        params["list"] = "images"
-        params["sort"] = self.sort
-        params["seek"] = seek
-        params["page"] = 2
-        params["auth_token"] = tokn
+        resc, pos = text.extract(page, "CHV.obj.resource=", "};", pos)
+        self.kwdict["user"] = util.json_loads(resc + "}").get("user")
 
+        data = None
         while True:
-            for img in text.extract_iter(page, "data-object='", "'"):
-                yield util.json_loads(text.unquote(img))
+            for obj in text.extract_iter(page, "data-object='", "'"):
+                post = util.json_loads(text.unquote(obj))
+                image = post["image"]
+                image["filename"], image["name"] = \
+                    image["name"], image["filename"]
+                image["id"] = post["id_encoded"]
+                image["title"] = post["title"]
+                image["width"] = text.parse_int(post["width"])
+                image["height"] = text.parse_int(post["height"])
+                image["size"] = text.parse_int(image["size"])
+                yield image
+
             if data:
                 if not data["seekEnd"] or params["seek"] == data["seekEnd"]:
                     return
@@ -112,105 +90,114 @@ class ImgbbExtractor(Extractor):
                 params["page"] += 1
             elif not seek or 'class="pagination-next"' not in page:
                 return
-            data = self.request_json(endpoint, method="POST", data=params)
+            else:
+                params["action"] = "list"
+                params["page"] = 2
+                params["seek"] = seek
+                params["auth_token"] = tokn
+
+            headers = {
+                "Accept": "application/json, text/javascript, */*; q=0.01",
+                "X-Requested-With": "XMLHttpRequest",
+                "Origin": self.root,
+                "Sec-Fetch-Dest": "empty",
+                "Sec-Fetch-Mode": "cors",
+                "Sec-Fetch-Site": "same-origin",
+            }
+
+            data = self.request_json(
+                url, method="POST", headers=headers, data=params)
             page = data["html"]
 
 
 class ImgbbAlbumExtractor(ImgbbExtractor):
-    """Extractor for albums on imgbb.com"""
+    """Extractor for imgbb albums"""
     subcategory = "album"
-    directory_fmt = ("{category}", "{user}", "{album_name} {album_id}")
     pattern = r"(?:https?://)?ibb\.co/album/([^/?#]+)/?(?:\?([^#]+))?"
     example = "https://ibb.co/album/ID"
 
-    def __init__(self, match):
-        ImgbbExtractor.__init__(self, match)
-        self.album_name = None
-        self.album_id = match[1]
-        self.sort = text.parse_query(match[2]).get("sort", "date_desc")
-        self.page_url = "https://ibb.co/album/" + self.album_id
-
-    def metadata(self, page):
-        album = text.extr(page, '"og:title" content="', '"')
-        user = self._extract_user(page)
-        return {
-            "album_id"   : self.album_id,
-            "album_name" : text.unescape(album),
-            "user"       : user.get("username") or "",
-            "user_id"    : user.get("id") or "",
-            "displayname": user.get("name") or "",
-        }
-
-    def images(self, page):
-        url = text.extr(page, '"og:url" content="', '"')
-        album_id = url.rpartition("/")[2].partition("?")[0]
-
-        return self._pagination(page, "https://ibb.co/json", {
-            "from"                  : "album",
-            "albumid"               : album_id,
-            "params_hidden[list]"   : "images",
-            "params_hidden[from]"   : "album",
-            "params_hidden[albumid]": album_id,
-        })
-
-
-class ImgbbUserExtractor(ImgbbExtractor):
-    """Extractor for user profiles in imgbb.com"""
-    subcategory = "user"
-    pattern = r"(?:https?://)?([\w-]+)\.imgbb\.com/?(?:\?([^#]+))?$"
-    example = "https://USER.imgbb.com"
+    def posts(self):
+        album_id, qs = self.groups
+        url = f"{self.root}/album/{album_id}"
+        params = text.parse_query(qs)
+        page = self.request(url, params=params).text
+        extr = text.extract_from(page)
 
-    def __init__(self, match):
-        ImgbbExtractor.__init__(self, match)
-        self.user = match[1]
-        self.sort = text.parse_query(match[2]).get("sort", "date_desc")
-        self.page_url = f"https://{self.user}.imgbb.com/"
-
-    def metadata(self, page):
-        user = self._extract_user(page)
-        return {
-            "user"       : user.get("username") or self.user,
-            "user_id"    : user.get("id") or "",
-            "displayname": user.get("name") or "",
+        self.kwdict["album"] = album = {
+            "url": extr(
+                'property="og:url" content="', '"'),
+            "title": text.unescape(extr(
+                'property="og:title" content="', '"')),
+            "description": text.unescape(extr(
+                'property="og:description" content="', '"')),
+            "id": extr(
+                'data-text="album-name" href="https://ibb.co/album/', '"'),
+            "count": text.parse_int(extr(
+                'data-text="image-count">', "<")),
         }
 
-    def images(self, page):
-        user = text.extr(page, '.obj.resource={"id":"', '"')
-        return self._pagination(page, self.page_url + "json", {
-            "from"                 : "user",
-            "userid"               : user,
-            "params_hidden[userid]": user,
-            "params_hidden[from]"  : "user",
-        })
+        url = f"{self.root}/json"
+        params["pathname"] = f"/album/{album['id']}"
+        return self._pagination(page, url, params)
 
 
 class ImgbbImageExtractor(ImgbbExtractor):
     subcategory = "image"
-    pattern = r"(?:https?://)?ibb\.co/(?!album/)([^/?#]+)"
+    pattern = r"(?:https?://)?ibb\.co/([^/?#]+)"
     example = "https://ibb.co/ID"
 
-    def __init__(self, match):
-        ImgbbExtractor.__init__(self, match)
-        self.image_id = match[1]
-
-    def items(self):
-        url = "https://ibb.co/" + self.image_id
+    def posts(self):
+        url = f"{self.root}/{self.groups[0]}"
         page = self.request(url).text
         extr = text.extract_from(page)
-        user = self._extract_user(page)
 
         image = {
-            "id"    : self.image_id,
+            "id"    : extr('property="og:url" content="https://ibb.co/', '"'),
            "title" : text.unescape(extr(
                 '"og:title" content="', ' hosted at ImgBB"')),
             "url"   : extr('"og:image" content="', '"'),
             "width" : text.parse_int(extr('"og:image:width" content="', '"')),
             "height": text.parse_int(extr('"og:image:height" content="', '"')),
-            "user"       : user.get("username") or "",
-            "user_id"    : user.get("id") or "",
-            "displayname": user.get("name") or "",
+            "album" : extr("Added to <a", "</a>"),
+            "date"  : text.parse_datetime(extr(
+                '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
+            "user"  : util.json_loads(extr(
+                "CHV.obj.resource=", "};") + "}").get("user"),
         }
-        image["extension"] = text.ext_from_url(image["url"])
 
-        yield Message.Directory, image
-        yield Message.Url, image["url"], image
+        if album := image["album"]:
+            image["album"] = {
+                "id"   : text.extr(album, "/album/", '"'),
+                "title": text.unescape(album.rpartition(">")[2]),
+            }
+        else:
+            image["album"] = None
+
+        return (image,)
+
+
+class ImgbbUserExtractor(ImgbbExtractor):
+    """Extractor for imgbb user profiles"""
+    subcategory = "user"
+    directory_fmt = ("{category}", "{user[name]} ({user[id]})")
+    pattern = r"(?:https?://)?([\w-]+)\.imgbb\.com/?(?:\?([^#]+))?"
+    example = "https://USER.imgbb.com"
+
+    def posts(self):
+        user, qs = self.groups
+        url = f"https://{user}.imgbb.com/"
+        params = text.parse_query(qs)
+        response = self.request(url, params=params, allow_redirects=False)
+
+        if response.status_code < 300:
+            params["pathname"] = "/"
+            return self._pagination(response.text, f"{url}json", params)
+
+        if response.status_code == 301:
+            raise exception.NotFoundError("user")
+        redirect = f"HTTP redirect to {response.headers.get('Location')}"
+        if response.status_code == 302:
+            raise exception.AuthRequired(
+                ("username & password", "authenticated cookies"),
+                "profile", redirect)
+        raise exception.AbortExtraction(redirect)
```
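If I read the new `directory_fmt` right, the `:?prefix/suffix/` specifier makes each user segment optional, so directories degrade gracefully when user metadata is absent. Assuming gallery_dl's `formatter.parse()` API (not part of this diff) behaves as documented, this can be checked directly:

```python
from gallery_dl import formatter  # assumes gallery_dl is installed

fmt = formatter.parse("{user[name]:?//}{user[id]:? (/)/}")
print(fmt.format_map({"user": {"name": "alice", "id": "42"}}))  # alice (42)
print(fmt.format_map({"user": {"name": "", "id": ""}}))         # (empty)
```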
```diff
diff --git a/gallery_dl/extractor/simpcity.py b/gallery_dl/extractor/simpcity.py
new file mode 100644
index 0000000..8cc7e38
--- /dev/null
+++ b/gallery_dl/extractor/simpcity.py
@@ -0,0 +1,145 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://simpcity.cr/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?simpcity\.(?:cr|su)"
+
+
+class SimpcityExtractor(Extractor):
+    """Base class for simpcity extractors"""
+    category = "simpcity"
+    root = "https://simpcity.cr"
+
+    def items(self):
+        extract_urls = text.re(r' href="([^"]+)').findall
+
+        for post in self.posts():
+            urls = extract_urls(post["content"])
+            data = {"post": post}
+            post["count"] = data["count"] = len(urls)
+            for data["num"], url in enumerate(urls, 1):
+                yield Message.Queue, url, data
+
+    def request_page(self, url):
+        try:
+            return self.request(url).text
+        except exception.HttpError as exc:
+            if exc.status == 403 and b">Log in<" in exc.response.content:
+                msg = text.extr(exc.response.text, "blockMessage--error", "</")
+                raise exception.AuthRequired(
+                    "'authenticated cookies'", None,
+                    msg.rpartition(">")[2].strip())
+            raise
+
+    def _pagination(self, base, pnum=None):
+        base = f"{self.root}{base}"
+
+        if pnum is None:
+            url = base
+            pnum = 1
+        else:
+            url = f"{base}/page-{pnum}"
+            pnum = None
+
+        while True:
+            page = self.request_page(url)
+
+            yield page
+
+            if pnum is None or "pageNav-jump--next" not in page:
+                return
+            pnum += 1
+            url = f"{base}/page-{pnum}"
+
+    def _parse_thread(self, page):
+        schema = self._extract_jsonld(page)["mainEntity"]
+        author = schema["author"]
+        stats = schema["interactionStatistic"]
+        url_t = schema["url"]
+        url_a = author["url"]
+
+        thread = {
+            "id"   : url_t[url_t.rfind(".")+1:-1],
+            "url"  : url_t,
+            "title": schema["headline"],
+            "date" : text.parse_datetime(schema["datePublished"]),
+            "views": stats[0]["userInteractionCount"],
+            "posts": stats[1]["userInteractionCount"],
+            "tags" : (schema["keywords"].split(", ")
+                      if "keywords" in schema else ()),
+            "section"   : schema["articleSection"],
+            "author"    : author["name"],
+            "author_id" : url_a[url_a.rfind(".")+1:-1],
+            "author_url": url_a,
+        }
+
+        return thread
+
+    def _parse_post(self, html):
+        extr = text.extract_from(html)
+
+        post = {
+            "author": extr('data-author="', '"'),
+            "id": extr('data-content="post-', '"'),
+            "author_url": extr('itemprop="url" content="', '"'),
+            "date": text.parse_datetime(extr('datetime="', '"')),
+            "content": extr('<div itemprop="text">', "\t\t</div>").strip(),
+        }
+
+        url_a = post["author_url"]
+        post["author_id"] = url_a[url_a.rfind(".")+1:-1]
+
+        return post
+
+
+class SimpcityPostExtractor(SimpcityExtractor):
+    subcategory = "post"
+    pattern = rf"{BASE_PATTERN}/(?:threads/[^/?#]+/post-|posts/)(\d+)"
+    example = "https://simpcity.cr/threads/TITLE.12345/post-54321"
+
+    def posts(self):
+        post_id = self.groups[0]
+        url = f"{self.root}/posts/{post_id}/"
+        page = self.request_page(url)
+
+        pos = page.find(f'data-content="post-{post_id}"')
+        if pos < 0:
+            raise exception.NotFoundError("post")
+        html = text.extract(page, "<article ", "</article>", pos-200)[0]
+
+        self.kwdict["thread"] = self._parse_thread(page)
+        return (self._parse_post(html),)
+
+
+class SimpcityThreadExtractor(SimpcityExtractor):
+    subcategory = "thread"
+    pattern = rf"{BASE_PATTERN}(/threads/(?:[^/?#]+\.)?\d+)(?:/page-(\d+))?"
+    example = "https://simpcity.cr/threads/TITLE.12345/"
+
+    def posts(self):
+        for page in self._pagination(*self.groups):
+            if "thread" not in self.kwdict:
+                self.kwdict["thread"] = self._parse_thread(page)
+            for html in text.extract_iter(page, "<article ", "</article>"):
+                yield self._parse_post(html)
+
+
+class SimpcityForumExtractor(SimpcityExtractor):
+    subcategory = "forum"
+    pattern = rf"{BASE_PATTERN}(/forums/(?:[^/?#]+\.)?\d+)(?:/page-(\d+))?"
+    example = "https://simpcity.cr/forums/TITLE.123/"
+
+    def items(self):
+        data = {"_extractor": SimpcityThreadExtractor}
+        for page in self._pagination(*self.groups):
+            for path in text.extract_iter(page, ' uix-href="', '"'):
+                yield Message.Queue, f"{self.root}{text.unquote(path)}", data
```

```diff
diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py
index 973bd22..f450806 100644
--- a/gallery_dl/extractor/tiktok.py
+++ b/gallery_dl/extractor/tiktok.py
@@ -42,8 +42,7 @@ class TiktokExtractor(Extractor):
                 continue
 
             post = video_detail["itemInfo"]["itemStruct"]
-            author = post["author"]
-            post["user"] = author["uniqueId"]
+            post["user"] = (a := post.get("author")) and a["uniqueId"] or ""
             post["date"] = text.parse_timestamp(post["createTime"])
 
             original_title = title = post["desc"]
```

```diff
diff --git a/gallery_dl/extractor/tungsten.py b/gallery_dl/extractor/tungsten.py
index 20d5a59..45836a9 100644
--- a/gallery_dl/extractor/tungsten.py
+++ b/gallery_dl/extractor/tungsten.py
@@ -87,14 +87,17 @@ class TungstenModelExtractor(TungstenExtractor):
 
 class TungstenUserExtractor(TungstenExtractor):
     subcategory = "user"
-    pattern = rf"{BASE_PATTERN}/user/([^/?#]+)"
-    example = "https://tungsten.run/user/USER/posts"
+    pattern = rf"{BASE_PATTERN}/user/([^/?#]+)(?:/posts)?/?(?:\?([^#]+))?"
+    example = "https://tungsten.run/user/USER"
 
     def posts(self):
-        url = f"{self.root}/user/{self.groups[0]}"
+        user, qs = self.groups
+        url = f"{self.root}/user/{user}"
         page = self.request(url).text
         uuid_user = text.extr(page, '"user":{"uuid":"', '"')
 
         url = f"https://api.tungsten.run/v1/users/{uuid_user}/posts"
-        params = {"sort": "top_all_time"}
+        params = text.parse_query(qs)
+        params.setdefault("sort", "top_all_time")
+        self.kwdict["search_tags"] = params.get("tag", "")
         return self._pagination(url, params)
```
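The TikTok one-liner above guards against posts that lack an `author` object; outside gallery_dl it behaves like this (note the `and`/`or` chain would also yield `""` for a falsy `uniqueId`, which is the intended fallback anyway):

```python
posts = [
    {"author": {"uniqueId": "someuser"}},
    {},  # e.g. a detail response without author metadata
]
for post in posts:
    user = (a := post.get("author")) and a["uniqueId"] or ""
    print(repr(user))
# 'someuser'
# ''
```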
```diff
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index c919cb8..ed3cfae 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1447,20 +1447,33 @@ class TwitterAPI():
             "includePromotedContent": False,
         }
         return self._pagination_tweets(
-            endpoint, variables, ("bookmark_timeline_v2", "timeline"), False)
+            endpoint, variables, ("bookmark_timeline_v2", "timeline"),
+            stop_tweets=128)
 
     def search_timeline(self, query, product="Latest"):
         endpoint = "/graphql/4fpceYZ6-YQCx_JSl_Cn_A/SearchTimeline"
         variables = {
             "rawQuery": query,
-            "count": 100,
+            "count": self.extractor.config("search-limit", 20),
             "querySource": "typed_query",
             "product": product,
             "withGrokTranslatedBio": False,
         }
+
+        if self.extractor.config("search-pagination") in (
+                "max_id", "maxid", "id"):
+            update_variables = self._update_variables_search
+        else:
+            update_variables = None
+
+        stop_tweets = self.extractor.config("search-stop")
+        if stop_tweets is None or stop_tweets == "auto":
+            stop_tweets = 3 if update_variables is None else 0
+
         return self._pagination_tweets(
             endpoint, variables,
-            ("search_by_raw_query", "search_timeline", "timeline"))
+            ("search_by_raw_query", "search_timeline", "timeline"),
+            stop_tweets=stop_tweets, update_variables=update_variables)
 
     def community_query(self, community_id):
         endpoint = "/graphql/2W09l7nD7ZbxGQHXvfB22w/CommunityQuery"
@@ -1870,11 +1883,12 @@ class TwitterAPI():
                 params["cursor"] = extr._update_cursor(cursor)
 
     def _pagination_tweets(self, endpoint, variables,
-                           path=None, stop_tweets=True,
+                           path=None, stop_tweets=0, update_variables=None,
                            features=None, field_toggles=None):
         extr = self.extractor
         original_retweets = (extr.retweets == "original")
         pinned_tweet = extr.pinned
+        stop_tweets_max = stop_tweets
 
         params = {"variables": None}
         if cursor := extr._init_cursor():
@@ -2067,11 +2081,24 @@ class TwitterAPI():
                                   tweet.get("rest_id"))
                     continue
 
-            if stop_tweets and not tweet:
-                return extr._update_cursor(None)
+            if tweet:
+                stop_tweets = stop_tweets_max
+                last_tweet = tweet
+            else:
+                if stop_tweets <= 0:
+                    return extr._update_cursor(None)
+                self.log.debug(
+                    "No Tweet results (%s/%s)",
+                    stop_tweets_max - stop_tweets + 1, stop_tweets_max)
+                stop_tweets -= 1
+
             if not cursor or cursor == variables.get("cursor"):
                 return extr._update_cursor(None)
-            variables["cursor"] = extr._update_cursor(cursor)
+
+            if update_variables is None:
+                variables["cursor"] = extr._update_cursor(cursor)
+            else:
+                variables = update_variables(variables, cursor, last_tweet)
 
     def _pagination_users(self, endpoint, variables, path=None):
         extr = self.extractor
@@ -2140,6 +2167,30 @@ class TwitterAPI():
 
                 self.log.debug("Skipping %s ('%s')", tweet_id, text)
 
+    def _update_variables_search(self, variables, cursor, tweet):
+        try:
+            tweet_id = tweet.get("id_str") or tweet["legacy"]["id_str"]
+            max_id = f"max_id:{int(tweet_id)-1}"
+
+            query, n = text.re(r"\bmax_id:\d+").subn(
+                max_id, variables["rawQuery"])
+            if n:
+                variables["rawQuery"] = query
+            else:
+                variables["rawQuery"] = f"{query} {max_id}"
+
+            if prefix := self.extractor._cursor_prefix:
+                self.extractor._cursor_prefix = \
+                    f"{prefix.partition('_')[0]}_{tweet_id}/"
+            variables["cursor"] = None
+        except Exception as exc:
+            self.extractor.log.debug(
+                "Failed to update 'max_id' search query (%s: %s). Falling "
+                "back to 'cursor' pagination", exc.__class__.__name__, exc)
+            variables["cursor"] = self.extractor._update_cursor(cursor)
+
+        return variables
+
 
 @cache(maxage=365*86400, keyarg=1)
 def _login_impl(extr, username, password):
```
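The twitter changes add three config options (`search-limit`, `search-pagination`, `search-stop`), and the new `_update_variables_search()` paginates by rewriting the search query itself rather than following cursors: it appends or replaces a `max_id:` operator so the next request's window starts just below the oldest Tweet seen. A standalone sketch of that query rewriting (cursor bookkeeping omitted):

```python
import re

def advance_query(raw_query, last_tweet_id):
    # move the search window past the oldest Tweet seen so far
    max_id = f"max_id:{int(last_tweet_id) - 1}"
    query, n = re.subn(r"\bmax_id:\d+", max_id, raw_query)
    return query if n else f"{query} {max_id}"

q = "from:user filter:media"
q = advance_query(q, "1700000000000000000")
print(q)  # from:user filter:media max_id:1699999999999999999
q = advance_query(q, "1690000000000000000")
print(q)  # from:user filter:media max_id:1689999999999999999
```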
