author    Unit 193 <unit193@unit193.net>  2025-01-28 19:12:09 -0500
committer Unit 193 <unit193@unit193.net>  2025-01-28 19:12:09 -0500
commit    a26df18796ff4e506b16bf32fcec9336233b9e2e (patch)
tree      876512f59831cd670a90a0bc92bc85def6ea3d82 /gallery_dl/extractor
parent    0532a387ef5b7fcb4507a9b094dca37a5f635fe1 (diff)

New upstream version 1.28.5 (tag: upstream/1.28.5)
Diffstat (limited to 'gallery_dl/extractor')
 gallery_dl/extractor/4archive.py      |   2
 gallery_dl/extractor/__init__.py      |   4
 gallery_dl/extractor/adultempire.py   |   3
 gallery_dl/extractor/architizer.py    |  10
 gallery_dl/extractor/artstation.py    |  12
 gallery_dl/extractor/batoto.py        |   1
 gallery_dl/extractor/bunkr.py         |  10
 gallery_dl/extractor/cohost.py        | 250
 gallery_dl/extractor/danbooru.py      |   2
 gallery_dl/extractor/deviantart.py    |   2
 gallery_dl/extractor/e621.py          |  19
 gallery_dl/extractor/facebook.py      |  35
 gallery_dl/extractor/fanleaks.py      |  87
 gallery_dl/extractor/fapachi.py       |   3
 gallery_dl/extractor/hiperdex.py      |  12
 gallery_dl/extractor/imagehosts.py    |  28
 gallery_dl/extractor/issuu.py         |  32
 gallery_dl/extractor/kemonoparty.py   |  51
 gallery_dl/extractor/khinsider.py     |  26
 gallery_dl/extractor/komikcast.py     |  11
 gallery_dl/extractor/lofter.py        |   8
 gallery_dl/extractor/lolisafe.py      |   9
 gallery_dl/extractor/mangafox.py      |   6
 gallery_dl/extractor/mangahere.py     |   6
 gallery_dl/extractor/mangaread.py     |   6
 gallery_dl/extractor/nekohouse.py     | 122
 gallery_dl/extractor/pixiv.py         |  42
 gallery_dl/extractor/pornpics.py      |  22
 gallery_dl/extractor/rule34xyz.py     |   8
 gallery_dl/extractor/saint.py         |   1
 gallery_dl/extractor/shimmie2.py      |   4
 gallery_dl/extractor/szurubooru.py    |   8
 gallery_dl/extractor/toyhouse.py      |  10
 gallery_dl/extractor/twitter.py       |  44
 gallery_dl/extractor/urlgalleries.py  |  13
 gallery_dl/extractor/vsco.py          |   3
 gallery_dl/extractor/webtoons.py      |   4
 gallery_dl/extractor/weebcentral.py   |   6
 gallery_dl/extractor/xfolio.py        | 146
 gallery_dl/extractor/xhamster.py      |  74
 40 files changed, 606 insertions(+), 536 deletions(-)
diff --git a/gallery_dl/extractor/4archive.py b/gallery_dl/extractor/4archive.py
index 948a605..d198369 100644
--- a/gallery_dl/extractor/4archive.py
+++ b/gallery_dl/extractor/4archive.py
@@ -64,7 +64,7 @@ class _4archiveThreadExtractor(Extractor):
data = {
"name": extr('class="name">', "</span>"),
"date": text.parse_datetime(
- extr('class="dateTime postNum">', "<").strip(),
+ extr('class="dateTime postNum" >', "<").strip(),
"%Y-%m-%d %H:%M:%S"),
"no" : text.parse_int(extr('href="#p', '"')),
}
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index b582c99..fc8d7b2 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -39,7 +39,6 @@ modules = [
"chevereto",
"cien",
"civitai",
- "cohost",
"comicvine",
"cyberdrop",
"danbooru",
@@ -52,7 +51,6 @@ modules = [
"exhentai",
"facebook",
"fanbox",
- "fanleaks",
"fantia",
"fapello",
"fapachi",
@@ -116,6 +114,7 @@ modules = [
"myportfolio",
"naver",
"naverwebtoon",
+ "nekohouse",
"newgrounds",
"nhentai",
"nijie",
@@ -196,6 +195,7 @@ modules = [
"wikiart",
"wikifeet",
"wikimedia",
+ "xfolio",
"xhamster",
"xvideos",
"yiffverse",
diff --git a/gallery_dl/extractor/adultempire.py b/gallery_dl/extractor/adultempire.py
index 1617414..c891b17 100644
--- a/gallery_dl/extractor/adultempire.py
+++ b/gallery_dl/extractor/adultempire.py
@@ -24,6 +24,9 @@ class AdultempireGalleryExtractor(GalleryExtractor):
GalleryExtractor.__init__(self, match)
self.gallery_id = match.group(2)
+ def _init(self):
+ self.cookies.set("ageConfirmed", "true", domain="www.adultempire.com")
+
def metadata(self, page):
extr = text.extract_from(page, page.index('<div id="content">'))
return {
diff --git a/gallery_dl/extractor/architizer.py b/gallery_dl/extractor/architizer.py
index 8064e78..0268224 100644
--- a/gallery_dl/extractor/architizer.py
+++ b/gallery_dl/extractor/architizer.py
@@ -32,10 +32,10 @@ class ArchitizerProjectExtractor(GalleryExtractor):
extr('id="Pages"', "")
return {
- "title" : extr('data-name="', '"'),
- "slug" : extr('data-slug="', '"'),
- "gid" : extr('data-gid="', '"').rpartition(".")[2],
- "firm" : extr('data-firm-leaders-str="', '"'),
+ "title" : extr("data-name='", "'"),
+ "slug" : extr("data-slug='", "'"),
+ "gid" : extr("data-gid='", "'").rpartition(".")[2],
+ "firm" : extr("data-firm-leaders-str='", "'"),
"location" : extr("<h2>", "<").strip(),
"type" : text.unescape(text.remove_html(extr(
'<div class="title">Type</div>', '<br'))),
@@ -54,7 +54,7 @@ class ArchitizerProjectExtractor(GalleryExtractor):
return [
(url, None)
for url in text.extract_iter(
- page, 'property="og:image:secure_url" content="', "?")
+ page, "property='og:image:secure_url' content='", "?")
]
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index ce1a78d..f448710 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -11,8 +11,6 @@
from .common import Extractor, Message
from .. import text, util, exception
import itertools
-import random
-import string
class ArtstationExtractor(Extractor):
@@ -29,6 +27,9 @@ class ArtstationExtractor(Extractor):
Extractor.__init__(self, match)
self.user = match.group(1) or match.group(2)
+ def _init(self):
+ self.session.headers["Cache-Control"] = "max-age=0"
+
def items(self):
videos = self.config("videos", True)
previews = self.config("previews", False)
@@ -172,7 +173,7 @@ class ArtstationExtractor(Extractor):
).json()["public_csrf_token"]
@staticmethod
- def _no_cache(url, alphabet=(string.digits + string.ascii_letters)):
+ def _no_cache(url):
"""Cause a cache miss to prevent Cloudflare 'optimizations'
Cloudflare's 'Polish' optimization strips image metadata and may even
@@ -184,10 +185,9 @@ class ArtstationExtractor(Extractor):
https://github.com/r888888888/danbooru/issues/3528
https://danbooru.donmai.us/forum_topics/14952
"""
- param = "gallerydl_no_cache=" + util.bencode(
- random.getrandbits(64), alphabet)
sep = "&" if "?" in url else "?"
- return url + sep + param
+ token = util.generate_token(8)
+ return url + sep + token[:4] + "=" + token[4:]
class ArtstationUserExtractor(ArtstationExtractor):
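
Note: the rewritten _no_cache() above drops the random/string-based parameter and builds it from util.generate_token(8) instead, split into a 4-character key and a value. A standalone sketch of the same cache-busting idea, with secrets.token_hex() standing in for util.generate_token() (that substitution is an assumption):

import secrets

def no_cache(url):
    """Append a random key=value query parameter to force a CDN cache miss."""
    token = secrets.token_hex(8)             # 16 random hex characters
    sep = "&" if "?" in url else "?"
    return url + sep + token[:4] + "=" + token[4:]

print(no_cache("https://cdn.example.com/image.jpg"))
# e.g. https://cdn.example.com/image.jpg?9f86=d081884c7d65
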
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index 77c40ef..4d192a4 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -67,6 +67,7 @@ class BatotoBase():
class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
"""Extractor for batoto manga chapters"""
+ archive_fmt = "{chapter_id}_{page}"
pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)"
example = "https://xbato.org/title/12345-MANGA/54321"
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index e1ee50d..25e9fd5 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -70,6 +70,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
self.root = "https://" + domain
def request(self, url, **kwargs):
+ kwargs["encoding"] = "utf-8"
kwargs["allow_redirects"] = False
while True:
@@ -114,8 +115,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
def fetch_album(self, album_id):
# album metadata
- page = self.request(
- self.root + "/a/" + album_id, encoding="utf-8").text
+ page = self.request(self.root + "/a/" + album_id).text
title = text.unescape(text.unescape(text.extr(
page, 'property="og:title" content="', '"')))
@@ -140,7 +140,8 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
file = self._extract_file(url)
info = text.split_html(item)
- file["name"] = info[-3]
+ if not file["name"]:
+ file["name"] = info[-3]
file["size"] = info[-2]
file["date"] = text.parse_datetime(
info[-1], "%H:%M:%S %d/%m/%Y")
@@ -157,6 +158,8 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
page = response.text
file_url = (text.extr(page, '<source src="', '"') or
text.extr(page, '<img src="', '"'))
+ file_name = (text.extr(page, 'property="og:title" content="', '"') or
+ text.extr(page, "<title>", " | Bunkr<"))
if not file_url:
webpage_url = text.unescape(text.rextract(
@@ -166,6 +169,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
return {
"file" : text.unescape(file_url),
+ "name" : text.unescape(file_name),
"_http_headers" : {"Referer": response.url},
"_http_validate": self._validate,
}
diff --git a/gallery_dl/extractor/cohost.py b/gallery_dl/extractor/cohost.py
deleted file mode 100644
index 6a43224..0000000
--- a/gallery_dl/extractor/cohost.py
+++ /dev/null
@@ -1,250 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2024 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://cohost.org/"""
-
-from .common import Extractor, Message
-from .. import text, util
-
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?cohost\.org"
-
-
-class CohostExtractor(Extractor):
- """Base class for cohost extractors"""
- category = "cohost"
- root = "https://cohost.org"
- directory_fmt = ("{category}", "{postingProject[handle]}")
- filename_fmt = ("{postId}{headline:?_//[b:200]}{num:?_//}.{extension}")
- archive_fmt = "{postId}_{num}"
-
- def _init(self):
- self.replies = self.config("replies", True)
- self.pinned = self.config("pinned", False)
- self.shares = self.config("shares", False)
- self.asks = self.config("asks", True)
-
- self.avatar = self.config("avatar", False)
- if self.avatar:
- self._urls_avatar = {None, ""}
-
- self.background = self.config("background", False)
- if self.background:
- self._urls_background = {None, ""}
-
- def items(self):
- for post in self.posts():
- reason = post.get("limitedVisibilityReason")
- if reason and reason != "none":
- if reason == "log-in-first":
- reason = ("This page's posts are visible only to users "
- "who are logged in.")
- self.log.warning('%s: "%s"', post["postId"], reason)
-
- files = self._extract_files(post)
- post["count"] = len(files)
- post["date"] = text.parse_datetime(
- post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
-
- yield Message.Directory, post
-
- project = post["postingProject"]
- if self.avatar:
- url = project.get("avatarURL")
- if url not in self._urls_avatar:
- self._urls_avatar.add(url)
- p = post.copy()
- p["postId"] = p["kind"] = "avatar"
- p["headline"] = p["num"] = ""
- yield Message.Url, url, text.nameext_from_url(url, p)
-
- if self.background:
- url = project.get("headerURL")
- if url not in self._urls_background:
- self._urls_background.add(url)
- p = post.copy()
- p["postId"] = p["kind"] = "background"
- p["headline"] = p["num"] = ""
- yield Message.Url, url, text.nameext_from_url(url, p)
-
- for post["num"], file in enumerate(files, 1):
- url = file["fileURL"]
- post.update(file)
- text.nameext_from_url(url, post)
- yield Message.Url, url, post
-
- def posts(self):
- return ()
-
- def _request_api(self, endpoint, input):
- url = "{}/api/v1/trpc/{}".format(self.root, endpoint)
- params = {"batch": "1", "input": util.json_dumps({"0": input})}
- headers = {"content-type": "application/json"}
-
- data = self.request(url, params=params, headers=headers).json()
- return data[0]["result"]["data"]
-
- def _extract_files(self, post):
- files = []
-
- self._extract_blocks(post, files)
- if self.shares and post.get("shareTree"):
- for share in post["shareTree"]:
- self._extract_blocks(share, files, share)
- del post["shareTree"]
-
- return files
-
- def _extract_blocks(self, post, files, shared=None):
- post["content"] = content = []
-
- for block in post.pop("blocks") or ():
- try:
- type = block["type"]
- if type == "attachment":
- file = block["attachment"].copy()
- file["shared"] = shared
- files.append(file)
- elif type == "attachment-row":
- for att in block["attachments"]:
- file = att["attachment"].copy()
- file["shared"] = shared
- files.append(file)
- elif type == "markdown":
- content.append(block["markdown"]["content"])
- elif type == "ask":
- post["ask"] = block["ask"]
- else:
- self.log.debug("%s: Unsupported block type '%s'",
- post["postId"], type)
- except Exception as exc:
- self.log.debug("%s: %s", exc.__class__.__name__, exc)
-
-
-class CohostUserExtractor(CohostExtractor):
- """Extractor for media from a cohost user"""
- subcategory = "user"
- pattern = BASE_PATTERN + r"/([^/?#]+)/?(?:$|\?|#)"
- example = "https://cohost.org/USER"
-
- def posts(self):
- empty = 0
- params = {
- "projectHandle": self.groups[0],
- "page": 0,
- "options": {
- "pinnedPostsAtTop" : True if self.pinned else False,
- "hideReplies" : not self.replies,
- "hideShares" : not self.shares,
- "hideAsks" : not self.asks,
- "viewingOnProjectPage": True,
- },
- }
-
- while True:
- data = self._request_api("posts.profilePosts", params)
-
- posts = data["posts"]
- if posts:
- empty = 0
- yield from posts
- else:
- empty += 1
-
- pagination = data["pagination"]
- if not pagination.get("morePagesForward"):
- return
- if empty >= 3:
- return self.log.debug("Empty API results")
- params["page"] = pagination["nextPage"]
-
-
-class CohostPostExtractor(CohostExtractor):
- """Extractor for media from a single cohost post"""
- subcategory = "post"
- pattern = BASE_PATTERN + r"/([^/?#]+)/post/(\d+)"
- example = "https://cohost.org/USER/post/12345"
-
- def posts(self):
- endpoint = "posts.singlePost"
- params = {
- "handle": self.groups[0],
- "postId": int(self.groups[1]),
- }
-
- data = self._request_api(endpoint, params)
- post = data["post"]
-
- try:
- post["comments"] = data["comments"][self.groups[1]]
- except LookupError:
- post["comments"] = ()
-
- return (post,)
-
-
-class CohostTagExtractor(CohostExtractor):
- """Extractor for tagged posts"""
- subcategory = "tag"
- pattern = BASE_PATTERN + r"/([^/?#]+)/tagged/([^/?#]+)(?:\?([^#]+))?"
- example = "https://cohost.org/USER/tagged/TAG"
-
- def posts(self):
- user, tag, query = self.groups
- url = "{}/{}/tagged/{}".format(self.root, user, tag)
- params = text.parse_query(query)
- post_feed_key = ("tagged-post-feed" if user == "rc" else
- "project-tagged-post-feed")
-
- while True:
- page = self.request(url, params=params).text
- data = util.json_loads(text.extr(
- page, 'id="__COHOST_LOADER_STATE__">', '</script>'))
-
- try:
- feed = data[post_feed_key]
- except KeyError:
- feed = data.popitem()[1]
-
- yield from feed["posts"]
-
- pagination = feed["paginationMode"]
- if not pagination.get("morePagesForward"):
- return
- params["refTimestamp"] = pagination["refTimestamp"]
- params["skipPosts"] = \
- pagination["currentSkip"] + pagination["idealPageStride"]
-
-
-class CohostLikesExtractor(CohostExtractor):
- """Extractor for liked posts"""
- subcategory = "likes"
- pattern = BASE_PATTERN + r"/rc/liked-posts"
- example = "https://cohost.org/rc/liked-posts"
-
- def posts(self):
- url = "{}/rc/liked-posts".format(self.root)
- params = {}
-
- while True:
- page = self.request(url, params=params).text
- data = util.json_loads(text.extr(
- page, 'id="__COHOST_LOADER_STATE__">', '</script>'))
-
- try:
- feed = data["liked-posts-feed"]
- except KeyError:
- feed = data.popitem()[1]
-
- yield from feed["posts"]
-
- pagination = feed["paginationMode"]
- if not pagination.get("morePagesForward"):
- return
- params["refTimestamp"] = pagination["refTimestamp"]
- params["skipPosts"] = \
- pagination["currentSkip"] + pagination["idealPageStride"]
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 37b6747..d0a9397 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -32,7 +32,7 @@ class DanbooruExtractor(BaseExtractor):
if isinstance(threshold, int):
self.threshold = 1 if threshold < 1 else threshold
else:
- self.threshold = self.per_page
+ self.threshold = self.per_page - 20
username, api_key = self._get_auth_info()
if username:
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 8172f62..59b2d6d 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -822,7 +822,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
username, folder["gallery_id"], public=False):
cache[dev["deviationid"]] = dev if has_access else None
- return cache[deviation["deviationid"]]
+ return cache.get(deviation["deviationid"])
def _unwatch_premium(self):
for username in self.unwatch:
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index 33e6ba8..eddcb12 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -8,7 +8,7 @@
"""Extractors for https://e621.net/ and other e621 instances"""
-from .common import Message
+from .common import Extractor, Message
from . import danbooru
from ..cache import memcache
from .. import text, util
@@ -156,3 +156,20 @@ class E621FavoriteExtractor(E621Extractor):
def posts(self):
return self._pagination("/favorites.json", self.query)
+
+
+class E621FrontendExtractor(Extractor):
+ """Extractor for alternative e621 frontends"""
+ basecategory = "E621"
+ category = "e621"
+ subcategory = "frontend"
+ pattern = r"(?:https?://)?e621\.(?:cc/\?tags|anthro\.fr/\?q)=([^&#]*)"
+ example = "https://e621.cc/?tags=TAG"
+
+ def initialize(self):
+ pass
+
+ def items(self):
+ url = "https://e621.net/posts?tags=" + self.groups[0]
+ data = {"_extractor": E621TagExtractor}
+ yield Message.Queue, url, data
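
Note: E621FrontendExtractor never scrapes e621.cc or e621.anthro.fr itself; it rewrites the URL to the canonical e621.net form and queues it for E621TagExtractor. A minimal sketch of that rewrite, with the regex taken from the diff and the helper name purely illustrative:

import re

FRONTEND = re.compile(
    r"(?:https?://)?e621\.(?:cc/\?tags|anthro\.fr/\?q)=([^&#]*)")

def canonical_url(url):
    """Map an alternative-frontend URL to its e621.net equivalent."""
    match = FRONTEND.match(url)
    if match is None:
        raise ValueError("not a recognized e621 frontend URL")
    return "https://e621.net/posts?tags=" + match.group(1)

print(canonical_url("https://e621.cc/?tags=TAG"))
# https://e621.net/posts?tags=TAG
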
diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py
index 2f3fdbf..1ec6adc 100644
--- a/gallery_dl/extractor/facebook.py
+++ b/gallery_dl/extractor/facebook.py
@@ -238,8 +238,9 @@ class FacebookExtractor(Extractor):
return res
- def extract_set(self, first_photo_id, set_id):
- all_photo_ids = [first_photo_id]
+ def extract_set(self, set_data):
+ set_id = set_data["set_id"]
+ all_photo_ids = [set_data["first_photo_id"]]
retries = 0
i = 0
@@ -252,7 +253,6 @@ class FacebookExtractor(Extractor):
photo_page = self.photo_page_request_wrapper(photo_url).text
photo = self.parse_photo_page(photo_page)
- photo["set_id"] = set_id
photo["num"] = i + 1
if self.author_followups:
@@ -281,9 +281,11 @@ class FacebookExtractor(Extractor):
retries = 0
else:
retries = 0
+ photo.update(set_data)
+ yield Message.Directory, photo
yield Message.Url, photo["url"], photo
- if photo["next_photo_id"] == "":
+ if not photo["next_photo_id"]:
self.log.debug(
"Can't find next image in the set. "
"Extraction is over."
@@ -322,15 +324,11 @@ class FacebookSetExtractor(FacebookExtractor):
set_url = self.set_url_fmt.format(set_id=set_id)
set_page = self.request(set_url).text
+ set_data = self.parse_set_page(set_page)
+ if self.groups[2]:
+ set_data["first_photo_id"] = self.groups[2]
- directory = self.parse_set_page(set_page)
-
- yield Message.Directory, directory
-
- yield from self.extract_set(
- self.groups[2] or directory["first_photo_id"],
- directory["set_id"]
- )
+ return self.extract_set(set_data)
class FacebookPhotoExtractor(FacebookExtractor):
@@ -436,13 +434,8 @@ class FacebookProfileExtractor(FacebookExtractor):
if set_id:
set_url = self.set_url_fmt.format(set_id=set_id)
set_page = self.request(set_url).text
+ set_data = self.parse_set_page(set_page)
+ return self.extract_set(set_data)
- directory = self.parse_set_page(set_page)
-
- yield Message.Directory, directory
-
- yield from self.extract_set(
- directory["first_photo_id"], directory["set_id"]
- )
- else:
- self.log.debug("Profile photos set ID not found.")
+ self.log.debug("Profile photos set ID not found.")
+ return iter(())
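
Note: extract_set() now takes the whole set_data dict and yields its own Message.Directory, so both call sites shrink to `return self.extract_set(set_data)`, and the no-set branch returns iter(()) to keep the return type an iterator either way. A sketch of that shape (names illustrative, the paging loop stubbed out):

def extract_set(set_data):
    for photo in set_data["photos"]:     # stands in for the real paging loop
        yield photo

def items(set_data):
    if set_data:
        return extract_set(set_data)     # hand back the generator itself
    return iter(())                      # empty iterator, same return type

print(list(items({"photos": ["a", "b"]})), list(items(None)))
# ['a', 'b'] []
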
diff --git a/gallery_dl/extractor/fanleaks.py b/gallery_dl/extractor/fanleaks.py
deleted file mode 100644
index 886e893..0000000
--- a/gallery_dl/extractor/fanleaks.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://fanleaks.club/"""
-
-from .common import Extractor, Message
-from .. import text
-
-
-class FanleaksExtractor(Extractor):
- """Base class for Fanleaks extractors"""
- category = "fanleaks"
- directory_fmt = ("{category}", "{model}")
- filename_fmt = "{model_id}_{id}.{extension}"
- archive_fmt = "{model_id}_{id}"
- root = "https://fanleaks.club"
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.model_id = match.group(1)
-
- def extract_post(self, url):
- extr = text.extract_from(self.request(url, notfound="post").text)
- data = {
- "model_id": self.model_id,
- "model" : text.unescape(extr('text-lg">', "</a>")),
- "id" : text.parse_int(self.id),
- "type" : extr('type="', '"')[:5] or "photo",
- }
- url = extr('src="', '"')
- yield Message.Directory, data
- yield Message.Url, url, text.nameext_from_url(url, data)
-
-
-class FanleaksPostExtractor(FanleaksExtractor):
- """Extractor for individual posts on fanleaks.club"""
- subcategory = "post"
- pattern = r"(?:https?://)?(?:www\.)?fanleaks\.club/([^/?#]+)/(\d+)"
- example = "https://fanleaks.club/MODEL/12345"
-
- def __init__(self, match):
- FanleaksExtractor.__init__(self, match)
- self.id = match.group(2)
-
- def items(self):
- url = "{}/{}/{}".format(self.root, self.model_id, self.id)
- return self.extract_post(url)
-
-
-class FanleaksModelExtractor(FanleaksExtractor):
- """Extractor for all posts from a fanleaks model"""
- subcategory = "model"
- pattern = (r"(?:https?://)?(?:www\.)?fanleaks\.club"
- r"/(?!latest/?$)([^/?#]+)/?$")
- example = "https://fanleaks.club/MODEL"
-
- def items(self):
- page_num = 1
- page = self.request(
- self.root + "/" + self.model_id, notfound="model").text
- data = {
- "model_id": self.model_id,
- "model" : text.unescape(text.extr(page, 'mt-4">', "</h1>")),
- "type" : "photo",
- }
- page_url = text.extr(page, "url: '", "'")
- while True:
- page = self.request("{}{}".format(page_url, page_num)).text
- if not page:
- return
-
- for item in text.extract_iter(page, '<a href="/', "</a>"):
- self.id = id = text.extr(item, "/", '"')
- if "/icon-play.svg" in item:
- url = "{}/{}/{}".format(self.root, self.model_id, id)
- yield from self.extract_post(url)
- continue
-
- data["id"] = text.parse_int(id)
- url = text.extr(item, 'src="', '"').replace(
- "/thumbs/", "/", 1)
- yield Message.Directory, data
- yield Message.Url, url, text.nameext_from_url(url, data)
- page_num += 1
diff --git a/gallery_dl/extractor/fapachi.py b/gallery_dl/extractor/fapachi.py
index 80478ca..43627e2 100644
--- a/gallery_dl/extractor/fapachi.py
+++ b/gallery_dl/extractor/fapachi.py
@@ -33,7 +33,8 @@ class FapachiPostExtractor(Extractor):
}
page = self.request("{}/{}/media/{}".format(
self.root, self.user, self.id)).text
- url = self.root + text.extr(page, 'd-block" src="', '"')
+ url = self.root + text.extract(
+ page, 'data-src="', '"', page.index('class="media-img'))[0]
yield Message.Directory, data
yield Message.Url, url, text.nameext_from_url(url, data)
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index c939a3c..f15aab7 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://hipertoon.com/"""
+"""Extractors for https://hiperdex.com/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
@@ -20,7 +20,7 @@ BASE_PATTERN = (r"((?:https?://)?(?:www\.)?"
class HiperdexBase():
"""Base class for hiperdex extractors"""
category = "hiperdex"
- root = "https://hipertoon.com"
+ root = "https://hiperdex.com"
@memcache(keyarg=1)
def manga_data(self, manga, page=None):
@@ -49,7 +49,7 @@ class HiperdexBase():
"status" : extr(
'class="summary-content">', '<').strip(),
"description": text.remove_html(text.unescape(extr(
- "Summary </h5>", "</div>"))),
+ '<div class="description-summary">', "</div>"))),
"language": "English",
"lang" : "en",
}
@@ -69,7 +69,7 @@ class HiperdexBase():
class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
"""Extractor for hiperdex manga chapters"""
pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+)/([^/?#]+))"
- example = "https://hipertoon.com/manga/MANGA/CHAPTER/"
+ example = "https://hiperdex.com/manga/MANGA/CHAPTER/"
def __init__(self, match):
root, path, self.manga, self.chapter = match.groups()
@@ -91,7 +91,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
"""Extractor for hiperdex manga"""
chapterclass = HiperdexChapterExtractor
pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+))/?$"
- example = "https://hipertoon.com/manga/MANGA/"
+ example = "https://hiperdex.com/manga/MANGA/"
def __init__(self, match):
root, path, self.manga = match.groups()
@@ -127,7 +127,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
chapterclass = HiperdexMangaExtractor
reverse = False
pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/(?:[^/?#]+))"
- example = "https://hipertoon.com/manga-artist/NAME/"
+ example = "https://hiperdex.com/manga-artist/NAME/"
def __init__(self, match):
self.root = text.ensure_http_scheme(match.group(1))
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 5f1e0f4..d6b36cb 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -286,6 +286,34 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor):
return url, url
+class TurboimagehostGalleryExtractor(ImagehostImageExtractor):
+ """Extractor for image galleries from turboimagehost.com"""
+ category = "turboimagehost"
+ subcategory = "gallery"
+ pattern = (r"(?:https?://)?((?:www\.)?turboimagehost\.com"
+ r"/album/(\d+)/([^/?#]*))")
+ example = "https://www.turboimagehost.com/album/12345/GALLERY_NAME"
+
+ def items(self):
+ data = {"_extractor": TurboimagehostImageExtractor}
+ params = {"p": 1}
+
+ while True:
+ page = self.request(self.page_url, params=params).text
+
+ if params["p"] == 1 and \
+ "Requested gallery don`t exist on our website." in page:
+ raise exception.NotFoundError("gallery")
+
+ thumb_url = None
+ for thumb_url in text.extract_iter(page, '"><a href="', '"'):
+ yield Message.Queue, thumb_url, data
+ if thumb_url is None:
+ return
+
+ params["p"] += 1
+
+
class ViprImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from vipr.im"""
category = "vipr"
diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py
index 54c6539..b900113 100644
--- a/gallery_dl/extractor/issuu.py
+++ b/gallery_dl/extractor/issuu.py
@@ -54,26 +54,30 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
class IssuuUserExtractor(IssuuBase, Extractor):
"""Extractor for all publications of a user/publisher"""
subcategory = "user"
- pattern = r"(?:https?://)?issuu\.com/([^/?#]+)/?$"
+ pattern = r"(?:https?://)?issuu\.com/([^/?#]+)(?:/(\d*))?$"
example = "https://issuu.com/USER"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.user = match.group(1)
-
def items(self):
- url = "{}/call/profile/v1/documents/{}".format(self.root, self.user)
- params = {"offset": 0, "limit": "25"}
+ user, pnum = self.groups
+ base = self.root + "/" + user
+ pnum = text.parse_int(pnum, 1)
while True:
- data = self.request(url, params=params).json()
+ url = base + "/" + str(pnum) if pnum > 1 else base
+ try:
+ html = self.request(url).text
+ data = util.json_loads(text.unescape(text.extr(
+ html, '</main></div><script data-json="', '" id="')))
+ docs = data["docs"]
+ except Exception as exc:
+ self.log.debug("", exc_info=exc)
+ return
- for publication in data["items"]:
- publication["url"] = "{}/{}/docs/{}".format(
- self.root, self.user, publication["uri"])
+ for publication in docs:
+ url = self.root + "/" + publication["uri"]
publication["_extractor"] = IssuuPublicationExtractor
- yield Message.Queue, publication["url"], publication
+ yield Message.Queue, url, publication
- if not data["hasMore"]:
+ if len(docs) < 48:
return
- params["offset"] += data["limit"]
+ pnum += 1
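
Note: IssuuUserExtractor now reads publications from entity-escaped JSON embedded in a data-json attribute instead of the profile API, walking /USER, /USER/2, ... and stopping once a page lists fewer than 48 documents. A sketch of the unescape-then-parse step, against a made-up page snippet:

import html, json

page = ('</main></div><script data-json="'
        '{&quot;docs&quot;:[{&quot;uri&quot;:&quot;user/docs/demo&quot;}]}'
        '" id="app">')

raw = page.partition('<script data-json="')[2].partition('" id="')[0]
data = json.loads(html.unescape(raw))
print(data["docs"][0]["uri"])   # user/docs/demo
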
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 66bbab5..788b5d9 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -54,26 +54,19 @@ class KemonopartyExtractor(Extractor):
sort_keys=True, separators=(",", ":")).encode
def items(self):
- service = self.groups[2]
- creator_id = self.groups[3]
-
find_hash = re.compile(HASH_PATTERN).match
generators = self._build_file_generators(self.config("files"))
announcements = True if self.config("announcements") else None
comments = True if self.config("comments") else False
duplicates = True if self.config("duplicates") else False
dms = True if self.config("dms") else None
- profile = username = None
+ max_posts = self.config("max-posts")
+ creator_info = {} if self.config("metadata") else None
# prevent files from being sent with gzip compression
headers = {"Accept-Encoding": "identity"}
- if self.config("metadata"):
- profile = self.api.creator_profile(service, creator_id)
- username = profile["name"]
-
posts = self.posts()
- max_posts = self.config("max-posts")
if max_posts:
posts = itertools.islice(posts, max_posts)
if self.revisions:
@@ -85,10 +78,20 @@ class KemonopartyExtractor(Extractor):
post["_http_headers"] = headers
post["date"] = self._parse_datetime(
post.get("published") or post.get("added") or "")
+ service = post["service"]
+ creator_id = post["user"]
+
+ if creator_info is not None:
+ key = "{}_{}".format(service, creator_id)
+ if key not in creator_info:
+ creator = creator_info[key] = self.api.creator_profile(
+ service, creator_id)
+ else:
+ creator = creator_info[key]
+
+ post["user_profile"] = creator
+ post["username"] = creator["name"]
- if profile is not None:
- post["username"] = username
- post["user_profile"] = profile
if comments:
try:
post["comments"] = self.api.creator_post_comments(
@@ -171,7 +174,7 @@ class KemonopartyExtractor(Extractor):
try:
msg = '"' + response.json()["error"] + '"'
except Exception:
- msg = '"0/1 Username or password is incorrect"'
+ msg = '"Username or password is incorrect"'
raise exception.AuthenticationError(msg)
return {c.name: c.value for c in response.cookies}
@@ -296,8 +299,12 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
def posts(self):
_, _, service, creator_id, query = self.groups
params = text.parse_query(query)
- return self.api.creator_posts(
- service, creator_id, params.get("o"), params.get("q"))
+ if params.get("tag"):
+ return self.api.creator_tagged_posts(
+ service, creator_id, params.get("tag"), params.get("o"))
+ else:
+ return self.api.creator_posts(
+ service, creator_id, params.get("o"), params.get("q"))
class KemonopartyPostsExtractor(KemonopartyExtractor):
@@ -493,7 +500,7 @@ class KemonoAPI():
def posts(self, offset=0, query=None, tags=None):
endpoint = "/posts"
- params = {"q": query, "o": offset, "tags": tags}
+ params = {"q": query, "o": offset, "tag": tags}
return self._pagination(endpoint, params, 50, "posts")
def creator_posts(self, service, creator_id, offset=0, query=None):
@@ -501,6 +508,11 @@ class KemonoAPI():
params = {"q": query, "o": offset}
return self._pagination(endpoint, params, 50)
+ def creator_tagged_posts(self, service, creator_id, tags, offset=0):
+ endpoint = "/{}/user/{}/posts-legacy".format(service, creator_id)
+ params = {"o": offset, "tag": tags}
+ return self._pagination(endpoint, params, 50, "results")
+
def creator_announcements(self, service, creator_id):
endpoint = "/{}/user/{}/announcements".format(service, creator_id)
return self._call(endpoint)
@@ -565,9 +577,10 @@ class KemonoAPI():
data = self._call(endpoint, params)
if key:
- yield from data[key]
- else:
- yield from data
+ data = data.get(key)
+ if not data:
+ return
+ yield from data
if len(data) < batch:
return
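
Note: the _pagination() change covers endpoints that return either a bare list or a dict wrapping it under a key ("posts", "results"), and treats a missing or empty key as the end of pagination. A standalone sketch of that loop:

def paginate(call, batch=50, key=None):
    """Yield items from an offset-paginated endpoint until exhausted."""
    offset = 0
    while True:
        data = call(offset)
        if key:
            data = data.get(key)
            if not data:             # missing or empty key -> stop
                return
        yield from data
        if len(data) < batch:        # short page -> last page
            return
        offset += batch

pages = [{"results": ["p1", "p2"]}]
print(list(paginate(lambda o: pages[o // 50], key="results")))
# ['p1', 'p2']
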
diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py
index d0c9c30..e779e97 100644
--- a/gallery_dl/extractor/khinsider.py
+++ b/gallery_dl/extractor/khinsider.py
@@ -36,22 +36,36 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
data = self.metadata(page)
yield Message.Directory, data
- for track in self.tracks(page):
+
+ if self.config("covers", False):
+ for num, url in enumerate(self._extract_covers(page), 1):
+ cover = text.nameext_from_url(
+ url, {"url": url, "num": num, "type": "cover"})
+ cover.update(data)
+ yield Message.Url, url, cover
+
+ for track in self._extract_tracks(page):
track.update(data)
+ track["type"] = "track"
yield Message.Url, track["url"], track
def metadata(self, page):
extr = text.extract_from(page)
return {"album": {
"name" : text.unescape(extr("<h2>", "<")),
- "platform": extr("Platforms: <a", "<").rpartition(">")[2],
+ "platform": text.split_html(extr("Platforms: ", "<br>"))[::2],
+ "year": extr("Year: <b>", "<"),
+ "catalog": extr("Catalog Number: <b>", "<"),
+ "developer": text.remove_html(extr(" Developed by: ", "</")),
+ "publisher": text.remove_html(extr(" Published by: ", "</")),
"count": text.parse_int(extr("Number of Files: <b>", "<")),
"size" : text.parse_bytes(extr("Total Filesize: <b>", "<")[:-1]),
"date" : extr("Date Added: <b>", "<"),
"type" : text.remove_html(extr("Album type: <b>", "</b>")),
+ "uploader": text.remove_html(extr("Uploaded by: ", "</")),
}}
- def tracks(self, page):
+ def _extract_tracks(self, page):
fmt = self.config("format", ("mp3",))
if fmt and isinstance(fmt, str):
if fmt == "all":
@@ -75,3 +89,9 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
yield track
if first:
yield first
+
+ def _extract_covers(self, page):
+ return [
+ text.unescape(text.extr(cover, ' href="', '"'))
+ for cover in text.extract_iter(page, ' class="albumImage', '</')
+ ]
diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py
index e39e272..89a1b5e 100644
--- a/gallery_dl/extractor/komikcast.py
+++ b/gallery_dl/extractor/komikcast.py
@@ -6,19 +6,20 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://komikcast.cz/"""
+"""Extractors for https://komikcast.la/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
import re
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?komikcast\.(?:cz|lol|site|mo?e|com)"
+BASE_PATTERN = (r"(?:https?://)?(?:www\.)?"
+ r"komikcast\.(?:la|cz|lol|site|mo?e|com)")
class KomikcastBase():
"""Base class for komikcast extractors"""
category = "komikcast"
- root = "https://komikcast.cz"
+ root = "https://komikcast.la"
@staticmethod
def parse_chapter_string(chapter_string, data=None):
@@ -48,7 +49,7 @@ class KomikcastBase():
class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
"""Extractor for komikcast manga chapters"""
pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/)"
- example = "https://komikcast.cz/chapter/TITLE/"
+ example = "https://komikcast.la/chapter/TITLE/"
def metadata(self, page):
info = text.extr(page, "<title>", " - Komikcast<")
@@ -68,7 +69,7 @@ class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
"""Extractor for komikcast manga"""
chapterclass = KomikcastChapterExtractor
pattern = BASE_PATTERN + r"(/(?:komik/)?[^/?#]+)/?$"
- example = "https://komikcast.cz/komik/TITLE"
+ example = "https://komikcast.la/komik/TITLE"
def chapters(self, page):
results = []
diff --git a/gallery_dl/extractor/lofter.py b/gallery_dl/extractor/lofter.py
index 412b6b9..b92a6ff 100644
--- a/gallery_dl/extractor/lofter.py
+++ b/gallery_dl/extractor/lofter.py
@@ -23,6 +23,8 @@ class LofterExtractor(Extractor):
def items(self):
for post in self.posts():
+ if post is None:
+ continue
if "post" in post:
post = post["post"]
@@ -129,6 +131,9 @@ class LofterAPI():
url, method="POST", params=params, data=data)
info = response.json()
+ if info["meta"]["status"] == 4200:
+ raise exception.NotFoundError("blog")
+
if info["meta"]["status"] != 200:
self.extractor.log.debug("Server response: %s", info)
raise exception.StopExtraction("API request failed")
@@ -142,6 +147,9 @@ class LofterAPI():
yield from posts
+ if data["offset"] < 0:
+ break
+
if params["offset"] + len(posts) < data["offset"]:
break
params["offset"] = data["offset"]
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index 295b9c4..6a9f633 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -53,7 +53,14 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
if "name" in file:
name = file["name"]
file["name"] = name.rpartition(".")[0] or name
- file["id"] = file["filename"].rpartition("-")[2]
+ _, sep, fid = file["filename"].rpartition("-")
+ if not sep or len(fid) == 12:
+ if "id" not in file:
+ file["id"] = ""
+ file["filename"] = file["name"]
+ else:
+ file["id"] = fid
+ file["filename"] = file["name"] + "-" + fid
elif "id" in file:
file["name"] = file["filename"]
file["filename"] = "{}-{}".format(file["name"], file["id"])
diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py
index d590753..827756a 100644
--- a/gallery_dl/extractor/mangafox.py
+++ b/gallery_dl/extractor/mangafox.py
@@ -30,7 +30,7 @@ class MangafoxChapterExtractor(ChapterExtractor):
def metadata(self, page):
manga, pos = text.extract(page, "<title>", "</title>")
count, pos = text.extract(
- page, ">", "<", page.find("</select>", pos) - 20)
+ page, ">", "<", page.find("</select>", pos) - 40)
sid , pos = text.extract(page, "var series_id =", ";", pos)
cid , pos = text.extract(page, "var chapter_id =", ";", pos)
@@ -49,9 +49,9 @@ class MangafoxChapterExtractor(ChapterExtractor):
pnum = 1
while True:
url, pos = text.extract(page, '<img src="', '"')
- yield text.ensure_http_scheme(url), None
+ yield text.ensure_http_scheme(text.unescape(url)), None
url, pos = text.extract(page, ' src="', '"', pos)
- yield text.ensure_http_scheme(url), None
+ yield text.ensure_http_scheme(text.unescape(url)), None
pnum += 2
page = self.request("{}/{}.html".format(self.urlbase, pnum)).text
diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py
index e8ee861..8c94f04 100644
--- a/gallery_dl/extractor/mangahere.py
+++ b/gallery_dl/extractor/mangahere.py
@@ -37,7 +37,7 @@ class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
def metadata(self, page):
pos = page.index("</select>")
- count , pos = text.extract(page, ">", "<", pos - 20)
+ count , pos = text.extract(page, ">", "<", pos - 40)
manga_id , pos = text.extract(page, "series_id = ", ";", pos)
chapter_id, pos = text.extract(page, "chapter_id = ", ";", pos)
manga , pos = text.extract(page, '"name":"', '"', pos)
@@ -61,9 +61,9 @@ class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
while True:
url, pos = text.extract(page, '<img src="', '"')
- yield text.ensure_http_scheme(url), None
+ yield text.ensure_http_scheme(text.unescape(url)), None
url, pos = text.extract(page, ' src="', '"', pos)
- yield text.ensure_http_scheme(url), None
+ yield text.ensure_http_scheme(text.unescape(url)), None
pnum += 2
page = self.request(self.url_fmt.format(self.part, pnum)).text
diff --git a/gallery_dl/extractor/mangaread.py b/gallery_dl/extractor/mangaread.py
index 4b017dc..6970b4f 100644
--- a/gallery_dl/extractor/mangaread.py
+++ b/gallery_dl/extractor/mangaread.py
@@ -92,9 +92,9 @@ class MangareadMangaExtractor(MangareadBase, MangaExtractor):
"genres" : list(text.extract_iter(
extr('class="genres-content">', "</div>"), '"tag">', "</a>")),
"type" : text.remove_html(
- extr("Type </h5>\n</div>", "</div>")),
+ extr(" Type ", "\n</div>")),
"release" : text.parse_int(text.remove_html(
- extr("Release </h5>\n</div>", "</div>"))),
+ extr(" Release ", "\n</div>"))),
"status" : text.remove_html(
- extr("Status </h5>\n</div>", "</div>")),
+ extr(" Status ", "\n</div>")),
}
diff --git a/gallery_dl/extractor/nekohouse.py b/gallery_dl/extractor/nekohouse.py
new file mode 100644
index 0000000..fe9d512
--- /dev/null
+++ b/gallery_dl/extractor/nekohouse.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://nekohouse.su/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?nekohouse\.su"
+USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
+
+
+class NekohouseExtractor(Extractor):
+ """Base class for nekohouse extractors"""
+ category = "nekohouse"
+ root = "https://nekohouse.su"
+
+
+class NekohousePostExtractor(NekohouseExtractor):
+ subcategory = "post"
+ directory_fmt = ("{category}", "{service}", "{username} ({user_id})",
+ "{post_id} {date} {title[b:230]}")
+ filename_fmt = "{num:>02} {id|filename}.{extension}"
+ archive_fmt = "{service}_{user_id}_{post_id}_{hash}"
+ pattern = USER_PATTERN + r"/post/([^/?#]+)"
+ example = "https://nekohouse.su/SERVICE/user/12345/post/12345"
+
+ def items(self):
+ service, user_id, post_id = self.groups
+ url = "{}/{}/user/{}/post/{}".format(
+ self.root, service, user_id, post_id)
+ html = self.request(url).text
+
+ files = self._extract_files(html)
+ post = self._extract_post(html)
+ post["service"] = service
+ post["user_id"] = user_id
+ post["post_id"] = post_id
+ post["count"] = len(files)
+
+ yield Message.Directory, post
+ for post["num"], file in enumerate(files, 1):
+ url = file["url"]
+ text.nameext_from_url(url, file)
+ file["hash"] = file["filename"]
+ file.update(post)
+ if "name" in file:
+ text.nameext_from_url(file.pop("name"), file)
+ yield Message.Url, url, file
+
+ def _extract_post(self, html):
+ extr = text.extract_from(html)
+ return {
+ "username": text.unescape(extr(
+ 'class="scrape__user-name', '</').rpartition(">")[2].strip()),
+ "title" : text.unescape(extr(
+ 'class="scrape__title', '</').rpartition(">")[2]),
+ "date" : text.parse_datetime(extr(
+ 'datetime="', '"')[:19], "%Y-%m-%d %H:%M:%S"),
+ "content": text.unescape(extr(
+ 'class="scrape__content">', "</div>").strip()),
+ }
+
+ def _extract_files(self, html):
+ files = []
+
+ extr = text.extract_from(text.extr(
+ html, 'class="scrape__files"', "<footer"))
+ while True:
+ file_id = extr('<a href="/post/', '"')
+ if not file_id:
+ break
+ files.append({
+ "id" : file_id,
+ "url" : self.root + extr('href="', '"'),
+ "type": "file",
+ })
+
+ extr = text.extract_from(text.extr(
+ html, 'class="scrape__attachments"', "</ul>"))
+ while True:
+ url = extr('href="', '"')
+ if not url:
+ break
+ files.append({
+ "id" : "",
+ "url" : self.root + url,
+ "name": text.unescape(extr('download="', '"')),
+ "type": "attachment",
+ })
+
+ return files
+
+
+class NekohouseUserExtractor(NekohouseExtractor):
+ subcategory = "user"
+ pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|\?|#)"
+ example = "https://nekohouse.su/SERVICE/user/12345"
+
+ def items(self):
+ service, user_id, _ = self.groups
+ creator_url = "{}/{}/user/{}".format(self.root, service, user_id)
+ params = {"o": 0}
+
+ data = {"_extractor": NekohousePostExtractor}
+ while True:
+ html = self.request(creator_url, params=params).text
+
+ cnt = 0
+ for post in text.extract_iter(html, "<article", "</article>"):
+ cnt += 1
+ post_url = self.root + text.extr(post, '<a href="', '"')
+ yield Message.Queue, post_url, data
+
+ if cnt < 50:
+ return
+ params["o"] += 50
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index d3e40ee..7fe8869 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -27,8 +27,10 @@ class PixivExtractor(Extractor):
filename_fmt = "{id}_p{num}.{extension}"
archive_fmt = "{id}{suffix}.{extension}"
cookies_domain = ".pixiv.net"
- sanity_url = "https://s.pximg.net/common/images/limit_sanity_level_360.png"
- mypixiv_url = "https://s.pximg.net/common/images/limit_mypixiv_360.png"
+ limit_url = "https://s.pximg.net/common/images/limit_"
+ # https://s.pximg.net/common/images/limit_sanity_level_360.png
+ # https://s.pximg.net/common/images/limit_unviewable_360.png
+ # https://s.pximg.net/common/images/limit_mypixiv_360.png
def _init(self):
self.api = PixivAppAPI(self)
@@ -117,16 +119,30 @@ class PixivExtractor(Extractor):
]
url = meta_single_page["original_image_url"]
- if url == self.sanity_url:
- work["_ajax"] = True
- self.log.warning("%s: 'limit_sanity_level' warning", work["id"])
- if self.sanity_workaround:
- body = self._request_ajax("/illust/" + str(work["id"]))
- return self._extract_ajax(work, body)
+ if url.startswith(self.limit_url):
+ work_id = work["id"]
+ self.log.debug("%s: %s", work_id, url)
+
+ limit_type = url.rpartition("/")[2]
+ if limit_type in (
+ "limit_", # for '_extend_sanity()' inserts
+ "limit_unviewable_360.png",
+ "limit_sanity_level_360.png",
+ ):
+ work["_ajax"] = True
+ self.log.warning("%s: 'limit_sanity_level' warning", work_id)
+ if self.sanity_workaround:
+ body = self._request_ajax("/illust/" + str(work_id))
+ return self._extract_ajax(work, body)
+
+ elif limit_type == "limit_mypixiv_360.png":
+ work["_mypixiv"] = True
+ self.log.warning("%s: 'My pixiv' locked", work_id)
- elif url == self.mypixiv_url:
- work["_mypixiv"] = True
- self.log.warning("%s: 'My pixiv' locked", work["id"])
+ else:
+ work["_mypixiv"] = True # stop further processing
+ self.log.error("%s: Unknown 'limit' URL type: %s",
+ work_id, limit_type)
elif work["type"] != "ugoira":
return ({"url": url, "_fallback": self._fallback_image(url)},)
@@ -430,7 +446,7 @@ class PixivArtworksExtractor(PixivExtractor):
elif ajax_id > work_id:
index -= 1
self.log.debug("Inserting work %s", ajax_id)
- yield self._make_work(ajax_id, self.sanity_url, user)
+ yield self._make_work(ajax_id, self.limit_url, user)
else: # ajax_id < work_id
break
@@ -440,7 +456,7 @@ class PixivArtworksExtractor(PixivExtractor):
while index >= 0:
ajax_id = ajax_ids[index]
self.log.debug("Inserting work %s", ajax_id)
- yield self._make_work(ajax_id, self.sanity_url, user)
+ yield self._make_work(ajax_id, self.limit_url, user)
index -= 1
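
Note: pixiv's placeholder handling now dispatches on the file name of the limit_* image instead of comparing whole URLs. The branch logic reduced to a pure function (the classification labels are mine):

LIMIT_URL = "https://s.pximg.net/common/images/limit_"

def classify(url):
    if not url.startswith(LIMIT_URL):
        return "ok"
    limit_type = url.rpartition("/")[2]
    if limit_type in ("limit_",                    # '_extend_sanity()' inserts
                      "limit_unviewable_360.png",
                      "limit_sanity_level_360.png"):
        return "sanity"
    if limit_type == "limit_mypixiv_360.png":
        return "mypixiv"
    return "unknown"

print(classify(LIMIT_URL + "sanity_level_360.png"))   # sanity
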
diff --git a/gallery_dl/extractor/pornpics.py b/gallery_dl/extractor/pornpics.py
index 83f3064..863ef3b 100644
--- a/gallery_dl/extractor/pornpics.py
+++ b/gallery_dl/extractor/pornpics.py
@@ -20,10 +20,6 @@ class PornpicsExtractor(Extractor):
root = "https://www.pornpics.com"
request_interval = (0.5, 1.5)
- def __init__(self, match):
- super().__init__(match)
- self.item = match.group(1)
-
def items(self):
for gallery in self.galleries():
gallery["_extractor"] = PornpicsGalleryExtractor
@@ -34,9 +30,11 @@ class PornpicsExtractor(Extractor):
# fetch first 20 galleries from HTML
# since '"offset": 0' does not return a JSON response
page = self.request(url).text
- for path in text.extract_iter(
+ for href in text.extract_iter(
page, 'class="rel-link" href="', '"'):
- yield {"g_url": self.root + path}
+ if href[0] == "/":
+ href = self.root + href
+ yield {"g_url": href}
del page
params = {"offset": 20}
@@ -60,12 +58,12 @@ class PornpicsExtractor(Extractor):
class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor):
"""Extractor for pornpics galleries"""
- pattern = BASE_PATTERN + r"(/galleries/(?:[^/?#]+-)?(\d+))"
+ pattern = BASE_PATTERN + r"/galleries/((?:[^/?#]+-)?(\d+))"
example = "https://www.pornpics.com/galleries/TITLE-12345/"
def __init__(self, match):
- PornpicsExtractor.__init__(self, match)
- self.gallery_id = match.group(2)
+ url = "{}/galleries/{}/".format(self.root, match.group(1))
+ GalleryExtractor.__init__(self, match, url)
items = GalleryExtractor.items
@@ -73,7 +71,7 @@ class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor):
extr = text.extract_from(page)
return {
- "gallery_id": text.parse_int(self.gallery_id),
+ "gallery_id": text.parse_int(self.groups[1]),
"slug" : extr("/galleries/", "/").rpartition("-")[0],
"title" : text.unescape(extr("<h1>", "<")),
"channel" : text.split_html(extr(">Channel:&nbsp;", '</div>')),
@@ -100,7 +98,7 @@ class PornpicsTagExtractor(PornpicsExtractor):
example = "https://www.pornpics.com/tags/TAGS/"
def galleries(self):
- url = "{}/tags/{}/".format(self.root, self.item)
+ url = "{}/tags/{}/".format(self.root, self.groups[0])
return self._pagination(url)
@@ -113,7 +111,7 @@ class PornpicsSearchExtractor(PornpicsExtractor):
def galleries(self):
url = self.root + "/search/srch.php"
params = {
- "q" : self.item.replace("-", " "),
+ "q" : self.groups[0].replace("-", " "),
"lang" : "en",
"offset": 0,
}
diff --git a/gallery_dl/extractor/rule34xyz.py b/gallery_dl/extractor/rule34xyz.py
index f1e7518..3b8d344 100644
--- a/gallery_dl/extractor/rule34xyz.py
+++ b/gallery_dl/extractor/rule34xyz.py
@@ -60,18 +60,22 @@ class Rule34xyzExtractor(BooruExtractor):
post.pop("filesPreview", None)
post.pop("tagsWithType", None)
post["date"] = text.parse_datetime(
- post["created"], "%Y-%m-%dT%H:%M:%S.%f")
+ post["created"][:19], "%Y-%m-%dT%H:%M:%S")
def _tags(self, post, _):
if post.get("tagsWithType") is None:
post.update(self._fetch_post(post["id"]))
tags = collections.defaultdict(list)
+ tagslist = []
for tag in post["tagsWithType"]:
- tags[tag["type"]].append(tag["value"])
+ value = tag["value"]
+ tagslist.append(value)
+ tags[tag["type"]].append(value)
types = self.TAG_TYPES
for type, values in tags.items():
post["tags_" + types[type]] = values
+ post["tags"] = tagslist
def _fetch_post(self, post_id):
url = "{}/api/post/{}".format(self.root, post_id)
diff --git a/gallery_dl/extractor/saint.py b/gallery_dl/extractor/saint.py
index 1c62d75..5ec2443 100644
--- a/gallery_dl/extractor/saint.py
+++ b/gallery_dl/extractor/saint.py
@@ -81,6 +81,7 @@ class SaintMediaExtractor(SaintAlbumExtractor):
else: # /d/
file = {
"file" : text.unescape(extr('<a href="', '"')),
+ "id" : album_id,
"id_dl" : album_id,
"name" : album_id,
"filename" : album_id,
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py
index 97bad09..d15762d 100644
--- a/gallery_dl/extractor/shimmie2.py
+++ b/gallery_dl/extractor/shimmie2.py
@@ -74,10 +74,6 @@ BASE_PATTERN = Shimmie2Extractor.update({
"pattern": r"(?:sizechange|giantess)booru\.com",
"cookies": {"agreed": "true"},
},
- "tentaclerape": {
- "root": "https://tentaclerape.net",
- "pattern": r"tentaclerape\.net",
- },
"cavemanon": {
"root": "https://booru.cavemanon.xyz",
"pattern": r"booru\.cavemanon\.xyz",
diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py
index b122f26..1713509 100644
--- a/gallery_dl/extractor/szurubooru.py
+++ b/gallery_dl/extractor/szurubooru.py
@@ -79,10 +79,6 @@ class SzurubooruExtractor(booru.BooruExtractor):
BASE_PATTERN = SzurubooruExtractor.update({
- "foalcon": {
- "root": "https://booru.foalcon.com",
- "pattern": r"booru\.foalcon\.com",
- },
"bcbnsfw": {
"root": "https://booru.bcbnsfw.space",
"pattern": r"booru\.bcbnsfw\.space",
@@ -104,7 +100,7 @@ class SzurubooruTagExtractor(SzurubooruExtractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}_{version}"
pattern = BASE_PATTERN + r"/posts(?:/query=([^/?#]*))?"
- example = "https://booru.foalcon.com/posts/query=TAG"
+ example = "https://booru.bcbnsfw.space/posts/query=TAG"
def __init__(self, match):
SzurubooruExtractor.__init__(self, match)
@@ -127,7 +123,7 @@ class SzurubooruPostExtractor(SzurubooruExtractor):
subcategory = "post"
archive_fmt = "{id}_{version}"
pattern = BASE_PATTERN + r"/post/(\d+)"
- example = "https://booru.foalcon.com/post/12345"
+ example = "https://booru.bcbnsfw.space/post/12345"
def posts(self):
return (self._api_request("/post/" + self.groups[-1]),)
diff --git a/gallery_dl/extractor/toyhouse.py b/gallery_dl/extractor/toyhouse.py
index 44d87ee..cee0d9d 100644
--- a/gallery_dl/extractor/toyhouse.py
+++ b/gallery_dl/extractor/toyhouse.py
@@ -52,16 +52,18 @@ class ToyhouseExtractor(Extractor):
return {
"url": extr(needle, '"'),
"date": text.parse_datetime(extr(
- 'Credits\n</h2>\n<div class="mb-1">', '<'),
+ '</h2>\n <div class="mb-1">', '<'),
"%d %b %Y, %I:%M:%S %p"),
"artists": [
text.remove_html(artist)
for artist in extr(
- '<div class="artist-credit">', '</div>\n</div>').split(
- '<div class="artist-credit">')
+ '<div class="artist-credit">',
+ '</div>\n </div>').split(
+ '<div class="ar tist-credit">')
],
"characters": text.split_html(extr(
- '<div class="image-characters', '</div>\n</div>'))[2:],
+ '<div class="image-characters',
+ '<div class="image-comments">'))[2:],
}
def _pagination(self, path):
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 090b11a..840e846 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -121,14 +121,7 @@ class TwitterExtractor(Extractor):
txt = data.get("full_text") or data.get("text") or ""
self.log.warning("'%s' (%s)", txt, data["id_str"])
- files = []
- if "extended_entities" in data:
- self._extract_media(
- data, data["extended_entities"]["media"], files)
- if "card" in tweet and self.cards:
- self._extract_card(tweet, files)
- if self.twitpic:
- self._extract_twitpic(data, files)
+ files = self._extract_files(data, tweet)
if not files and not self.textonly:
continue
@@ -143,6 +136,39 @@ class TwitterExtractor(Extractor):
text.nameext_from_url(url, file)
yield Message.Url, url, file
+ def _extract_files(self, data, tweet):
+ files = []
+
+ if "extended_entities" in data:
+ try:
+ self._extract_media(
+ data, data["extended_entities"]["media"], files)
+ except Exception as exc:
+ self.log.debug("", exc_info=exc)
+ self.log.warning(
+ "%s: Error while extracting media files (%s: %s)",
+ data["id_str"], exc.__class__.__name__, exc)
+
+ if self.cards and "card" in tweet:
+ try:
+ self._extract_card(tweet, files)
+ except Exception as exc:
+ self.log.debug("", exc_info=exc)
+ self.log.warning(
+ "%s: Error while extracting Card files (%s: %s)",
+ data["id_str"], exc.__class__.__name__, exc)
+
+ if self.twitpic:
+ try:
+ self._extract_twitpic(data, files)
+ except Exception as exc:
+ self.log.debug("", exc_info=exc)
+ self.log.warning(
+ "%s: Error while extracting TwitPic files (%s: %s)",
+ data["id_str"], exc.__class__.__name__, exc)
+
+ return files
+
def _extract_media(self, tweet, entities, files):
for media in entities:
@@ -1039,7 +1065,7 @@ class TwitterAPI():
else:
csrf_token = None
if not csrf_token:
- csrf_token = util.generate_token()
+ csrf_token = util.generate_token(80)
cookies.set("ct0", csrf_token, domain=cookies_domain)
auth_token = cookies.get("auth_token", domain=cookies_domain)
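
Note: the twitter refactor wraps each media source (entities, cards, TwitPic) in its own try/except so one malformed component logs a warning instead of aborting the whole tweet. The fault-isolation pattern in miniature:

import logging

log = logging.getLogger("sketch")

def collect(collectors):
    files = []
    for name, func in collectors:
        try:
            func(files)                  # each collector appends to files
        except Exception as exc:
            log.warning("Error while extracting %s files (%s: %s)",
                        name, exc.__class__.__name__, exc)
    return files

def media(files): files.append("m1")
def cards(files): raise KeyError("card")

print(collect([("media", media), ("Card", cards)]))   # ['m1']
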
diff --git a/gallery_dl/extractor/urlgalleries.py b/gallery_dl/extractor/urlgalleries.py
index bb80055..ebfeb9d 100644
--- a/gallery_dl/extractor/urlgalleries.py
+++ b/gallery_dl/extractor/urlgalleries.py
@@ -15,12 +15,15 @@ class UrlgalleriesGalleryExtractor(GalleryExtractor):
category = "urlgalleries"
root = "https://urlgalleries.net"
request_interval = (0.5, 1.5)
- pattern = r"(?:https?://)(?:(\w+)\.)?urlgalleries\.net/(?:[\w-]+-)?(\d+)"
- example = "https://BLOG.urlgalleries.net/gallery-12345/TITLE"
+ pattern = (r"(?:https?://)()(?:(\w+)\.)?urlgalleries\.net"
+ r"/(?:b/([^/?#]+)/)?(?:[\w-]+-)?(\d+)")
+ example = "https://urlgalleries.net/b/BLOG/gallery-12345/TITLE"
def items(self):
- blog, self.gallery_id = self.groups
- url = "https://{}.urlgalleries.net/porn-gallery-{}/?a=10000".format(
+ _, blog_alt, blog, self.gallery_id = self.groups
+ if not blog:
+ blog = blog_alt
+ url = "https://urlgalleries.net/b/{}/porn-gallery-{}/?a=10000".format(
blog, self.gallery_id)
with self.request(url, allow_redirects=False, fatal=...) as response:
@@ -35,7 +38,7 @@ class UrlgalleriesGalleryExtractor(GalleryExtractor):
data = self.metadata(page)
data["count"] = len(imgs)
- root = "https://{}.urlgalleries.net".format(blog)
+ root = "https://urlgalleries.net/b/" + blog
yield Message.Directory, data
for data["num"], img in enumerate(imgs, 1):
page = self.request(root + img).text
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 922a591..1c0c172 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -70,7 +70,8 @@ class VscoExtractor(Extractor):
def _extract_preload_state(self, url):
page = self.request(url, notfound=self.subcategory).text
- return util.json_loads(text.extr(page, "__PRELOADED_STATE__ = ", "<"))
+ return util.json_loads(text.extr(page, "__PRELOADED_STATE__ = ", "<")
+ .replace('"prevPageToken":undefined,', ''))
def _pagination(self, url, params, token, key, extra=None):
headers = {
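
Note: __PRELOADED_STATE__ is a JavaScript object literal, not strict JSON, so the vsco fix strips the one known `undefined` member before parsing. An illustration with a made-up snippet:

import json

raw = '{"prevPageToken":undefined,"media":[1,2]}'
clean = raw.replace('"prevPageToken":undefined,', '')
print(json.loads(clean)["media"])   # [1, 2]
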
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index 70ab259..008ae6e 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -102,8 +102,8 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
else:
episode = ""
- if extr('<div class="author_area"', '\n'):
- username = extr('/creator/', '"')
+ if extr('<span class="author"', '\n'):
+ username = extr('/u/', '"')
author_name = extr('<span>', '</span>')
else:
username = author_name = ""
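
Only the HTML markers changed here, presumably tracking a markup update on the site; the extraction idiom stays the same text.extract_from() cursor used across these extractors, where each call searches forward from the previous match. A sketch against invented markup in the new format:

    from gallery_dl import text

    page = ('<span class="author" data-x="1">\n'
            '<a href="https://www.webtoons.com/p/community/u/USERNAME">\n'
            '<span>AUTHOR NAME</span></a>\n')
    extr = text.extract_from(page)
    if extr('<span class="author"', '\n'):        # tag present -> truthy
        username = extr('/u/', '"')               # 'USERNAME'
        author_name = extr('<span>', '</span>')   # 'AUTHOR NAME'
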
diff --git a/gallery_dl/extractor/weebcentral.py b/gallery_dl/extractor/weebcentral.py
index 39f998a..fc1badb 100644
--- a/gallery_dl/extractor/weebcentral.py
+++ b/gallery_dl/extractor/weebcentral.py
@@ -80,12 +80,12 @@ class WeebcentralChapterExtractor(WeebcentralBase, ChapterExtractor):
results = []
while True:
- src = extr(' src="', '"')
+ src = extr('src="', '"')
if not src:
break
results.append((src, {
- "width" : text.parse_int(extr(' width="' , '"')),
- "height": text.parse_int(extr(' height="', '"')),
+ "width" : text.parse_int(extr('width="' , '"')),
+ "height": text.parse_int(extr('height="', '"')),
}))
return results
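
Dropping the leading space from the attribute markers lets the lookup still match when src= and friends follow a newline or tab rather than a single space, as in pretty-printed tags. For instance, with invented sample HTML:

    from gallery_dl import text

    html = '<img\n\tsrc="/img/01.png"\n\twidth="800"\n\theight="1200">'
    extr = text.extract_from(html)
    print(extr('src="', '"'))      # /img/01.png  (' src="' would not match)
    print(extr('width="', '"'))    # 800
    print(extr('height="', '"'))   # 1200
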
diff --git a/gallery_dl/extractor/xfolio.py b/gallery_dl/extractor/xfolio.py
new file mode 100644
index 0000000..a1a5be3
--- /dev/null
+++ b/gallery_dl/extractor/xfolio.py
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://xfolio.jp/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+BASE_PATTERN = r"(?:https?://)?xfolio\.jp(?:/[^/?#]+)?"
+
+
+class XfolioExtractor(Extractor):
+ """Base class for xfolio extractors"""
+ category = "xfolio"
+ root = "https://xfolio.jp"
+ cookies_domain = ".xfolio.jp"
+ directory_fmt = ("{category}", "{creator_slug}", "{work_id}")
+ filename_fmt = "{work_id}_{image_id}.{extension}"
+ archive_fmt = "{work_id}_{image_id}"
+ request_interval = (0.5, 1.5)
+
+ def _init(self):
+ XfolioExtractor._init = Extractor._init
+ if not self.cookies_check(("xfolio_session",)):
+ self.log.error("'xfolio_session' cookie required")
+
+ def items(self):
+ data = {"_extractor": XfolioWorkExtractor}
+ for work in self.works():
+ yield Message.Queue, work, data
+
+ def request(self, url, **kwargs):
+ response = Extractor.request(self, url, **kwargs)
+
+ if "/system/recaptcha" in response.url:
+ raise exception.StopExtraction("Bot check / CAPTCHA page")
+
+ return response
+
+
+class XfolioWorkExtractor(XfolioExtractor):
+ subcategory = "work"
+ pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)/works/(\d+)"
+ example = "https://xfolio.jp/portfolio/USER/works/12345"
+    ref_fmt = "{}/fullscale_image?image_id={}&work_id={}"
+ url_fmt = ("{}/user_asset.php?id={}&work_id={}"
+ "&work_image_id={}&type=work_image")
+
+ def items(self):
+ creator, work_id = self.groups
+ url = "{}/portfolio/{}/works/{}".format(self.root, creator, work_id)
+ html = self.request(url).text
+
+ work = self._extract_data(html)
+ files = self._extract_files(html, work)
+ work["count"] = len(files)
+
+ yield Message.Directory, work
+ for work["num"], file in enumerate(files, 1):
+ file.update(work)
+ yield Message.Url, file["url"], file
+
+ def _extract_data(self, html):
+ creator, work_id = self.groups
+ extr = text.extract_from(html)
+ return {
+ "title" : text.unescape(extr(
+ 'property="og:title" content="', '"').rpartition(" - ")[0]),
+ "description" : text.unescape(extr(
+ 'property="og:description" content="', '"')),
+ "creator_id" : extr(' data-creator-id="', '"'),
+ "creator_userid" : extr(' data-creator-user-id="', '"'),
+ "creator_name" : extr(' data-creator-name="', '"'),
+ "creator_profile": text.unescape(extr(
+ ' data-creator-profile="', '"')),
+ "series_id" : extr("/series/", '"'),
+ "creator_slug" : creator,
+ "work_id" : work_id,
+ }
+
+ def _extract_files(self, html, work):
+ files = []
+
+ work_id = work["work_id"]
+ for img in text.extract_iter(
+ html, 'class="article__wrap_img', "</div>"):
+ image_id = text.extr(img, "/fullscale_image?image_id=", "&")
+ if not image_id:
+ self.log.warning(
+ "%s: 'fullscale_image' not available", work_id)
+ continue
+
+ files.append({
+ "image_id" : image_id,
+ "extension": "jpg",
+ "url": self.url_fmt.format(
+ self.root, image_id, work_id, image_id),
+ "_http_headers": {"Referer": self.ref_fmt.format(
+ self.root, image_id, work_id)},
+ })
+
+ return files
+
+
+class XfolioUserExtractor(XfolioExtractor):
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)(?:/works)?/?(?:$|\?|#)"
+ example = "https://xfolio.jp/portfolio/USER"
+
+ def works(self):
+ url = "{}/portfolio/{}/works".format(self.root, self.groups[0])
+
+ while True:
+ html = self.request(url).text
+
+ for item in text.extract_iter(
+ html, '<div class="postItem', "</div>"):
+ yield text.extr(item, ' href="', '"')
+
+ pager = text.extr(html, ' class="pager__list_next', "</li>")
+ url = text.extr(pager, ' href="', '"')
+ if not url:
+ return
+ url = text.unescape(url)
+
+
+class XfolioSeriesExtractor(XfolioExtractor):
+ subcategory = "series"
+ pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)/series/(\d+)"
+ example = "https://xfolio.jp/portfolio/USER/series/12345"
+
+ def works(self):
+ creator, series_id = self.groups
+ url = "{}/portfolio/{}/series/{}".format(self.root, creator, series_id)
+ html = self.request(url).text
+
+ return [
+ text.extr(item, ' href="', '"')
+ for item in text.extract_iter(
+ html, 'class="listWrap--title">', "</a>")
+ ]
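
For context on the new extractor: each file entry built by XfolioWorkExtractor pairs a direct user_asset.php URL with a matching fullscale_image Referer, passed per file through gallery-dl's _http_headers mechanism, which the server presumably requires. With invented IDs, the two format strings expand as follows:

    root, work_id, image_id = "https://xfolio.jp", "12345", "67890"

    url_fmt = ("{}/user_asset.php?id={}&work_id={}"
               "&work_image_id={}&type=work_image")
    ref_fmt = "{}/fullscale_image?image_id={}&work_id={}"

    print(url_fmt.format(root, image_id, work_id, image_id))
    # https://xfolio.jp/user_asset.php?id=67890&work_id=12345
    #     &work_image_id=67890&type=work_image
    print(ref_fmt.format(root, image_id, work_id))
    # https://xfolio.jp/fullscale_image?image_id=67890&work_id=12345
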
diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py
index 6dc9362..4d69d3d 100644
--- a/gallery_dl/extractor/xhamster.py
+++ b/gallery_dl/extractor/xhamster.py
@@ -20,8 +20,8 @@ class XhamsterExtractor(Extractor):
category = "xhamster"
def __init__(self, match):
- Extractor.__init__(self, match)
self.root = "https://" + match.group(1)
+ Extractor.__init__(self, match)
class XhamsterGalleryExtractor(XhamsterExtractor):
@@ -34,48 +34,48 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
pattern = BASE_PATTERN + r"(/photos/gallery/[^/?#]+)"
example = "https://xhamster.com/photos/gallery/12345"
- def __init__(self, match):
- XhamsterExtractor.__init__(self, match)
- self.path = match.group(2)
- self.data = None
-
def items(self):
data = self.metadata()
yield Message.Directory, data
for num, image in enumerate(self.images(), 1):
url = image["imageURL"]
image.update(data)
+ text.nameext_from_url(url, image)
image["num"] = num
- yield Message.Url, url, text.nameext_from_url(url, image)
+ image["extension"] = "webp"
+ del image["modelName"]
+ yield Message.Url, url, image
def metadata(self):
- self.data = self._data(self.root + self.path)
- user = self.data["authorModel"]
- imgs = self.data["photosGalleryModel"]
+ data = self.data = self._extract_data(self.root + self.groups[1])
+
+ gallery = data["galleryPage"]
+ info = gallery["infoProps"]
+ model = gallery["galleryModel"]
+ author = info["authorInfoProps"]
return {
"user":
{
- "id" : text.parse_int(user["id"]),
- "url" : user["pageURL"],
- "name" : user["name"],
- "retired" : user["retired"],
- "verified" : user["verified"],
- "subscribers": user["subscribers"],
+ "id" : text.parse_int(model["userId"]),
+ "url" : author["authorLink"],
+ "name" : author["authorName"],
+                "verified"   : bool(author.get("verified")),
+ "subscribers": info["subscribeButtonProps"]["subscribers"],
},
"gallery":
{
- "id" : text.parse_int(imgs["id"]),
- "tags" : [c["name"] for c in imgs["categories"]],
- "date" : text.parse_timestamp(imgs["created"]),
- "views" : text.parse_int(imgs["views"]),
- "likes" : text.parse_int(imgs["rating"]["likes"]),
- "dislikes" : text.parse_int(imgs["rating"]["dislikes"]),
- "title" : text.unescape(imgs["title"]),
- "description": text.unescape(imgs["description"]),
- "thumbnail" : imgs["thumbURL"],
+ "id" : text.parse_int(gallery["id"]),
+ "tags" : [t["label"] for t in info["categoriesTags"]],
+ "date" : text.parse_timestamp(model["created"]),
+ "views" : text.parse_int(model["views"]),
+ "likes" : text.parse_int(model["rating"]["likes"]),
+ "dislikes" : text.parse_int(model["rating"]["dislikes"]),
+ "title" : model["title"],
+ "description": model["description"],
+ "thumbnail" : model["thumbURL"],
},
- "count": text.parse_int(imgs["quantity"]),
+ "count": text.parse_int(gallery["photosCount"]),
}
def images(self):
@@ -83,17 +83,17 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
self.data = None
while True:
- for image in data["photosGalleryModel"]["photos"]:
- del image["modelName"]
- yield image
+ yield from data["photosGalleryModel"]["photos"]
- pgntn = data["pagination"]
- if pgntn["active"] == pgntn["maxPage"]:
+ pagination = data["galleryPage"]["paginationProps"]
+ if pagination["currentPageNumber"] >= pagination["lastPageNumber"]:
return
- url = pgntn["pageLinkTemplate"][:-3] + str(pgntn["next"])
- data = self._data(url)
+ url = (pagination["pageLinkTemplate"][:-3] +
+ str(pagination["currentPageNumber"] + 1))
+
+ data = self._extract_data(url)
- def _data(self, url):
+ def _extract_data(self, url):
page = self.request(url).text
return util.json_loads(text.extr(
page, "window.initials=", "</script>").rstrip("\n\r;"))
@@ -105,12 +105,8 @@ class XhamsterUserExtractor(XhamsterExtractor):
pattern = BASE_PATTERN + r"/users/([^/?#]+)(?:/photos)?/?(?:$|[?#])"
example = "https://xhamster.com/users/USER/photos"
- def __init__(self, match):
- XhamsterExtractor.__init__(self, match)
- self.user = match.group(2)
-
def items(self):
- url = "{}/users/{}/photos".format(self.root, self.user)
+ url = "{}/users/{}/photos".format(self.root, self.groups[1])
data = {"_extractor": XhamsterGalleryExtractor}
while url: