author     Unit 193 <unit193@unit193.net>  2024-08-03 20:27:44 -0400
committer  Unit 193 <unit193@unit193.net>  2024-08-03 20:27:44 -0400
commit     032e5bed275a253e122ed9ac86dac7b8c4204172 (patch)
tree       b4eda52ebfe00c4d22e9d633b1ab2d158a9f0573 /gallery_dl/extractor
parent     80e39a8fc7de105510cbbdca8507f2a4b8c9e01d (diff)

New upstream version 1.27.2 (tag: upstream/1.27.2)
Diffstat (limited to 'gallery_dl/extractor')
-rw-r--r--  gallery_dl/extractor/8chan.py            |   3
-rw-r--r--  gallery_dl/extractor/__init__.py         |   4
-rw-r--r--  gallery_dl/extractor/agnph.py            | 113
-rw-r--r--  gallery_dl/extractor/aryion.py           |  23
-rw-r--r--  gallery_dl/extractor/behance.py          |  12
-rw-r--r--  gallery_dl/extractor/booru.py            |  18
-rw-r--r--  gallery_dl/extractor/bunkr.py            |   2
-rw-r--r--  gallery_dl/extractor/cien.py             | 199
-rw-r--r--  gallery_dl/extractor/common.py           |  20
-rw-r--r--  gallery_dl/extractor/deviantart.py       | 142
-rw-r--r--  gallery_dl/extractor/directlink.py       |   3
-rw-r--r--  gallery_dl/extractor/dynastyscans.py     |   2
-rw-r--r--  gallery_dl/extractor/erome.py            |   6
-rw-r--r--  gallery_dl/extractor/exhentai.py         |   3
-rw-r--r--  gallery_dl/extractor/fallenangels.py     |  84
-rw-r--r--  gallery_dl/extractor/furaffinity.py      |   9
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py     |  25
-rw-r--r--  gallery_dl/extractor/hentainexus.py      |  11
-rw-r--r--  gallery_dl/extractor/hotleak.py          |   6
-rw-r--r--  gallery_dl/extractor/imagefap.py         |   2
-rw-r--r--  gallery_dl/extractor/inkbunny.py         |   4
-rw-r--r--  gallery_dl/extractor/instagram.py        |  20
-rw-r--r--  gallery_dl/extractor/koharu.py           | 221
-rw-r--r--  gallery_dl/extractor/nijie.py            |   3
-rw-r--r--  gallery_dl/extractor/paheal.py           |   8
-rw-r--r--  gallery_dl/extractor/readcomiconline.py  |  26
-rw-r--r--  gallery_dl/extractor/redgifs.py          |   2
-rw-r--r--  gallery_dl/extractor/sankaku.py          |  26
-rw-r--r--  gallery_dl/extractor/sankakucomplex.py   |  14
-rw-r--r--  gallery_dl/extractor/subscribestar.py    |   2
-rw-r--r--  gallery_dl/extractor/toyhouse.py         |  28
-rw-r--r--  gallery_dl/extractor/tumblr.py           |  47
-rw-r--r--  gallery_dl/extractor/twitter.py          | 157
-rw-r--r--  gallery_dl/extractor/vipergirls.py       |   3
-rw-r--r--  gallery_dl/extractor/vsco.py             |  23
-rw-r--r--  gallery_dl/extractor/wallpapercave.py    |  11
-rw-r--r--  gallery_dl/extractor/warosu.py           |   4
-rw-r--r--  gallery_dl/extractor/zerochan.py         |  41

38 files changed, 1019 insertions(+), 308 deletions(-)
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index a4b0997..a5e8b27 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -27,7 +27,8 @@ class _8chanExtractor(Extractor):
Extractor.__init__(self, match)
def _init(self):
- self.cookies.set("TOS", "1", domain=self.root.rpartition("/")[2])
+ self.cookies.set(
+ "TOS20240718", "1", domain=self.root.rpartition("/")[2])
@memcache()
def cookies_prepare(self):
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 6aff1f3..e103cb1 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -22,6 +22,7 @@ modules = [
"8chan",
"8muses",
"adultempire",
+ "agnph",
"architizer",
"artstation",
"aryion",
@@ -33,6 +34,7 @@ modules = [
"bunkr",
"catbox",
"chevereto",
+ "cien",
"comicvine",
"cyberdrop",
"danbooru",
@@ -42,7 +44,6 @@ modules = [
"e621",
"erome",
"exhentai",
- "fallenangels",
"fanbox",
"fanleaks",
"fantia",
@@ -84,6 +85,7 @@ modules = [
"keenspot",
"kemonoparty",
"khinsider",
+ "koharu",
"komikcast",
"lensdump",
"lexica",
diff --git a/gallery_dl/extractor/agnph.py b/gallery_dl/extractor/agnph.py
new file mode 100644
index 0000000..653b73f
--- /dev/null
+++ b/gallery_dl/extractor/agnph.py
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://agn.ph/"""
+
+from . import booru
+from .. import text
+
+from xml.etree import ElementTree
+import collections
+import re
+
+BASE_PATTERN = r"(?:https?://)?agn\.ph"
+
+
+class AgnphExtractor(booru.BooruExtractor):
+ category = "agnph"
+ root = "https://agn.ph"
+ page_start = 1
+ per_page = 45
+
+ TAG_TYPES = {
+ "a": "artist",
+ "b": "copyright",
+ "c": "character",
+ "d": "species",
+ "m": "general",
+ }
+
+ def _init(self):
+ self.cookies.set("confirmed_age", "true", domain="agn.ph")
+
+ def _prepare(self, post):
+ post["date"] = text.parse_timestamp(post["created_at"])
+ post["status"] = post["status"].strip()
+ post["has_children"] = ("true" in post["has_children"])
+
+ def _xml_to_dict(self, xml):
+ return {element.tag: element.text for element in xml}
+
+ def _pagination(self, url, params):
+ params["api"] = "xml"
+ if "page" in params:
+ params["page"] = \
+ self.page_start + text.parse_int(params["page"]) - 1
+ else:
+ params["page"] = self.page_start
+
+ while True:
+ data = self.request(url, params=params).text
+ root = ElementTree.fromstring(data)
+
+ yield from map(self._xml_to_dict, root)
+
+ attrib = root.attrib
+ if int(attrib["offset"]) + len(root) >= int(attrib["count"]):
+ return
+
+ params["page"] += 1
+
+ def _html(self, post):
+ url = "{}/gallery/post/show/{}/".format(self.root, post["id"])
+ return self.request(url).text
+
+ def _tags(self, post, page):
+ tag_container = text.extr(
+ page, '<ul class="taglist">', '<h3>Statistics</h3>')
+ if not tag_container:
+ return
+
+ tags = collections.defaultdict(list)
+ pattern = re.compile(r'class="(.)typetag">([^<]+)')
+ for tag_type, tag_name in pattern.findall(tag_container):
+ tags[tag_type].append(text.unquote(tag_name).replace(" ", "_"))
+ for key, value in tags.items():
+ post["tags_" + self.TAG_TYPES[key]] = " ".join(value)
+
+
+class AgnphTagExtractor(AgnphExtractor):
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ archive_fmt = "t_{search_tags}_{id}"
+ pattern = BASE_PATTERN + r"/gallery/post/(?:\?([^#]+))?$"
+ example = "https://agn.ph/gallery/post/?search=TAG"
+
+ def __init__(self, match):
+ AgnphExtractor.__init__(self, match)
+ self.params = text.parse_query(self.groups[0])
+
+ def metadata(self):
+ return {"search_tags": self.params.get("search") or ""}
+
+ def posts(self):
+ url = self.root + "/gallery/post/"
+ return self._pagination(url, self.params.copy())
+
+
+class AgnphPostExtractor(AgnphExtractor):
+ subcategory = "post"
+ archive_fmt = "{id}"
+ pattern = BASE_PATTERN + r"/gallery/post/show/(\d+)"
+ example = "https://agn.ph/gallery/post/show/12345/"
+
+ def posts(self):
+ url = "{}/gallery/post/show/{}/?api=xml".format(
+ self.root, self.groups[0])
+ post = ElementTree.fromstring(self.request(url).text)
+ return (self._xml_to_dict(post),)
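The new agnph extractor drives agn.ph's XML API ("?api=xml") and flattens each <post> element into a plain dict before handing it to the shared booru pipeline. A minimal standalone sketch of that conversion and of the stop condition used by _pagination() (the sample XML is illustrative, not real agn.ph output):

    from xml.etree import ElementTree

    SAMPLE = """<posts count="2" offset="0">
      <post><id>1</id><created_at>1700000000</created_at></post>
      <post><id>2</id><created_at>1700000060</created_at></post>
    </posts>"""

    root = ElementTree.fromstring(SAMPLE)
    posts = [{element.tag: element.text for element in post} for post in root]
    print(posts[0]["id"])  # "1" - every value is still a string at this point

    # pagination ends once offset + returned posts reaches the advertised count
    done = int(root.attrib["offset"]) + len(root) >= int(root.attrib["count"])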
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index ec86263..17b780e 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -79,18 +79,20 @@ class AryionExtractor(Extractor):
def metadata(self):
"""Return general metadata"""
- def _pagination_params(self, url, params=None):
+ def _pagination_params(self, url, params=None, needle=None):
if params is None:
params = {"p": 1}
else:
params["p"] = text.parse_int(params.get("p"), 1)
+ if needle is None:
+ needle = "class='gallery-item' id='"
+
while True:
page = self.request(url, params=params).text
cnt = 0
- for post_id in text.extract_iter(
- page, "class='gallery-item' id='", "'"):
+ for post_id in text.extract_iter(page, needle, "'"):
cnt += 1
yield post_id
@@ -200,6 +202,21 @@ class AryionGalleryExtractor(AryionExtractor):
return util.advance(self._pagination_next(url), self.offset)
+class AryionFavoriteExtractor(AryionExtractor):
+ """Extractor for a user's favorites gallery"""
+ subcategory = "favorite"
+ directory_fmt = ("{category}", "{user!l}", "favorites")
+ archive_fmt = "f_{user}_{id}"
+ categorytransfer = True
+ pattern = BASE_PATTERN + r"/favorites/([^/?#]+)"
+ example = "https://aryion.com/g4/favorites/USER"
+
+ def posts(self):
+ url = "{}/g4/favorites/{}".format(self.root, self.user)
+ return self._pagination_params(
+ url, None, "class='gallery-item favorite' id='")
+
+
class AryionTagExtractor(AryionExtractor):
"""Extractor for tag searches on eka's portal"""
subcategory = "tag"
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index ad0caf9..f24059f 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -152,8 +152,16 @@ class BehanceGalleryExtractor(BehanceExtractor):
continue
if mtype == "image":
- url = module["imageSizes"]["size_original"]["url"]
- append((url, module))
+ sizes = {
+ size["url"].rsplit("/", 2)[1]: size
+ for size in module["imageSizes"]["allAvailable"]
+ }
+ size = (sizes.get("source") or
+ sizes.get("max_3840") or
+ sizes.get("fs") or
+ sizes.get("hd") or
+ sizes.get("disp"))
+ append((size["url"], module))
elif mtype == "video":
try:
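Behance no longer trusts "size_original" to exist; every available rendition is indexed by the variant name embedded in its URL path, and a fixed preference chain picks the best one. A sketch of that selection, assuming imageSizes["allAvailable"] entries shaped like {"url": ...}:

    def best_size(all_available):
        # key each rendition by the variant name in its URL path
        sizes = {size["url"].rsplit("/", 2)[1]: size for size in all_available}
        for name in ("source", "max_3840", "fs", "hd", "disp"):
            if name in sizes:
                return sizes[name]

    renditions = [
        {"url": "https://mir-cdn.behance.net/projects/hd/abc.jpg"},
        {"url": "https://mir-cdn.behance.net/projects/source/abc.jpg"},
    ]
    print(best_size(renditions)["url"])  # picks the 'source' rendition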
diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index cbd0e07..7e26f38 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -29,16 +29,21 @@ class BooruExtractor(BaseExtractor):
url_key = self.config("url")
if url_key:
- self._file_url = operator.itemgetter(url_key)
+ if isinstance(url_key, (list, tuple)):
+ self._file_url = self._file_url_list
+ self._file_url_keys = url_key
+ else:
+ self._file_url = operator.itemgetter(url_key)
for post in self.posts():
try:
url = self._file_url(post)
if url[0] == "/":
url = self.root + url
- except (KeyError, TypeError):
- self.log.debug("Unable to fetch download URL for post %s "
- "(md5: %s)", post.get("id"), post.get("md5"))
+ except Exception as exc:
+ self.log.debug("%s: %s", exc.__class__.__name__, exc)
+ self.log.warning("Unable to fetch download URL for post %s "
+ "(md5: %s)", post.get("id"), post.get("md5"))
continue
if fetch_html:
@@ -73,6 +78,11 @@ class BooruExtractor(BaseExtractor):
_file_url = operator.itemgetter("file_url")
+ def _file_url_list(self, post):
+ urls = (post[key] for key in self._file_url_keys if post.get(key))
+ post["_fallback"] = it = iter(urls)
+ return next(it)
+
def _prepare(self, post):
"""Prepare a 'post's metadata"""
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index a093347..77f0de6 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -13,7 +13,7 @@ from .. import text
BASE_PATTERN = (
r"(?:https?://)?(?:app\.)?(bunkr+"
- r"\.(?:s[kiu]|ru|la|is|to|ac|black|cat|media|red|site|ws))"
+ r"\.(?:s[kiu]|fi|ru|la|is|to|ac|black|cat|media|red|site|ws))"
)
LEGACY_DOMAINS = {
diff --git a/gallery_dl/extractor/cien.py b/gallery_dl/extractor/cien.py
new file mode 100644
index 0000000..bae86d0
--- /dev/null
+++ b/gallery_dl/extractor/cien.py
@@ -0,0 +1,199 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://ci-en.net/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?ci-en\.(?:net|dlsite\.com)"
+
+
+class CienExtractor(Extractor):
+ category = "cien"
+ root = "https://ci-en.net"
+ request_interval = (1.0, 2.0)
+
+ def __init__(self, match):
+ self.root = text.root_from_url(match.group(0))
+ Extractor.__init__(self, match)
+
+ def _init(self):
+ self.cookies.set("accepted_rating", "r18g", domain="ci-en.dlsite.com")
+
+ def _pagination_articles(self, url, params):
+ data = {"_extractor": CienArticleExtractor}
+ params["page"] = text.parse_int(params.get("page"), 1)
+
+ while True:
+ page = self.request(url, params=params).text
+
+ for card in text.extract_iter(
+ page, ' class="c-cardCase-item', '</div>'):
+ article_url = text.extr(card, ' href="', '"')
+ yield Message.Queue, article_url, data
+
+ if ' rel="next"' not in page:
+ return
+ params["page"] += 1
+
+
+class CienArticleExtractor(CienExtractor):
+ subcategory = "article"
+ filename_fmt = "{num:>02} {filename}.{extension}"
+ directory_fmt = ("{category}", "{author[name]}", "{post_id} {name}")
+ archive_fmt = "{post_id}_{num}"
+ pattern = BASE_PATTERN + r"/creator/(\d+)/article/(\d+)"
+ example = "https://ci-en.net/creator/123/article/12345"
+
+ def items(self):
+ url = "{}/creator/{}/article/{}".format(
+ self.root, self.groups[0], self.groups[1])
+ page = self.request(url, notfound="article").text
+
+ post = util.json_loads(text.extr(
+ page, '<script type="application/ld+json">', '</script>'))[0]
+
+ files = self._extract_files(post.get("articleBody") or page)
+
+ post["post_url"] = url
+ post["post_id"] = text.parse_int(self.groups[1])
+ post["count"] = len(files)
+ post["date"] = text.parse_datetime(post["datePublished"])
+
+ try:
+ del post["publisher"]
+ del post["sameAs"]
+ except Exception:
+ pass
+
+ yield Message.Directory, post
+ for post["num"], file in enumerate(files, 1):
+ post.update(file)
+ if "extension" not in file:
+ text.nameext_from_url(file["url"], post)
+ yield Message.Url, file["url"], post
+
+ def _extract_files(self, page):
+ files = []
+
+ filetypes = self.config("files")
+ if filetypes is None:
+ self._extract_files_image(page, files)
+ self._extract_files_video(page, files)
+ self._extract_files_download(page, files)
+ self._extract_files_gallery(page, files)
+ else:
+ generators = {
+ "image" : self._extract_files_image,
+ "video" : self._extract_files_video,
+ "download": self._extract_files_download,
+ "gallery" : self._extract_files_gallery,
+ "gallerie": self._extract_files_gallery,
+ }
+ if isinstance(filetypes, str):
+ filetypes = filetypes.split(",")
+ for ft in filetypes:
+ generators[ft.rstrip("s")](page, files)
+
+ return files
+
+ def _extract_files_image(self, page, files):
+ for image in text.extract_iter(
+ page, 'class="file-player-image"', "</figure>"):
+ size = text.extr(image, ' data-size="', '"')
+ w, _, h = size.partition("x")
+
+ files.append({
+ "url" : text.extr(image, ' data-raw="', '"'),
+ "width" : text.parse_int(w),
+ "height": text.parse_int(h),
+ "type" : "image",
+ })
+
+ def _extract_files_video(self, page, files):
+ for video in text.extract_iter(
+ page, "<vue-file-player", "</vue-file-player>"):
+ path = text.extr(video, ' base-path="', '"')
+ name = text.extr(video, ' file-name="', '"')
+ auth = text.extr(video, ' auth-key="', '"')
+
+ file = text.nameext_from_url(name)
+ file["url"] = "{}video-web.mp4?{}".format(path, auth)
+ file["type"] = "video"
+ files.append(file)
+
+ def _extract_files_download(self, page, files):
+ for download in text.extract_iter(
+ page, 'class="downloadBlock', "</div>"):
+ name = text.extr(download, "<p>", "<")
+
+ file = text.nameext_from_url(name.rpartition(" ")[0])
+ file["url"] = text.extr(download, ' href="', '"')
+ file["type"] = "download"
+ files.append(file)
+
+ def _extract_files_gallery(self, page, files):
+ for gallery in text.extract_iter(
+ page, "<vue-image-gallery", "</vue-image-gallery>"):
+
+ url = self.root + "/api/creator/gallery/images"
+ params = {
+ "hash" : text.extr(gallery, ' hash="', '"'),
+ "gallery_id": text.extr(gallery, ' gallery-id="', '"'),
+ "time" : text.extr(gallery, ' time="', '"'),
+ }
+ data = self.request(url, params=params).json()
+ url = self.root + "/api/creator/gallery/imagePath"
+
+ for params["page"], params["file_id"] in enumerate(
+ data["imgList"]):
+ path = self.request(url, params=params).json()["path"]
+
+ file = params.copy()
+ file["url"] = path
+ files.append(file)
+
+
+class CienCreatorExtractor(CienExtractor):
+ subcategory = "creator"
+ pattern = BASE_PATTERN + r"/creator/(\d+)(?:/article(?:\?([^#]+))?)?/?$"
+ example = "https://ci-en.net/creator/123"
+
+ def items(self):
+ url = "{}/creator/{}/article".format(self.root, self.groups[0])
+ params = text.parse_query(self.groups[1])
+ params["mode"] = "list"
+ return self._pagination_articles(url, params)
+
+
+class CienRecentExtractor(CienExtractor):
+ subcategory = "recent"
+ pattern = BASE_PATTERN + r"/mypage/recent(?:\?([^#]+))?"
+ example = "https://ci-en.net/mypage/recent"
+
+ def items(self):
+ url = self.root + "/mypage/recent"
+ params = text.parse_query(self.groups[0])
+ return self._pagination_articles(url, params)
+
+
+class CienFollowingExtractor(CienExtractor):
+ subcategory = "following"
+ pattern = BASE_PATTERN + r"/mypage/subscription(/following)?"
+ example = "https://ci-en.net/mypage/subscription"
+
+ def items(self):
+ url = self.root + "/mypage/subscription" + (self.groups[0] or "")
+ page = self.request(url).text
+ data = {"_extractor": CienCreatorExtractor}
+
+ for subscription in text.extract_iter(
+ page, 'class="c-grid-subscriptionInfo', '</figure>'):
+ url = text.extr(subscription, ' href="', '"')
+ yield Message.Queue, url, data
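The ci-en "files" option controls which embed types ("image", "video", "download", "gallery") get extracted. It accepts either a list or a comma-separated string, and a trailing "s" is stripped so plural forms work too, which is why the dispatch table needs a "gallerie" key ("galleries".rstrip("s") leaves "gallerie"). The normalization on its own:

    def normalize_filetypes(filetypes):
        # "images,galleries" and ["images", "galleries"] are both accepted
        if isinstance(filetypes, str):
            filetypes = filetypes.split(",")
        return [ft.rstrip("s") for ft in filetypes]

    print(normalize_filetypes("images,galleries"))  # ['image', 'gallerie']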
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index d7a41bc..df70571 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -378,7 +378,7 @@ class Extractor():
useragent = self.config("user-agent")
if useragent is None:
useragent = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
- "rv:109.0) Gecko/20100101 Firefox/115.0")
+ "rv:128.0) Gecko/20100101 Firefox/128.0")
elif useragent == "browser":
useragent = _browser_useragent()
headers["User-Agent"] = useragent
@@ -390,6 +390,8 @@ class Extractor():
headers["Accept-Encoding"] = "gzip, deflate, br"
else:
headers["Accept-Encoding"] = "gzip, deflate"
+ if ZSTD:
+ headers["Accept-Encoding"] += ", zstd"
referer = self.config("referer", self.referer)
if referer:
@@ -789,10 +791,11 @@ class BaseExtractor(Extractor):
instances = ()
def __init__(self, match):
- Extractor.__init__(self, match)
if not self.category:
+ self.groups = match.groups()
+ self.match = match
self._init_category()
- self._cfgpath = ("extractor", self.category, self.subcategory)
+ Extractor.__init__(self, match)
def _init_category(self):
for index, group in enumerate(self.groups):
@@ -911,13 +914,12 @@ _browser_cookies = {}
HTTP_HEADERS = {
"firefox": (
("User-Agent", "Mozilla/5.0 ({}; "
- "rv:109.0) Gecko/20100101 Firefox/115.0"),
+ "rv:128.0) Gecko/20100101 Firefox/128.0"),
("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
- "image/avif,image/webp,*/*;q=0.8"),
+ "image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8"),
("Accept-Language", "en-US,en;q=0.5"),
("Accept-Encoding", None),
("Referer", None),
- ("DNT", "1"),
("Connection", "keep-alive"),
("Upgrade-Insecure-Requests", "1"),
("Cookie", None),
@@ -991,6 +993,12 @@ try:
except AttributeError:
BROTLI = False
+# detect zstandard support
+try:
+ ZSTD = urllib3.response.HAS_ZSTD
+except AttributeError:
+ ZSTD = False
+
# set (urllib3) warnings filter
action = config.get((), "warnings", "default")
if action:
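Like the existing brotli check, zstd is only advertised when the installed urllib3 can actually decompress it; older urllib3 versions lack the HAS_ZSTD attribute, hence the AttributeError fallback. The resulting header logic, condensed:

    import urllib3

    try:
        ZSTD = urllib3.response.HAS_ZSTD  # True when the zstandard module is usable
    except AttributeError:
        ZSTD = False  # urllib3 predates zstd support

    headers = {"Accept-Encoding": "gzip, deflate"}
    if ZSTD:
        headers["Accept-Encoding"] += ", zstd"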
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 2199cc8..a70710c 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -846,55 +846,6 @@ class DeviantartStatusExtractor(DeviantartExtractor):
)
-class DeviantartPopularExtractor(DeviantartExtractor):
- """Extractor for popular deviations"""
- subcategory = "popular"
- directory_fmt = ("{category}", "Popular",
- "{popular[range]}", "{popular[search]}")
- archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}"
- pattern = (r"(?:https?://)?www\.deviantart\.com/(?:"
- r"(?:deviations/?)?\?order=(popular-[^/?#]+)"
- r"|((?:[\w-]+/)*)(popular-[^/?#]+)"
- r")/?(?:\?([^#]*))?")
- example = "https://www.deviantart.com/popular-24-hours/"
-
- def __init__(self, match):
- DeviantartExtractor.__init__(self, match)
- self.user = ""
-
- trange1, path, trange2, query = match.groups()
- query = text.parse_query(query)
- self.search_term = query.get("q")
-
- trange = trange1 or trange2 or query.get("order", "")
- if trange.startswith("popular-"):
- trange = trange[8:]
- self.time_range = {
- "newest" : "now",
- "most-recent" : "now",
- "this-week" : "1week",
- "this-month" : "1month",
- "this-century": "alltime",
- "all-time" : "alltime",
- }.get(trange, "alltime")
-
- self.popular = {
- "search": self.search_term or "",
- "range" : trange or "all-time",
- "path" : path.strip("/") if path else "",
- }
-
- def deviations(self):
- if self.time_range == "now":
- return self.api.browse_newest(self.search_term, self.offset)
- return self.api.browse_popular(
- self.search_term, self.time_range, self.offset)
-
- def prepare(self, deviation):
- DeviantartExtractor.prepare(self, deviation)
- deviation["popular"] = self.popular
-
-
class DeviantartTagExtractor(DeviantartExtractor):
"""Extractor for deviations from tag searches"""
subcategory = "tag"
@@ -1077,14 +1028,14 @@ class DeviantartGallerySearchExtractor(DeviantartExtractor):
class DeviantartFollowingExtractor(DeviantartExtractor):
"""Extractor for user's watched users"""
subcategory = "following"
- pattern = BASE_PATTERN + "/about#watching$"
+ pattern = BASE_PATTERN + "/(?:about#)?watching"
example = "https://www.deviantart.com/USER/about#watching"
def items(self):
- eclipse_api = DeviantartEclipseAPI(self)
+ api = DeviantartOAuthAPI(self)
- for user in eclipse_api.user_watching(self.user, self.offset):
- url = "{}/{}".format(self.root, user["username"])
+ for user in api.user_friends(self.user):
+ url = "{}/{}".format(self.root, user["user"]["username"])
user["_extractor"] = DeviantartUserExtractor
yield Message.Queue, url, user
@@ -1095,7 +1046,7 @@ class DeviantartFollowingExtractor(DeviantartExtractor):
class DeviantartOAuthAPI():
"""Interface for the DeviantArt OAuth API
- Ref: https://www.deviantart.com/developers/http/v1/20160316
+ https://www.deviantart.com/developers/http/v1/20160316
"""
CLIENT_ID = "5388"
CLIENT_SECRET = "76b08c69cfb27f26d6161f9ab6d061a1"
@@ -1188,29 +1139,6 @@ class DeviantartOAuthAPI():
"mature_content": self.mature}
return self._pagination(endpoint, params, public=False, unpack=True)
- def browse_newest(self, query=None, offset=0):
- """Browse newest deviations"""
- endpoint = "/browse/newest"
- params = {
- "q" : query,
- "limit" : 120,
- "offset" : offset,
- "mature_content": self.mature,
- }
- return self._pagination(endpoint, params)
-
- def browse_popular(self, query=None, timerange=None, offset=0):
- """Yield popular deviations"""
- endpoint = "/browse/popular"
- params = {
- "q" : query,
- "limit" : 120,
- "timerange" : timerange,
- "offset" : offset,
- "mature_content": self.mature,
- }
- return self._pagination(endpoint, params)
-
def browse_tags(self, tag, offset=0):
""" Browse a tag """
endpoint = "/browse/tags"
@@ -1223,11 +1151,12 @@ class DeviantartOAuthAPI():
return self._pagination(endpoint, params)
def browse_user_journals(self, username, offset=0):
- """Yield all journal entries of a specific user"""
- endpoint = "/browse/user/journals"
- params = {"username": username, "offset": offset, "limit": 50,
- "mature_content": self.mature, "featured": "false"}
- return self._pagination(endpoint, params)
+ journals = filter(
+ lambda post: "/journal/" in post["url"],
+ self.user_profile_posts(username))
+ if offset:
+ journals = util.advance(journals, offset)
+ return journals
def collections(self, username, folder_id, offset=0):
"""Yield all Deviation-objects contained in a collection folder"""
@@ -1339,16 +1268,10 @@ class DeviantartOAuthAPI():
"mature_content": self.mature}
return self._pagination_list(endpoint, params)
- @memcache(keyarg=1)
- def user_profile(self, username):
- """Get user profile information"""
- endpoint = "/user/profile/" + username
- return self._call(endpoint, fatal=False)
-
- def user_statuses(self, username, offset=0):
- """Yield status updates of a specific user"""
- endpoint = "/user/statuses/"
- params = {"username": username, "offset": offset, "limit": 50}
+ def user_friends(self, username, offset=0):
+ """Get the users list of friends"""
+ endpoint = "/user/friends/" + username
+ params = {"limit": 50, "offset": offset, "mature_content": self.mature}
return self._pagination(endpoint, params)
def user_friends_watch(self, username):
@@ -1376,6 +1299,27 @@ class DeviantartOAuthAPI():
endpoint, method="POST", public=False, fatal=False,
).get("success")
+ @memcache(keyarg=1)
+ def user_profile(self, username):
+ """Get user profile information"""
+ endpoint = "/user/profile/" + username
+ return self._call(endpoint, fatal=False)
+
+ def user_profile_posts(self, username):
+ endpoint = "/user/profile/posts"
+ params = {"username": username, "limit": 50,
+ "mature_content": self.mature}
+ return self._pagination(endpoint, params)
+
+ def user_statuses(self, username, offset=0):
+ """Yield status updates of a specific user"""
+ statuses = filter(
+ lambda post: "/status-update/" in post["url"],
+ self.user_profile_posts(username))
+ if offset:
+ statuses = util.advance(statuses, offset)
+ return statuses
+
def authenticate(self, refresh_token_key):
"""Authenticate the application by requesting an access token"""
self.headers["Authorization"] = \
@@ -1464,7 +1408,7 @@ class DeviantartOAuthAPI():
self.log.error(msg)
return data
- def _switch_tokens(self, results, params):
+ def _should_switch_tokens(self, results, params):
if len(results) < params["limit"]:
return True
@@ -1496,7 +1440,7 @@ class DeviantartOAuthAPI():
results = [item["journal"] for item in results
if "journal" in item]
if extend:
- if public and self._switch_tokens(results, params):
+ if public and self._should_switch_tokens(results, params):
if self.refresh_token_key:
self.log.debug("Switching to private access token")
public = False
@@ -1540,6 +1484,11 @@ class DeviantartOAuthAPI():
return
params["offset"] = int(params["offset"]) + len(results)
+ def _pagination_list(self, endpoint, params, key="results"):
+ result = []
+ result.extend(self._pagination(endpoint, params, False, key=key))
+ return result
+
@staticmethod
def _shared_content(results):
"""Return an iterable of shared deviations in 'results'"""
@@ -1548,11 +1497,6 @@ class DeviantartOAuthAPI():
if "deviation" in item:
yield item["deviation"]
- def _pagination_list(self, endpoint, params, key="results"):
- result = []
- result.extend(self._pagination(endpoint, params, False, key=key))
- return result
-
def _metadata(self, deviations):
"""Add extended metadata to each deviation object"""
if len(deviations) <= self.limit:
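With /browse/user/journals and /user/statuses gone, journals and status updates are both derived from the combined /user/profile/posts stream by filtering on the post URL; offsets are honored by skipping items client-side (util.advance in gallery-dl, itertools.islice in this stubbed illustration):

    import itertools

    def profile_posts():  # stand-in for user_profile_posts(username)
        yield {"url": "https://www.deviantart.com/u/journal/entry-1"}
        yield {"url": "https://www.deviantart.com/u/art/pic-2"}
        yield {"url": "https://www.deviantart.com/u/status-update/3"}

    journals = filter(lambda post: "/journal/" in post["url"], profile_posts())
    statuses = filter(lambda post: "/status-update/" in post["url"], profile_posts())

    offset = 0
    journals = itertools.islice(journals, offset, None)  # client-side offset
    print(sum(1 for _ in journals), sum(1 for _ in statuses))  # 1 1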
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
index 26f2184..2f0230a 100644
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -18,7 +18,8 @@ class DirectlinkExtractor(Extractor):
filename_fmt = "{domain}/{path}/{filename}.{extension}"
archive_fmt = filename_fmt
pattern = (r"(?i)https?://(?P<domain>[^/?#]+)/(?P<path>[^?#]+\."
- r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
+ r"(?:jpe?g|jpe|png|gif|bmp|svg|web[mp]|avif|heic|psd"
+ r"|mp4|m4v|mov|mkv|og[gmv]|wav|mp3|opus|zip|rar|7z|pdf|swf))"
r"(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?$")
example = "https://en.wikipedia.org/static/images/project-logos/enwiki.png"
diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py
index 733d0d8..583869f 100644
--- a/gallery_dl/extractor/dynastyscans.py
+++ b/gallery_dl/extractor/dynastyscans.py
@@ -66,6 +66,8 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
text.extr(group, ' alt="', '"')),
"date" : text.parse_datetime(extr(
'"icon-calendar"></i> ', '<'), "%b %d, %Y"),
+ "tags" : text.split_html(extr(
+ "class='tags'>", "<div id='chapter-actions'")),
"lang" : "en",
"language": "English",
}
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index 8c9da2f..e6d136f 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -46,18 +46,24 @@ class EromeExtractor(Extractor):
page, 'href="https://www.erome.com/', '"', pos)
urls = []
+ date = None
groups = page.split('<div class="media-group"')
for group in util.advance(groups, 1):
url = (text.extr(group, '<source src="', '"') or
text.extr(group, 'data-src="', '"'))
if url:
urls.append(url)
+ if not date:
+ ts = text.extr(group, '?v=', '"')
+ if len(ts) > 1:
+ date = text.parse_timestamp(ts)
data = {
"album_id" : album_id,
"title" : text.unescape(title),
"user" : text.unquote(user),
"count" : len(urls),
+ "date" : date,
"_http_headers": {"Referer": url},
}
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 1805403..1b4f995 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -394,6 +394,9 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self.original = False
return self.data["_url_1280"]
+ if " temporarily banned " in page:
+ raise exception.AuthorizationError("Temporarily Banned")
+
self._report_limits()
return True
diff --git a/gallery_dl/extractor/fallenangels.py b/gallery_dl/extractor/fallenangels.py
deleted file mode 100644
index 650a707..0000000
--- a/gallery_dl/extractor/fallenangels.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2017-2023 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://www.fascans.com/"""
-
-from .common import ChapterExtractor, MangaExtractor
-from .. import text, util
-
-
-class FallenangelsChapterExtractor(ChapterExtractor):
- """Extractor for manga chapters from fascans.com"""
- category = "fallenangels"
- pattern = (r"(?:https?://)?(manga|truyen)\.fascans\.com"
- r"/manga/([^/?#]+)/([^/?#]+)")
- example = "https://manga.fascans.com/manga/NAME/CHAPTER/"
-
- def __init__(self, match):
- self.version, self.manga, self.chapter = match.groups()
- url = "https://{}.fascans.com/manga/{}/{}/1".format(
- self.version, self.manga, self.chapter)
- ChapterExtractor.__init__(self, match, url)
-
- def metadata(self, page):
- extr = text.extract_from(page)
- lang = "vi" if self.version == "truyen" else "en"
- chapter, sep, minor = self.chapter.partition(".")
- return {
- "manga" : extr('name="description" content="', ' Chapter '),
- "title" : extr(': ', ' - Page 1'),
- "chapter" : chapter,
- "chapter_minor": sep + minor,
- "lang" : lang,
- "language": util.code_to_language(lang),
- }
-
- @staticmethod
- def images(page):
- return [
- (img["page_image"], None)
- for img in util.json_loads(
- text.extr(page, "var pages = ", ";")
- )
- ]
-
-
-class FallenangelsMangaExtractor(MangaExtractor):
- """Extractor for manga from fascans.com"""
- chapterclass = FallenangelsChapterExtractor
- category = "fallenangels"
- pattern = r"(?:https?://)?((manga|truyen)\.fascans\.com/manga/[^/]+)/?$"
- example = "https://manga.fascans.com/manga/NAME"
-
- def __init__(self, match):
- url = "https://" + match.group(1)
- self.lang = "vi" if match.group(2) == "truyen" else "en"
- MangaExtractor.__init__(self, match, url)
-
- def chapters(self, page):
- extr = text.extract_from(page)
- results = []
- language = util.code_to_language(self.lang)
- while extr('<li style="', '"'):
- vol = extr('class="volume-', '"')
- url = extr('href="', '"')
- cha = extr('>', '<')
- title = extr('<em>', '</em>')
-
- manga, _, chapter = cha.rpartition(" ")
- chapter, dot, minor = chapter.partition(".")
- results.append((url, {
- "manga" : manga,
- "title" : text.unescape(title),
- "volume" : text.parse_int(vol),
- "chapter" : text.parse_int(chapter),
- "chapter_minor": dot + minor,
- "lang" : self.lang,
- "language": language,
- }))
- return results
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 6040187..f48a984 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -113,6 +113,12 @@ class FuraffinityExtractor(Extractor):
data["gender"] = rh(extr('>Gender</strong>', '</div>'))
data["width"] = pi(extr("<span>", "x"))
data["height"] = pi(extr("", "p"))
+ data["folders"] = folders = []
+ for folder in extr(
+ "<h3>Listed in Folders</h3>", "</section>").split("</a>"):
+ folder = rh(folder)
+ if folder:
+ folders.append(folder)
else:
# old site layout
data["title"] = text.unescape(extr("<h2>", "</h2>"))
@@ -132,11 +138,14 @@ class FuraffinityExtractor(Extractor):
data["_description"] = extr(
'<td valign="top" align="left" width="70%" class="alt1" '
'style="padding:8px">', ' </td>')
+ data["folders"] = () # folders not present in old layout
data["artist_url"] = data["artist"].replace("_", "").lower()
data["user"] = self.user or data["artist_url"]
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
data["description"] = self._process_description(data["_description"])
+ data["thumbnail"] = "https://t.furaffinity.net/{}@600-{}.jpg".format(
+ post_id, path.rsplit("/", 2)[1])
return data
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 8d8b8ad..fbbd26c 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -36,7 +36,9 @@ class GelbooruV02Extractor(booru.BooruExtractor):
params["pid"] = self.page_start
params["limit"] = self.per_page
- post = None
+ post = total = None
+ count = 0
+
while True:
try:
root = self._api_request(params)
@@ -50,12 +52,29 @@ class GelbooruV02Extractor(booru.BooruExtractor):
params["pid"] = 0
continue
+ if total is None:
+ try:
+ total = int(root.attrib["count"])
+ self.log.debug("%s posts in total", total)
+ except Exception as exc:
+ total = 0
+ self.log.debug(
+ "Failed to get total number of posts (%s: %s)",
+ exc.__class__.__name__, exc)
+
post = None
for post in root:
yield post.attrib
- if len(root) < self.per_page:
- return
+ num = len(root)
+ count += num
+ if num < self.per_page:
+ if not total or count >= total:
+ return
+ if not num:
+ self.log.debug("Empty response - Retrying")
+ continue
+
params["pid"] += 1
def _pagination_html(self, params):
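Pagination now records the post total advertised in the API's root element, so a short page only terminates the loop once the running count has caught up with it; an empty page with posts still outstanding is treated as transient and re-requested. The control flow reduced to its core (fetch() is a stand-in returning (posts, advertised_total)):

    def paginate(fetch, per_page):
        pid = count = 0
        total = None
        while True:
            posts, advertised = fetch(pid)
            if total is None:
                total = advertised  # 0 when the attribute is missing or unparsable
            yield from posts
            count += len(posts)
            if len(posts) < per_page:
                if not total or count >= total:
                    return
                if not posts:
                    continue  # empty response but more expected: retry same page
            pid += 1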
diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py
index 97b7844..286ee38 100644
--- a/gallery_dl/extractor/hentainexus.py
+++ b/gallery_dl/extractor/hentainexus.py
@@ -70,10 +70,13 @@ class HentainexusGalleryExtractor(GalleryExtractor):
for img in imgs:
img["_http_headers"] = headers
- return [
- (img["image"], img)
- for img in imgs
- ]
+ results = []
+ for img in imgs:
+ try:
+ results.append((img["image"], img))
+ except KeyError:
+ pass
+ return results
@staticmethod
def _decode(data):
diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py
index a2b51be..34fbabd 100644
--- a/gallery_dl/extractor/hotleak.py
+++ b/gallery_dl/extractor/hotleak.py
@@ -23,6 +23,12 @@ class HotleakExtractor(Extractor):
def items(self):
for post in self.posts():
+ if self.type == "photo":
+ post["url"] = (
+ post["url"]
+ .replace("/storage/storage/", "/storage/")
+ .replace("_thumb.", ".")
+ )
post["_http_expected_status"] = (404,)
yield Message.Directory, post
yield Message.Url, post["url"], post
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index 85446c0..345f51d 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -19,7 +19,7 @@ class ImagefapExtractor(Extractor):
category = "imagefap"
root = "https://www.imagefap.com"
directory_fmt = ("{category}", "{gallery_id} {title}")
- filename_fmt = "{category}_{gallery_id}_{filename}.{extension}"
+ filename_fmt = "{category}_{gallery_id}_{num:04}_{filename}.{extension}"
archive_fmt = "{gallery_id}_{image_id}"
request_interval = (2.0, 4.0)
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index 2ae8cbe..f3098f1 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -246,14 +246,12 @@ class InkbunnyFollowingExtractor(InkbunnyExtractor):
data = {"_extractor": InkbunnyUserExtractor}
while True:
- cnt = 0
for user in text.extract_iter(
page, '<a class="widget_userNameSmall" href="', '"',
page.index('id="changethumboriginal_form"')):
- cnt += 1
yield Message.Queue, self.root + user, data
- if cnt < 20:
+ if "<a title='next page' " not in page:
return
params["page"] += 1
page = self.request(url, params=params).text
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index f7a5cc7..dbe2df3 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -596,6 +596,22 @@ class InstagramTagExtractor(InstagramExtractor):
return self.api.tags_media(self.item)
+class InstagramInfoExtractor(InstagramExtractor):
+ """Extractor for an Instagram user's profile data"""
+ subcategory = "info"
+ pattern = USER_PATTERN + r"/info"
+ example = "https://www.instagram.com/USER/info/"
+
+ def items(self):
+ screen_name = self.item
+ if screen_name.startswith("id:"):
+ user = self.api.user_by_id(screen_name[3:])
+ else:
+ user = self.api.user_by_name(screen_name)
+
+ return iter(((Message.Directory, user),))
+
+
class InstagramAvatarExtractor(InstagramExtractor):
"""Extractor for an Instagram user's avatar"""
subcategory = "avatar"
@@ -975,9 +991,9 @@ class InstagramGraphqlAPI():
if not info["has_next_page"]:
return extr._update_cursor(None)
elif not data["edges"]:
- s = "" if self.item.endswith("s") else "s"
+ s = "" if self.extractor.item.endswith("s") else "s"
raise exception.StopExtraction(
- "%s'%s posts are private", self.item, s)
+ "%s'%s posts are private", self.extractor.item, s)
variables["after"] = extr._update_cursor(info["end_cursor"])
diff --git a/gallery_dl/extractor/koharu.py b/gallery_dl/extractor/koharu.py
new file mode 100644
index 0000000..979b1a2
--- /dev/null
+++ b/gallery_dl/extractor/koharu.py
@@ -0,0 +1,221 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://koharu.to/"""
+
+from .common import GalleryExtractor, Extractor, Message
+from .. import text, exception
+from ..cache import cache
+
+BASE_PATTERN = r"(?i)(?:https?://)?(?:koharu|anchira)\.to"
+
+
+class KoharuExtractor(Extractor):
+ """Base class for koharu extractors"""
+ category = "koharu"
+ root = "https://koharu.to"
+ root_api = "https://api.koharu.to"
+ request_interval = (0.5, 1.5)
+
+ def _init(self):
+ self.headers = {
+ "Accept" : "*/*",
+ "Referer": self.root + "/",
+ "Origin" : self.root,
+ }
+
+ def _pagination(self, endpoint, params):
+ url_api = self.root_api + endpoint
+
+ while True:
+ data = self.request(
+ url_api, params=params, headers=self.headers).json()
+
+ try:
+ entries = data["entries"]
+ except KeyError:
+ return
+
+ for entry in entries:
+ url = "{}/g/{}/{}".format(
+ self.root, entry["id"], entry["public_key"])
+ entry["_extractor"] = KoharuGalleryExtractor
+ yield Message.Queue, url, entry
+
+ try:
+ if data["limit"] * data["page"] >= data["total"]:
+ return
+ except Exception:
+ pass
+ params["page"] += 1
+
+
+class KoharuGalleryExtractor(KoharuExtractor, GalleryExtractor):
+ """Extractor for koharu galleries"""
+ filename_fmt = "{num:>03}.{extension}"
+ directory_fmt = ("{category}", "{id} {title}")
+ archive_fmt = "{id}_{num}"
+ request_interval = 0.0
+ pattern = BASE_PATTERN + r"/(?:g|reader)/(\d+)/(\w+)"
+ example = "https://koharu.to/g/12345/67890abcde/"
+
+ TAG_TYPES = {
+ 0 : "general",
+ 1 : "artist",
+ 2 : "circle",
+ 3 : "parody",
+ 4 : "magazine",
+ 5 : "character",
+ 6 : "",
+ 7 : "uploader",
+ 8 : "male",
+ 9 : "female",
+ 10: "mixed",
+ 11: "language",
+ 12: "other",
+ }
+
+ def __init__(self, match):
+ GalleryExtractor.__init__(self, match)
+ self.gallery_url = None
+
+ def _init(self):
+ self.headers = {
+ "Accept" : "*/*",
+ "Referer": self.root + "/",
+ "Origin" : self.root,
+ }
+
+ self.fmt = self.config("format")
+ self.cbz = self.config("cbz", True)
+
+ if self.cbz:
+ self.filename_fmt = "{id} {title}.{extension}"
+ self.directory_fmt = ("{category}",)
+
+ def metadata(self, _):
+ url = "{}/books/detail/{}/{}".format(
+ self.root_api, self.groups[0], self.groups[1])
+ self.data = data = self.request(url, headers=self.headers).json()
+
+ tags = []
+ for tag in data["tags"]:
+ name = tag["name"]
+ namespace = tag.get("namespace", 0)
+ tags.append(self.TAG_TYPES[namespace] + ":" + name)
+ data["tags"] = tags
+ data["date"] = text.parse_timestamp(data["created_at"] // 1000)
+
+ try:
+ if self.cbz:
+ data["count"] = len(data["thumbnails"]["entries"])
+ del data["thumbnails"]
+ del data["rels"]
+ except Exception:
+ pass
+
+ return data
+
+ def images(self, _):
+ data = self.data
+ fmt = self._select_format(data["data"])
+
+ url = "{}/books/data/{}/{}/{}/{}".format(
+ self.root_api,
+ data["id"], data["public_key"],
+ fmt["id"], fmt["public_key"],
+ )
+ params = {
+ "v": data["updated_at"],
+ "w": fmt["w"],
+ }
+
+ if self.cbz:
+ params["action"] = "dl"
+ base = self.request(
+ url, method="POST", params=params, headers=self.headers,
+ ).json()["base"]
+ url = "{}?v={}&w={}".format(base, data["updated_at"], fmt["w"])
+ info = text.nameext_from_url(base)
+ if not info["extension"]:
+ info["extension"] = "cbz"
+ return ((url, info),)
+
+ data = self.request(url, params=params, headers=self.headers).json()
+ base = data["base"]
+
+ results = []
+ for entry in data["entries"]:
+ dimensions = entry["dimensions"]
+ info = {
+ "w": dimensions[0],
+ "h": dimensions[1],
+ "_http_headers": self.headers,
+ }
+ results.append((base + entry["path"], info))
+ return results
+
+ def _select_format(self, formats):
+ if not self.fmt or self.fmt == "original":
+ fmtid = "0"
+ else:
+ fmtid = str(self.fmt)
+
+ try:
+ fmt = formats[fmtid]
+ except KeyError:
+ raise exception.NotFoundError("format")
+
+ fmt["w"] = fmtid
+ return fmt
+
+
+class KoharuSearchExtractor(KoharuExtractor):
+ """Extractor for koharu search results"""
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"/\?([^#]*)"
+ example = "https://koharu.to/?s=QUERY"
+
+ def items(self):
+ params = text.parse_query(self.groups[0])
+ params["page"] = text.parse_int(params.get("page"), 1)
+ return self._pagination("/books", params)
+
+
+class KoharuFavoriteExtractor(KoharuExtractor):
+ """Extractor for koharu favorites"""
+ subcategory = "favorite"
+ pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
+ example = "https://koharu.to/favorites"
+
+ def items(self):
+ self.login()
+
+ params = text.parse_query(self.groups[0])
+ params["page"] = text.parse_int(params.get("page"), 1)
+ return self._pagination("/favorites", params)
+
+ def login(self):
+ username, password = self._get_auth_info()
+ if username:
+ self.headers["Authorization"] = \
+ "Bearer " + self._login_impl(username, password)
+ return
+
+ raise exception.AuthenticationError("Username and password required")
+
+ @cache(maxage=86400, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = "https://auth.koharu.to/login"
+ data = {"uname": username, "passwd": password}
+ response = self.request(
+ url, method="POST", headers=self.headers, data=data)
+
+ return response.json()["session"]
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 60cca22..b01c591 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -120,7 +120,8 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
]
else:
pos = page.find('id="view-center"') + 1
- return (text.extr(page, 'itemprop="image" src="', '"', pos),)
+ # do NOT use text.extr() here, as it doesn't support a pos argument
+ return (text.extract(page, 'itemprop="image" src="', '"', pos)[0],)
@staticmethod
def _extract_user_name(page):
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index b21e1eb..2330b08 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -77,6 +77,7 @@ class PahealTagExtractor(PahealExtractor):
pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net"
r"/post/list/([^/?#]+)")
example = "https://rule34.paheal.net/post/list/TAG/1"
+ page_start = 1
per_page = 70
def __init__(self, match):
@@ -87,11 +88,16 @@ class PahealTagExtractor(PahealExtractor):
if self.config("metadata"):
self._extract_data = self._extract_data_ex
+ def skip(self, num):
+ pages = num // self.per_page
+ self.page_start += pages
+ return pages * self.per_page
+
def get_metadata(self):
return {"search_tags": self.tags}
def get_posts(self):
- pnum = 1
+ pnum = self.page_start
while True:
url = "{}/post/list/{}/{}".format(self.root, self.tags, pnum)
page = self.request(url).text
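skip() can now fast-forward whole result pages instead of fetching and discarding posts. Only multiples of per_page are skippable through the page number, so the remainder is reported back and handled downstream. Worked through:

    per_page = 70
    page_start = 1

    def skip(num):
        global page_start
        pages = num // per_page       # whole pages we can jump over via the URL
        page_start += pages
        return pages * per_page       # how many posts were actually skipped

    print(skip(150), page_start)  # 140 3 -> resume at /post/list/TAG/3;
                                  # the remaining 10 posts are skipped one by one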
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index 115de9a..271fa50 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -78,12 +78,16 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
}
def images(self, page):
- return [
- (beau(url), None)
- for url in text.extract_iter(
- page, "lstImages.push('", "'",
- )
- ]
+ results = []
+
+ for block in page.split(" pth = '")[1:]:
+ pth = text.extr(block, "", "'")
+ for needle, repl in re.findall(
+ r"pth = pth\.replace\(/([^/]+)/g, [\"']([^\"']*)", block):
+ pth = pth.replace(needle, repl)
+ results.append((beau(pth), None))
+
+ return results
class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
@@ -116,9 +120,9 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
def beau(url):
- """https://readcomiconline.li/Scripts/rguard.min.js"""
- url = url.replace("_x236", "d")
- url = url.replace("_x945", "g")
+ """https://readcomiconline.li/Scripts/rguard.min.js?v=1.5.1"""
+ url = url.replace("pw_.g28x", "b")
+ url = url.replace("d2pr.x_27", "h")
if url.startswith("https"):
return url
@@ -126,8 +130,8 @@ def beau(url):
url, sep, rest = url.partition("?")
containsS0 = "=s0" in url
url = url[:-3 if containsS0 else -6]
- url = url[4:22] + url[25:]
- url = url[0:-6] + url[-2:]
+ url = url[15:33] + url[50:]
+ url = url[0:-11] + url[-2:]
url = binascii.a2b_base64(url).decode()
url = url[0:13] + url[17:]
url = url[0:-2] + ("=s0" if containsS0 else "=s1600")
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 327bcd1..506f6ac 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -190,7 +190,7 @@ class RedgifsImageExtractor(RedgifsExtractor):
r"(?:\w+\.)?redgifs\.com/(?:watch|ifr)|"
r"(?:\w+\.)?gfycat\.com(?:/gifs/detail|/\w+)?|"
r"(?:www\.)?gifdeliverynetwork\.com|"
- r"i\.redgifs\.com/i)/([A-Za-z]+)")
+ r"i\.redgifs\.com/i)/([A-Za-z0-9]+)")
example = "https://redgifs.com/watch/ID"
def gifs(self):
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index caf3e16..ad3efa7 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -16,7 +16,7 @@ import collections
import re
BASE_PATTERN = r"(?:https?://)?" \
- r"(?:(?:chan|beta|black|white)\.sankakucomplex\.com|sankaku\.app)" \
+ r"(?:(?:chan|www|beta|black|white)\.sankakucomplex\.com|sankaku\.app)" \
r"(?:/[a-z]{2})?"
@@ -45,6 +45,9 @@ class SankakuExtractor(BooruExtractor):
def skip(self, num):
return 0
+ def _init(self):
+ self.api = SankakuAPI(self)
+
def _file_url(self, post):
url = post["file_url"]
if not url:
@@ -81,6 +84,15 @@ class SankakuExtractor(BooruExtractor):
post["tags_" + key] = value
post["tag_string_" + key] = " ".join(value)
+ def _notes(self, post, page):
+ if post.get("has_notes"):
+ post["notes"] = self.api.notes(post["id"])
+ for note in post["notes"]:
+ note["created_at"] = note["created_at"]["s"]
+ note["updated_at"] = note["updated_at"]["s"]
+ else:
+ post["notes"] = ()
+
class SankakuTagExtractor(SankakuExtractor):
"""Extractor for images from sankaku.app by search-tags"""
@@ -109,7 +121,7 @@ class SankakuTagExtractor(SankakuExtractor):
def posts(self):
params = {"tags": self.tags}
- return SankakuAPI(self).posts_keyset(params)
+ return self.api.posts_keyset(params)
class SankakuPoolExtractor(SankakuExtractor):
@@ -125,7 +137,7 @@ class SankakuPoolExtractor(SankakuExtractor):
self.pool_id = match.group(1)
def metadata(self):
- pool = SankakuAPI(self).pools(self.pool_id)
+ pool = self.api.pools(self.pool_id)
pool["tags"] = [tag["name"] for tag in pool["tags"]]
pool["artist_tags"] = [tag["name"] for tag in pool["artist_tags"]]
@@ -151,7 +163,7 @@ class SankakuPostExtractor(SankakuExtractor):
self.post_id = match.group(1)
def posts(self):
- return SankakuAPI(self).posts(self.post_id)
+ return self.api.posts(self.post_id)
class SankakuBooksExtractor(SankakuExtractor):
@@ -167,7 +179,7 @@ class SankakuBooksExtractor(SankakuExtractor):
def items(self):
params = {"tags": self.tags, "pool_type": "0"}
- for pool in SankakuAPI(self).pools_keyset(params):
+ for pool in self.api.pools_keyset(params):
pool["_extractor"] = SankakuPoolExtractor
url = "https://sankaku.app/books/{}".format(pool["id"])
yield Message.Queue, url, pool
@@ -192,6 +204,10 @@ class SankakuAPI():
if not self.username:
self.authenticate = util.noop
+ def notes(self, post_id):
+ params = {"lang": "en"}
+ return self._call("/posts/{}/notes".format(post_id), params)
+
def pools(self, pool_id):
params = {"lang": "en"}
return self._call("/pools/" + pool_id, params)
diff --git a/gallery_dl/extractor/sankakucomplex.py b/gallery_dl/extractor/sankakucomplex.py
index e1d4153..50c21e3 100644
--- a/gallery_dl/extractor/sankakucomplex.py
+++ b/gallery_dl/extractor/sankakucomplex.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://www.sankakucomplex.com/"""
+"""Extractors for https://news.sankakucomplex.com/"""
from .common import Extractor, Message
from .. import text, util
@@ -16,7 +16,7 @@ import re
class SankakucomplexExtractor(Extractor):
"""Base class for sankakucomplex extractors"""
category = "sankakucomplex"
- root = "https://www.sankakucomplex.com"
+ root = "https://news.sankakucomplex.com"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -24,14 +24,14 @@ class SankakucomplexExtractor(Extractor):
class SankakucomplexArticleExtractor(SankakucomplexExtractor):
- """Extractor for articles on www.sankakucomplex.com"""
+ """Extractor for articles on news.sankakucomplex.com"""
subcategory = "article"
directory_fmt = ("{category}", "{date:%Y-%m-%d} {title}")
filename_fmt = "{filename}.{extension}"
archive_fmt = "{date:%Y%m%d}_{filename}"
- pattern = (r"(?:https?://)?www\.sankakucomplex\.com"
+ pattern = (r"(?:https?://)?(?:news|www)\.sankakucomplex\.com"
r"/(\d\d\d\d/\d\d/\d\d/[^/?#]+)")
- example = "https://www.sankakucomplex.com/1970/01/01/TITLE"
+ example = "https://news.sankakucomplex.com/1970/01/01/TITLE"
def items(self):
url = "{}/{}/?pg=X".format(self.root, self.path)
@@ -87,9 +87,9 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
class SankakucomplexTagExtractor(SankakucomplexExtractor):
"""Extractor for sankakucomplex blog articles by tag or author"""
subcategory = "tag"
- pattern = (r"(?:https?://)?www\.sankakucomplex\.com"
+ pattern = (r"(?:https?://)?(?:news|www)\.sankakucomplex\.com"
r"/((?:tag|category|author)/[^/?#]+)")
- example = "https://www.sankakucomplex.com/tag/TAG/"
+ example = "https://news.sankakucomplex.com/tag/TAG/"
def items(self):
pnum = 1
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 0abb3ab..7c760ac 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -127,6 +127,8 @@ class SubscribestarExtractor(Extractor):
}
def _parse_datetime(self, dt):
+ if dt.startswith("Updated on "):
+ dt = dt[11:]
date = text.parse_datetime(dt, "%b %d, %Y %I:%M %p")
if date is dt:
date = text.parse_datetime(dt, "%B %d, %Y %I:%M %p")
diff --git a/gallery_dl/extractor/toyhouse.py b/gallery_dl/extractor/toyhouse.py
index 78ff265..64fa951 100644
--- a/gallery_dl/extractor/toyhouse.py
+++ b/gallery_dl/extractor/toyhouse.py
@@ -77,23 +77,27 @@ class ToyhouseExtractor(Extractor):
cnt += 1
yield self._parse_post(post)
- if cnt == 0 and params["page"] == 1:
- token, pos = text.extract(
- page, '<input name="_token" type="hidden" value="', '"')
- if not token:
- return
- data = {
- "_token": token,
- "user" : text.extract(page, 'value="', '"', pos)[0],
- }
- self.request(self.root + "/~account/warnings/accept",
- method="POST", data=data, allow_redirects=False)
- continue
+ if not cnt and params["page"] == 1:
+ if self._accept_content_warning(page):
+ continue
+ return
if cnt < 18:
return
params["page"] += 1
+ def _accept_content_warning(self, page):
+ pos = page.find(' name="_token"') + 1
+ token, pos = text.extract(page, ' value="', '"', pos)
+ user , pos = text.extract(page, ' value="', '"', pos)
+ if not token or not user:
+ return False
+
+ data = {"_token": token, "user": user}
+ self.request(self.root + "/~account/warnings/accept",
+ method="POST", data=data, allow_redirects=False)
+ return True
+
class ToyhouseArtExtractor(ToyhouseExtractor):
"""Extractor for artworks of a toyhouse user"""
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index c34910f..ff29c04 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -386,7 +386,7 @@ class TumblrAPI(oauth.OAuth1API):
def posts(self, blog, params):
"""Retrieve published posts"""
params["offset"] = self.extractor.config("offset")
- params["limit"] = "50"
+ params["limit"] = 50
params["reblog_info"] = "true"
params["type"] = self.posts_type
params["before"] = self.before
@@ -398,8 +398,14 @@ class TumblrAPI(oauth.OAuth1API):
def likes(self, blog):
"""Retrieve liked posts"""
+ endpoint = "/v2/blog/{}/likes".format(blog)
params = {"limit": "50", "before": self.before}
- return self._pagination(blog, "/likes", params, key="liked_posts")
+ while True:
+ posts = self._call(endpoint, params)["liked_posts"]
+ if not posts:
+ return
+ yield from posts
+ params["before"] = posts[-1]["liked_timestamp"]
def _call(self, endpoint, params, **kwargs):
url = self.ROOT + endpoint
@@ -474,6 +480,7 @@ class TumblrAPI(oauth.OAuth1API):
if self.api_key:
params["api_key"] = self.api_key
+ strategy = self.extractor.config("pagination")
while True:
data = self._call(endpoint, params)
@@ -481,13 +488,31 @@ class TumblrAPI(oauth.OAuth1API):
self.BLOG_CACHE[blog] = data["blog"]
cache = False
- yield from data[key]
-
- try:
- endpoint = data["_links"]["next"]["href"]
- except KeyError:
- return
+ posts = data[key]
+ yield from posts
- params = None
- if self.api_key:
- endpoint += "&api_key=" + self.api_key
+ if strategy == "api":
+ try:
+ endpoint = data["_links"]["next"]["href"]
+ except KeyError:
+ return
+
+ params = None
+ if self.api_key:
+ endpoint += "&api_key=" + self.api_key
+
+ elif strategy == "before":
+ if not posts:
+ return
+ timestamp = posts[-1]["timestamp"] + 1
+ if params["before"] and timestamp >= params["before"]:
+ return
+ params["before"] = timestamp
+ params["offset"] = None
+
+ else: # offset
+ params["offset"] = \
+ text.parse_int(params["offset"]) + params["limit"]
+ params["before"] = None
+ if params["offset"] >= data["total_posts"]:
+ return
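The new "pagination" option selects one of three advance strategies: "api" follows the _links.next href supplied by Tumblr, "before" walks the timestamp of the last returned post, and the default steps a plain offset bounded by the reported post total. A simplified single-function rendering of the branching above (the "api" branch is reduced to a boolean check; the real code swaps the endpoint):

    def advance(strategy, params, posts, data):
        """Mutate params for the next request; False means stop."""
        if strategy == "api":
            return "next" in data.get("_links", {})
        if strategy == "before":
            if not posts:
                return False
            timestamp = posts[-1]["timestamp"] + 1
            if params["before"] and timestamp >= params["before"]:
                return False
            params["before"], params["offset"] = timestamp, None
            return True
        # default: offset stepping
        params["offset"] = int(params.get("offset") or 0) + params["limit"]
        params["before"] = None
        return params["offset"] < data["total_posts"]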
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ec098aa..9fa5b3f 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -51,6 +51,8 @@ class TwitterExtractor(Extractor):
if not self.config("transform", True):
self._transform_user = util.identity
self._transform_tweet = util.identity
+
+ self._cursor = None
self._user = None
self._user_obj = None
self._user_cache = {}
@@ -321,8 +323,17 @@ class TwitterExtractor(Extractor):
"quote_count" : tget("quote_count"),
"reply_count" : tget("reply_count"),
"retweet_count" : tget("retweet_count"),
+ "bookmark_count": tget("bookmark_count"),
}
+ if "views" in tweet:
+ try:
+ tdata["view_count"] = int(tweet["views"]["count"])
+ except Exception:
+ tdata["view_count"] = 0
+ else:
+ tdata["view_count"] = 0
+
if "note_tweet" in tweet:
note = tweet["note_tweet"]["note_tweet_results"]["result"]
content = note["text"]
@@ -492,6 +503,14 @@ class TwitterExtractor(Extractor):
},
}
+ def _init_cursor(self):
+ return self.config("cursor") or None
+
+ def _update_cursor(self, cursor):
+ self.log.debug("Cursor: %s", cursor)
+ self._cursor = cursor
+ return cursor
+
def metadata(self):
"""Return general metadata"""
return {}
@@ -499,6 +518,11 @@ class TwitterExtractor(Extractor):
def tweets(self):
"""Yield all relevant tweet objects"""
+ def finalize(self):
+ if self._cursor:
+ self.log.info("Use '-o cursor=%s' to continue downloading "
+ "from the current position", self._cursor)
+
def login(self):
if self.cookies_check(self.cookies_names):
return
@@ -530,6 +554,9 @@ class TwitterUserExtractor(TwitterExtractor):
def initialize(self):
pass
+ def finalize(self):
+ pass
+
def items(self):
base = "{}/{}/".format(self.root, self.user)
return self._dispatch_extractors((
@@ -549,30 +576,73 @@ class TwitterTimelineExtractor(TwitterExtractor):
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)"
example = "https://x.com/USER/timeline"
+ def _init_cursor(self):
+ if self._cursor:
+ return self._cursor.partition("/")[2] or None
+ return None
+
+ def _update_cursor(self, cursor):
+ if cursor:
+ self._cursor = self._cursor_prefix + cursor
+ self.log.debug("Cursor: %s", self._cursor)
+ else:
+ self._cursor = None
+ return cursor
+
def tweets(self):
- # yield initial batch of (media) tweets
- tweet = None
- for tweet in self._select_tweet_source()(self.user):
- yield tweet
- if tweet is None:
- return
+ self._cursor = cursor = self.config("cursor") or None
+ reset = False

- # build search query
- query = "from:{} max_id:{}".format(
- self._user["name"], tweet["rest_id"])
- if self.retweets:
- query += " include:retweets include:nativeretweets"
+ if cursor:
+ state = cursor.partition("/")[0]
+ state, _, tweet_id = state.partition("_")
+ state = text.parse_int(state, 1)
+ else:
+ state = 1
+
+ if state <= 1:
+ self._cursor_prefix = "1/"

- if not self.textonly:
- # try to search for media-only tweets
+ # yield initial batch of (media) tweets
tweet = None
- for tweet in self.api.search_timeline(query + " filter:links"):
+ for tweet in self._select_tweet_source()(self.user):
yield tweet
- if tweet is not None:
+ if tweet is None and not cursor:
return
+ tweet_id = tweet["rest_id"]
+
+ state = reset = 2
+ else:
+ self.api._user_id_by_screen_name(self.user)
+
+ # build search query
+ query = "from:{} max_id:{}".format(self._user["name"], tweet_id)
+ if self.retweets:
+ query += " include:retweets include:nativeretweets"

- # yield unfiltered search results
- yield from self.api.search_timeline(query)
+ if state <= 2:
+ self._cursor_prefix = "2_{}/".format(tweet_id)
+ if reset:
+ self._cursor = self._cursor_prefix
+
+ if not self.textonly:
+ # try to search for media-only tweets
+ tweet = None
+ for tweet in self.api.search_timeline(query + " filter:links"):
+ yield tweet
+ if tweet is not None:
+ return self._update_cursor(None)
+
+ state = reset = 3
+
+ if state <= 3:
+ # yield unfiltered search results
+ self._cursor_prefix = "3_{}/".format(tweet_id)
+ if reset:
+ self._cursor = self._cursor_prefix
+
+ yield from self.api.search_timeline(query)
+ return self._update_cursor(None)
def _select_tweet_source(self):
strategy = self.config("strategy")
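[twitter] The rewritten tweets() above is a three-stage state machine (1: timeline source, 2: media-only search, 3: unfiltered search) and encodes its position as STATE[_TWEETID]/INNER_CURSOR so a resumed run can skip straight to the right stage. A sketch of how such a composite value decomposes, mirroring the partition() calls in the hunk (function name is illustrative):

def split_timeline_cursor(cursor):
    """'2_1234567890/DAABCgAB' -> (2, '1234567890', 'DAABCgAB')"""
    head, _, inner = cursor.partition("/")
    state, _, tweet_id = head.partition("_")
    return int(state or 1), tweet_id, inner or None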
@@ -854,6 +924,24 @@ class TwitterQuotesExtractor(TwitterExtractor):
yield Message.Queue, url, data
+class TwitterInfoExtractor(TwitterExtractor):
+ """Extractor for a user's profile data"""
+ subcategory = "info"
+ pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/info"
+ example = "https://x.com/USER/info"
+
+ def items(self):
+ api = TwitterAPI(self)
+
+ screen_name = self.user
+ if screen_name.startswith("id:"):
+ user = api.user_by_rest_id(screen_name[3:])
+ else:
+ user = api.user_by_screen_name(screen_name)
+
+ return iter(((Message.Directory, self._transform_user(user)),))
+
+
class TwitterAvatarExtractor(TwitterExtractor):
subcategory = "avatar"
filename_fmt = "avatar {date}.{extension}"
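[twitter] The new info extractor emits a single directory message carrying the transformed user record, which should make it a cheap way to inspect profile metadata, e.g. (USER is a placeholder):

    gallery-dl --dump-json "https://x.com/USER/info"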
@@ -1388,7 +1476,11 @@ class TwitterAPI():
"%s %s (%s)", response.status_code, response.reason, errors)
def _pagination_legacy(self, endpoint, params):
- original_retweets = (self.extractor.retweets == "original")
+ extr = self.extractor
+ cursor = extr._init_cursor()
+ if cursor:
+ params["cursor"] = cursor
+ original_retweets = (extr.retweets == "original")
bottom = ("cursor-bottom-", "sq-cursor-bottom")
while True:
@@ -1396,7 +1488,7 @@ class TwitterAPI():
instructions = data["timeline"]["instructions"]
if not instructions:
- return
+ return extr._update_cursor(None)
tweets = data["globalObjects"]["tweets"]
users = data["globalObjects"]["users"]
@@ -1477,8 +1569,8 @@ class TwitterAPI():
# stop on empty response
if not cursor or (not tweets and not tweet_id):
- return
- params["cursor"] = cursor
+ return extr._update_cursor(None)
+ params["cursor"] = extr._update_cursor(cursor)
def _pagination_tweets(self, endpoint, variables,
path=None, stop_tweets=True, features=None):
@@ -1487,6 +1579,9 @@ class TwitterAPI():
pinned_tweet = extr.pinned
params = {"variables": None}
+ cursor = extr._init_cursor()
+ if cursor:
+ variables["cursor"] = cursor
if features is None:
features = self.features_pagination
if features:
@@ -1523,7 +1618,7 @@ class TwitterAPI():
cursor = entry["content"]["value"]
if entries is None:
if not cursor:
- return
+ return extr._update_cursor(None)
entries = ()
except LookupError:
@@ -1672,12 +1767,16 @@ class TwitterAPI():
continue
if stop_tweets and not tweet:
- return
+ return extr._update_cursor(None)
if not cursor or cursor == variables.get("cursor"):
- return
- variables["cursor"] = cursor
+ return extr._update_cursor(None)
+ variables["cursor"] = extr._update_cursor(cursor)
def _pagination_users(self, endpoint, variables, path=None):
+ extr = self.extractor
+ cursor = extr._init_cursor()
+ if cursor:
+ variables["cursor"] = cursor
params = {
"variables": None,
"features" : self._json_dumps(self.features_pagination),
@@ -1697,7 +1796,7 @@ class TwitterAPI():
data = data[key]
instructions = data["instructions"]
except KeyError:
- return
+ return extr._update_cursor(None)
for instr in instructions:
if instr["type"] == "TimelineAddEntries":
@@ -1715,8 +1814,8 @@ class TwitterAPI():
cursor = entry["content"]["value"]
if not cursor or cursor.startswith(("-1|", "0|")) or not entry:
- return
- variables["cursor"] = cursor
+ return extr._update_cursor(None)
+ variables["cursor"] = extr._update_cursor(cursor)
def _handle_ratelimit(self, response):
rl = self.extractor.config("ratelimit")
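[twitter] The login hunk below renames the alternate-identifier option from username_alt to username-alt, matching gallery-dl's hyphenated option names. In a configuration file that would look like (value is a placeholder):

    {
        "extractor": {
            "twitter": {
                "username-alt": "user@example.org"
            }
        }
    }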
@@ -1864,7 +1963,7 @@ def _login_impl(extr, username, password):
},
}
elif subtask == "LoginEnterAlternateIdentifierSubtask":
- alt = extr.config("username_alt") or extr.input(
+ alt = extr.config("username-alt") or extr.input(
"Alternate Identifier (username, email, phone number): ")
data = {
"enter_text": {
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
index 6dfb23c..5cde0d6 100644
--- a/gallery_dl/extractor/vipergirls.py
+++ b/gallery_dl/extractor/vipergirls.py
@@ -101,7 +101,8 @@ class VipergirlsExtractor(Extractor):
class VipergirlsThreadExtractor(VipergirlsExtractor):
"""Extractor for vipergirls threads"""
subcategory = "thread"
- pattern = BASE_PATTERN + r"/threads/(\d+)(?:-[^/?#]+)?(/page\d+)?$"
+ pattern = (BASE_PATTERN +
+ r"/threads/(\d+)(?:-[^/?#]+)?(/page\d+)?(?:$|#|\?(?!p=))")
example = "https://vipergirls.to/threads/12345-TITLE"
def __init__(self, match):
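[vipergirls] The anchored thread pattern still matches plain thread URLs, /pageN suffixes, and fragment links, but no longer claims URLs whose query string addresses a single post via ?p=. A quick self-check of that intent, with BASE_PATTERN expanded to the main domain (an assumption of this sketch):

import re

pattern = (r"(?:https?://)?(?:www\.)?vipergirls\.to"
           r"/threads/(\d+)(?:-[^/?#]+)?(/page\d+)?(?:$|#|\?(?!p=))")

assert re.match(pattern, "https://vipergirls.to/threads/12345-TITLE")
assert re.match(pattern, "https://vipergirls.to/threads/12345-TITLE/page2")
assert not re.match(pattern, "https://vipergirls.to/threads/12345-TITLE?p=678")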
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index c112f4a..922a591 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -115,9 +115,28 @@ class VscoExtractor(Extractor):
class VscoUserExtractor(VscoExtractor):
- """Extractor for images from a user on vsco.co"""
+ """Extractor for a vsco user profile"""
subcategory = "user"
- pattern = USER_PATTERN + r"(?:/gallery|/images(?:/\d+)?)?/?(?:$|[?#])"
+ pattern = USER_PATTERN + r"/?$"
+ example = "https://vsco.co/USER"
+
+ def initialize(self):
+ pass
+
+ def items(self):
+ base = "{}/{}/".format(self.root, self.user)
+ return self._dispatch_extractors((
+ (VscoAvatarExtractor , base + "avatar"),
+ (VscoGalleryExtractor , base + "gallery"),
+ (VscoSpacesExtractor , base + "spaces"),
+ (VscoCollectionExtractor, base + "collection"),
+ ), ("gallery",))
+
+
+class VscoGalleryExtractor(VscoExtractor):
+ """Extractor for a vsco user's gallery"""
+ subcategory = "gallery"
+ pattern = USER_PATTERN + r"/(?:gallery|images)"
example = "https://vsco.co/USER/gallery"
def images(self):
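[vsco] Bare profile URLs are now handled by a dispatcher that queues the avatar, gallery, spaces, and collection extractors (gallery by default), while /gallery and /images URLs go straight to the new gallery subcategory. A rough self-check, assuming USER_PATTERN expands to something like the pattern below:

import re

USER_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co/([^/?#]+)"

assert re.match(USER_PATTERN + r"/?$", "https://vsco.co/USER")  # user (dispatch)
assert re.match(USER_PATTERN + r"/(?:gallery|images)", "https://vsco.co/USER/gallery")
assert re.match(USER_PATTERN + r"/(?:gallery|images)", "https://vsco.co/USER/images/1")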
diff --git a/gallery_dl/extractor/wallpapercave.py b/gallery_dl/extractor/wallpapercave.py
index faf3b0d..796f3f8 100644
--- a/gallery_dl/extractor/wallpapercave.py
+++ b/gallery_dl/extractor/wallpapercave.py
@@ -18,7 +18,7 @@ class WallpapercaveImageExtractor(Extractor):
category = "wallpapercave"
subcategory = "image"
root = "https://wallpapercave.com"
- pattern = r"(?:https?://)?(?:www\.)?wallpapercave\.com"
+ pattern = r"(?:https?://)?(?:www\.)?wallpapercave\.com/"
example = "https://wallpapercave.com/w/wp12345"
def items(self):
@@ -40,3 +40,12 @@ class WallpapercaveImageExtractor(Extractor):
image = text.nameext_from_url(path)
yield Message.Directory, image
yield Message.Url, self.root + path, image
+
+ if path is None:
+ for wp in text.extract_iter(
+ page, 'class="wallpaper" id="wp', '</picture>'):
+ path = text.rextract(wp, ' src="', '"')[0]
+ if path:
+ image = text.nameext_from_url(path)
+ yield Message.Directory, image
+ yield Message.Url, self.root + path, image
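[wallpapercave] The appended block is a fallback for pages where the primary loop found no download path (path is still None afterwards): it scans each wallpaper <picture> block and takes its last src attribute. The same scraping step in isolation, run against a canned HTML snippet (the snippet is illustrative):

from gallery_dl import text

page = '''<div class="wallpaper" id="wpc-123">
<picture><source srcset="/uwp/abc.webp"><img src="/wp/wp12345.jpg"></picture>
</div>'''

for wp in text.extract_iter(page, 'class="wallpaper" id="wp', '</picture>'):
    path = text.rextract(wp, ' src="', '"')[0]  # last src= inside the block
    if path:
        image = text.nameext_from_url(path)
        print(path, image["filename"], image["extension"])
        # -> /wp/wp12345.jpg wp12345 jpg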
diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py
index e91f45f..61a36d5 100644
--- a/gallery_dl/extractor/warosu.py
+++ b/gallery_dl/extractor/warosu.py
@@ -64,7 +64,7 @@ class WarosuThreadExtractor(Extractor):
def parse(self, post):
"""Build post object by extracting data from an HTML post"""
data = self._extract_post(post)
- if "<span> File:" in post and self._extract_image(post, data):
+ if "<span class=fileinfo>" in post and self._extract_image(post, data):
part = data["image"].rpartition("/")[2]
data["tim"], _, data["extension"] = part.partition(".")
data["ext"] = "." + data["extension"]
@@ -83,7 +83,7 @@ class WarosuThreadExtractor(Extractor):
def _extract_image(self, post, data):
extr = text.extract_from(post)
- data["fsize"] = extr("<span> File: ", ", ")
+ data["fsize"] = extr("<span class=fileinfo> File: ", ", ")
data["w"] = extr("", "x")
data["h"] = extr("", ", ")
data["filename"] = text.unquote(extr(
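[warosu] Warosu now wraps file metadata in <span class=fileinfo>, so both the cheap containment check and the extraction markers were updated. text.extract_from reads fields sequentially from the current position; for example, against a canned post snippet (illustrative):

from gallery_dl import text

post = '<span class=fileinfo> File: 1.23 MB, 1920x1080, image.jpg</span>'
extr = text.extract_from(post)
fsize  = extr("<span class=fileinfo> File: ", ", ")  # '1.23 MB'
width  = extr("", "x")                               # '1920'
height = extr("", ", ")                              # '1080'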
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index fc61dff..126ef49 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -11,6 +11,8 @@
from .booru import BooruExtractor
from ..cache import cache
from .. import text, util, exception
+import collections
+import re
BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
@@ -76,22 +78,29 @@ class ZerochanExtractor(BooruExtractor):
'class="breadcrumbs', '</nav>'))[2:],
"uploader": extr('href="/user/', '"'),
"tags" : extr('<ul id="tags"', '</ul>'),
- "source" : extr('<h2>Source</h2>', '</p><h2>').rpartition(
- ">")[2] or None,
+ "source" : text.unescape(text.extr(
+ extr('id="source-url"', '</a>'), 'href="', '"')),
}
html = data["tags"]
tags = data["tags"] = []
for tag in html.split("<li class=")[1:]:
- category = text.extr(tag, 'data-type="', '"')
+ category = text.extr(tag, '"', '"')
name = text.extr(tag, 'data-tag="', '"')
- tags.append(category.capitalize() + ":" + name)
+ tags.append(category.partition(" ")[0].capitalize() + ":" + name)
return data
def _parse_entry_api(self, entry_id):
url = "{}/{}?json".format(self.root, entry_id)
- item = self.request(url).json()
+ text = self.request(url).text
+ try:
+ item = util.json_loads(text)
+ except ValueError as exc:
+ if " control character " not in str(exc):
+ raise
+ text = re.sub(r"[\x00-\x1f\x7f]", "", text)
+ item = util.json_loads(text)
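[zerochan] The ?json endpoint can embed raw control characters that a strict JSON parser rejects; the retry above strips them and parses again, and only for that specific error, so genuine syntax errors still propagate. The failure mode in isolation, with the stdlib parser standing in for util.json_loads:

import json
import re

raw = '{"source": "page one\x0cpage two"}'  # form feed inside a string
try:
    item = json.loads(raw)
except ValueError as exc:
    assert " control character " in str(exc)  # "Invalid control character at ..."
    item = json.loads(re.sub(r"[\x00-\x1f\x7f]", "", raw))
print(item)  # {'source': 'page onepage two'}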
data = {
"id" : item["id"],
@@ -109,6 +118,14 @@ class ZerochanExtractor(BooruExtractor):
return data
+ def _tags(self, post, page):
+ tags = collections.defaultdict(list)
+ for tag in post["tags"]:
+ category, _, name = tag.partition(":")
+ tags[category].append(name)
+ for key, value in tags.items():
+ post["tags_" + key.lower()] = value
+
class ZerochanTagExtractor(ZerochanExtractor):
subcategory = "tag"
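[zerochan] The new _tags hook regroups the "Category:name" strings gathered from the HTML into one list per category, exposing them as tags_<category> metadata fields. In isolation:

import collections

post = {"tags": ["Character:Hatsune Miku", "Series:Vocaloid",
                 "Character:Kagamine Rin"]}

tags = collections.defaultdict(list)
for tag in post["tags"]:
    category, _, name = tag.partition(":")
    tags[category].append(name)
for key, value in tags.items():
    post["tags_" + key.lower()] = value

# post["tags_character"] == ["Hatsune Miku", "Kagamine Rin"]
# post["tags_series"]    == ["Vocaloid"]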
@@ -180,10 +197,16 @@ class ZerochanTagExtractor(ZerochanExtractor):
static = "https://static.zerochan.net/.full."
while True:
- data = self.request(url, params=params).json()
+ response = self.request(url, params=params, allow_redirects=False)
+ if response.status_code >= 300:
+ url = text.urljoin(self.root, response.headers["location"])
+ response = self.request(url, params=params)
+ data = response.json()
+
try:
posts = data["items"]
- except ValueError:
+ except Exception:
+ self.log.debug("Server response: %s", data)
return
if metadata:
@@ -191,13 +214,13 @@ class ZerochanTagExtractor(ZerochanExtractor):
post_id = post["id"]
post.update(self._parse_entry_html(post_id))
post.update(self._parse_entry_api(post_id))
+ yield post
else:
for post in posts:
base = static + str(post["id"])
post["file_url"] = base + ".jpg"
post["_fallback"] = (base + ".png",)
-
- yield from posts
+ yield post
if not data.get("next"):
return
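[zerochan] The tag pagination above now disables automatic redirects and follows a single 3xx manually, rebuilding the URL from the Location header so the JSON query parameters survive tag renames. The same pattern with plain requests (a sketch, not the extractor's code):

import requests

def get_json_following_redirect(session, url, params):
    # keep control of params across the redirect instead of
    # letting requests re-issue the request on its own
    response = session.get(url, params=params, allow_redirects=False)
    if response.status_code >= 300:
        url = requests.compat.urljoin(url, response.headers["location"])
        response = session.get(url, params=params)
    return response.json()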