| author | 2024-10-25 17:27:30 -0400 |
|---|---|
| committer | 2024-10-25 17:27:30 -0400 |
| commit | fc004701f923bb954a22c7fec2ae8d607e78cb2b (patch) |
| tree | a5bea4ed6447ea43c099131430e3bd6182ee87d7 /gallery_dl/extractor |
| parent | 0db541f524e1774865efebcbe5653e9ad76ea2e8 (diff) |
New upstream version 1.27.7 (tag: upstream/1.27.7)
Diffstat (limited to 'gallery_dl/extractor')
28 files changed, 887 insertions, 334 deletions
```diff
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index f81d2a1..ce1c52a 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -27,12 +27,22 @@ class _8chanExtractor(Extractor):
         Extractor.__init__(self, match)
 
     def _init(self):
-        now = util.datetime_utcnow()
-        domain = self.root.rpartition("/")[2]
-        self.cookies.set(
-            now.strftime("TOS%Y%m%d"), "1", domain=domain)
-        self.cookies.set(
-            (now - timedelta(1)).strftime("TOS%Y%m%d"), "1", domain=domain)
+        tos = self.cookies_tos_name()
+        self.cookies.set(tos, "1", domain=self.root[8:])
+
+    @memcache()
+    def cookies_tos_name(self):
+        url = self.root + "/.static/pages/confirmed.html"
+        headers = {"Referer": self.root + "/.static/pages/disclaimer.html"}
+        response = self.request(url, headers=headers, allow_redirects=False)
+
+        for cookie in response.cookies:
+            if cookie.name.lower().startswith("tos"):
+                self.log.debug("TOS cookie name: %s", cookie.name)
+                return cookie.name
+
+        self.log.error("Unable to determine TOS cookie name")
+        return "TOS20241009"
 
     @memcache()
     def cookies_prepare(self):
@@ -64,16 +74,14 @@ class _8chanThreadExtractor(_8chanExtractor):
                      "{threadId} {subject[:50]}")
     filename_fmt = "{postId}{num:?-//} {filename[:200]}.{extension}"
     archive_fmt = "{boardUri}_{postId}_{num}"
-    pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
+    pattern = BASE_PATTERN + r"/([^/?#]+)/(?:res|last)/(\d+)"
     example = "https://8chan.moe/a/res/12345.html"
 
-    def __init__(self, match):
-        _8chanExtractor.__init__(self, match)
-        _, self.board, self.thread = match.groups()
-
     def items(self):
+        _, board, thread = self.groups
+
         # fetch thread data
-        url = "{}/{}/res/{}.".format(self.root, self.board, self.thread)
+        url = "{}/{}/res/{}.".format(self.root, board, thread)
         self.session.headers["Referer"] = url + "html"
         thread = self.request(url + "json").json()
         thread["postId"] = thread["threadId"]
@@ -106,25 +114,22 @@ class _8chanBoardExtractor(_8chanExtractor):
     pattern = BASE_PATTERN + r"/([^/?#]+)/(?:(\d+)\.html)?$"
     example = "https://8chan.moe/a/"
 
-    def __init__(self, match):
-        _8chanExtractor.__init__(self, match)
-        _, self.board, self.page = match.groups()
-
     def items(self):
-        page = text.parse_int(self.page, 1)
-        url = "{}/{}/{}.json".format(self.root, self.board, page)
-        board = self.request(url).json()
-        threads = board["threads"]
+        _, board, pnum = self.groups
+        pnum = text.parse_int(pnum, 1)
+        url = "{}/{}/{}.json".format(self.root, board, pnum)
+        data = self.request(url).json()
+        threads = data["threads"]
 
         while True:
             for thread in threads:
                 thread["_extractor"] = _8chanThreadExtractor
                 url = "{}/{}/res/{}.html".format(
-                    self.root, self.board, thread["threadId"])
+                    self.root, board, thread["threadId"])
                 yield Message.Queue, url, thread
 
-            page += 1
-            if page > board["pageCount"]:
+            pnum += 1
+            if pnum > data["pageCount"]:
                 return
-            url = "{}/{}/{}.json".format(self.root, self.board, page)
+            url = "{}/{}/{}.json".format(self.root, board, pnum)
             threads = self.request(url).json()["threads"]
```
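The 8chan change above stops guessing date-based TOS cookie names and instead reads the live name from the Set-Cookie headers of the confirmation page. A minimal standalone sketch of the same idea, using plain `requests` instead of gallery-dl's `Extractor.request` (URL paths and the fallback name are the ones from the hunk):

```python
import requests

def discover_tos_cookie(root="https://8chan.moe"):
    """Fetch the disclaimer-confirmation page without following
    redirects and read the TOS cookie name the server sets."""
    url = root + "/.static/pages/confirmed.html"
    headers = {"Referer": root + "/.static/pages/disclaimer.html"}
    response = requests.get(url, headers=headers, allow_redirects=False)

    for cookie in response.cookies:
        # the name rotates (e.g. "TOS20241009") but keeps its prefix
        if cookie.name.lower().startswith("tos"):
            return cookie.name

    return "TOS20241009"  # last known name, as hard-coded in the diff
```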
```diff
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 9885195..4e9fa50 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -141,6 +141,7 @@ modules = [
     "rule34us",
     "sankaku",
     "sankakucomplex",
+    "scrolller",
     "seiga",
     "senmanga",
     "sexcom",
```

```diff
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index 72f9195..14598b7 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -171,6 +171,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
                 url = text.extr(page, '<source src="', '"')
                 if text.ext_from_url(url) == "m3u8":
                     url = "ytdl:" + url
+                    module["_ytdl_manifest"] = "hls"
                     module["extension"] = "mp4"
                 append((url, module))
                 continue
```

```diff
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index 39c5635..a1a488e 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -42,62 +42,76 @@ class BlueskyExtractor(Extractor):
         self._user = self._user_did = None
         self.instance = self.root.partition("://")[2]
         self.videos = self.config("videos", True)
+        self.quoted = self.config("quoted", False)
 
     def items(self):
         for post in self.posts():
             if "post" in post:
                 post = post["post"]
-
-            pid = post["uri"].rpartition("/")[2]
             if self._user_did and post["author"]["did"] != self._user_did:
-                self.log.debug("Skipping %s (repost)", pid)
-                continue
-
-            post.update(post["record"])
-            del post["record"]
-
-            if self._metadata_facets:
-                if "facets" in post:
-                    post["hashtags"] = tags = []
-                    post["mentions"] = dids = []
-                    post["uris"] = uris = []
-                    for facet in post["facets"]:
-                        features = facet["features"][0]
-                        if "tag" in features:
-                            tags.append(features["tag"])
-                        elif "did" in features:
-                            dids.append(features["did"])
-                        elif "uri" in features:
-                            uris.append(features["uri"])
-                else:
-                    post["hashtags"] = post["mentions"] = post["uris"] = ()
-
-            if self._metadata_user:
-                post["user"] = self._user or post["author"]
-
-            files = self._extract_files(post)
-            post["instance"] = self.instance
-            post["post_id"] = pid
-            post["count"] = len(files)
-            post["date"] = text.parse_datetime(
-                post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
-
-            yield Message.Directory, post
-
-            if not files:
+                self.log.debug("Skipping %s (repost)", self._pid(post))
                 continue
-
-            base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
-                    "?did={}&cid=".format(post["author"]["did"]))
-            for post["num"], file in enumerate(files, 1):
-                post.update(file)
-                yield Message.Url, base + file["filename"], post
+            embed = post.get("embed")
+            post.update(post.pop("record"))
+
+            while True:
+                self._prepare(post)
+                files = self._extract_files(post)
+
+                yield Message.Directory, post
+                if files:
+                    base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
+                            "?did={}&cid=".format(post["author"]["did"]))
+                    for post["num"], file in enumerate(files, 1):
+                        post.update(file)
+                        yield Message.Url, base + file["filename"], post
+
+                if not self.quoted or not embed or "record" not in embed:
+                    break
+
+                quote = embed["record"]
+                if "record" in quote:
+                    quote = quote["record"]
+                quote["quote_id"] = self._pid(post)
+                quote["quote_by"] = post["author"]
+                embed = quote.get("embed")
+                quote.update(quote.pop("value"))
+                post = quote
 
     def posts(self):
         return ()
 
+    def _pid(self, post):
+        return post["uri"].rpartition("/")[2]
+
+    def _prepare(self, post):
+        if self._metadata_facets:
+            if "facets" in post:
+                post["hashtags"] = tags = []
+                post["mentions"] = dids = []
+                post["uris"] = uris = []
+                for facet in post["facets"]:
+                    features = facet["features"][0]
+                    if "tag" in features:
+                        tags.append(features["tag"])
+                    elif "did" in features:
+                        dids.append(features["did"])
+                    elif "uri" in features:
+                        uris.append(features["uri"])
+            else:
+                post["hashtags"] = post["mentions"] = post["uris"] = ()
+
+        if self._metadata_user:
+            post["user"] = self._user or post["author"]
+
+        post["instance"] = self.instance
+        post["post_id"] = self._pid(post)
+        post["date"] = text.parse_datetime(
+            post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
+
     def _extract_files(self, post):
         if "embed" not in post:
+            post["count"] = 0
             return ()
 
         files = []
@@ -111,6 +125,7 @@ class BlueskyExtractor(Extractor):
             if "video" in media and self.videos:
                 files.append(self._extract_media(media, "video"))
 
+        post["count"] = len(files)
         return files
 
     def _extract_media(self, media, key):
```

```diff
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 9022ffc..6c79d0a 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -8,9 +8,10 @@
 
 """Extractors for https://bunkr.si/"""
 
+from .common import Extractor
 from .lolisafe import LolisafeAlbumExtractor
-from .. import text, config
-
+from .. import text, config, exception
+import random
 
 if config.get(("extractor", "bunkr"), "tlds"):
     BASE_PATTERN = (
@@ -21,11 +22,28 @@ else:
     BASE_PATTERN = (
         r"(?:bunkr:(?:https?://)?([^/?#]+)|"
         r"(?:https?://)?(?:app\.)?(bunkr+"
-        r"\.(?:s[kiu]|[cf]i|pk|ru|la|is|to|a[cx]"
+        r"\.(?:s[kiu]|[cf]i|p[hks]|ru|la|is|to|a[cx]"
         r"|black|cat|media|red|site|ws|org)))"
     )
 
+DOMAINS = [
+    "bunkr.ac",
+    "bunkr.ci",
+    "bunkr.fi",
+    "bunkr.ph",
+    "bunkr.pk",
+    "bunkr.ps",
+    "bunkr.si",
+    "bunkr.sk",
+    "bunkr.ws",
+    "bunkr.black",
+    "bunkr.red",
+    "bunkr.media",
+    "bunkr.site",
+]
 LEGACY_DOMAINS = {
+    "bunkr.ax",
+    "bunkr.cat",
     "bunkr.ru",
     "bunkrr.ru",
     "bunkr.su",
@@ -34,6 +52,7 @@ LEGACY_DOMAINS = {
     "bunkr.is",
     "bunkr.to",
 }
+CF_DOMAINS = set()
 
 
 class BunkrAlbumExtractor(LolisafeAlbumExtractor):
@@ -49,45 +68,96 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
         if domain not in LEGACY_DOMAINS:
             self.root = "https://" + domain
 
+    def request(self, url, **kwargs):
+        kwargs["allow_redirects"] = False
+
+        while True:
+            try:
+                response = Extractor.request(self, url, **kwargs)
+                if response.status_code < 300:
+                    return response
+
+                # redirect
+                url = response.headers["Location"]
+                root, path = self._split(url)
+                if root not in CF_DOMAINS:
+                    continue
+                self.log.debug("Redirect to known CF challenge domain '%s'",
+                               root)
+
+            except exception.HttpError as exc:
+                if exc.status != 403:
+                    raise
+
+                # CF challenge
+                root, path = self._split(url)
+                CF_DOMAINS.add(root)
+                self.log.debug("Added '%s' to CF challenge domains", root)
+
+                try:
+                    DOMAINS.remove(root.rpartition("/")[2])
+                except ValueError:
+                    pass
+                else:
+                    if not DOMAINS:
+                        raise exception.StopExtraction(
+                            "All Bunkr domains require solving a CF challenge")
+
+            # select alternative domain
+            root = "https://" + random.choice(DOMAINS)
+            self.log.debug("Trying '%s' as fallback", root)
+            url = root + path
+
     def fetch_album(self, album_id):
         # album metadata
         page = self.request(self.root + "/a/" + self.album_id).text
-        info = text.split_html(text.extr(
-            page, "<h1", "</div>").partition(">")[2])
-        count, _, size = info[1].split(None, 2)
+        title, size = text.split_html(text.extr(
+            page, "<h1", "</span>").partition(">")[2])
 
-        pos = page.index('class="grid-images')
-        urls = list(text.extract_iter(page, '<a href="', '"', pos))
-
-        return self._extract_files(urls), {
+        items = list(text.extract_iter(page, "<!-- item -->", "<!-- -->"))
+        return self._extract_files(items), {
             "album_id"  : self.album_id,
-            "album_name": text.unescape(info[0]),
-            "album_size": size[1:-1],
-            "count"     : len(urls),
-            "_http_validate": self._validate,
+            "album_name": title,
+            "album_size": text.extr(size, "(", ")"),
+            "count"     : len(items),
         }
 
-    def _extract_files(self, urls):
-        for url in urls:
+    def _extract_files(self, items):
+        for item in items:
             try:
-                url = self._extract_file(text.unescape(url))
+                url = text.extr(item, ' href="', '"')
+                file = self._extract_file(text.unescape(url))
+
+                info = text.split_html(item)
+                file["name"] = info[0]
+                file["size"] = info[2]
+                file["date"] = text.parse_datetime(
+                    info[-1], "%H:%M:%S %d/%m/%Y")
+
+                yield file
+            except exception.StopExtraction:
+                raise
             except Exception as exc:
                 self.log.error("%s: %s", exc.__class__.__name__, exc)
-                continue
-            yield {"file": text.unescape(url)}
-
-    def _extract_file(self, url):
-        page = self.request(url).text
-        url = (text.extr(page, '<source src="', '"') or
-               text.extr(page, '<img src="', '"'))
-
-        if not url:
-            url_download = text.rextract(
-                page, ' href="', '"', page.rindex("Download"))[0]
-            page = self.request(text.unescape(url_download)).text
-            url = text.unescape(text.rextract(page, ' href="', '"')[0])
-
-        return url
+                self.log.debug("", exc_info=exc)
+
+    def _extract_file(self, webpage_url):
+        response = self.request(webpage_url)
+        page = response.text
+        file_url = (text.extr(page, '<source src="', '"') or
+                    text.extr(page, '<img src="', '"'))
+
+        if not file_url:
+            webpage_url = text.unescape(text.rextract(
+                page, ' href="', '"', page.rindex("Download"))[0])
+            response = self.request(webpage_url)
+            file_url = text.rextract(response.text, ' href="', '"')[0]
+
+        return {
+            "file"          : text.unescape(file_url),
+            "_http_headers" : {"Referer": response.url},
+            "_http_validate": self._validate,
+        }
 
     def _validate(self, response):
         if response.history and response.url.endswith("/maintenance-vid.mp4"):
@@ -95,6 +165,10 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
             return False
         return True
 
+    def _split(self, url):
+        pos = url.index("/", 8)
+        return url[:pos], url[pos:]
+
 
 class BunkrMediaExtractor(BunkrAlbumExtractor):
     """Extractor for bunkr.si media links"""
@@ -105,16 +179,15 @@ class BunkrMediaExtractor(BunkrAlbumExtractor):
 
     def fetch_album(self, album_id):
         try:
-            url = self._extract_file(self.root + self.album_id)
+            file = self._extract_file(self.root + album_id)
         except Exception as exc:
             self.log.error("%s: %s", exc.__class__.__name__, exc)
             return (), {}
 
-        return ({"file": text.unescape(url)},), {
+        return (file,), {
             "album_id"   : "",
             "album_name" : "",
             "album_size" : -1,
             "description": "",
             "count"      : 1,
-            "_http_validate": self._validate,
         }
```
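The bunkr.py change above retries across mirror domains once one of them starts answering with a Cloudflare challenge (HTTP 403). A condensed, self-contained sketch of that fallback logic (domain list abbreviated; `RuntimeError` stands in for gallery-dl's `StopExtraction`):

```python
import random

DOMAINS = ["bunkr.si", "bunkr.fi", "bunkr.ph"]  # abbreviated list
CF_DOMAINS = set()  # roots known to serve a CF challenge

def split(url):
    # "https://host/path" -> ("https://host", "/path")
    pos = url.index("/", 8)
    return url[:pos], url[pos:]

def next_candidate(url):
    """Mark the current domain as challenge-protected and rebuild the
    same path on a randomly chosen domain still believed to work."""
    root, path = split(url)
    CF_DOMAINS.add(root)
    try:
        DOMAINS.remove(root.rpartition("/")[2])
    except ValueError:
        pass  # already removed
    if not DOMAINS:
        raise RuntimeError("all domains require solving a CF challenge")
    return "https://" + random.choice(DOMAINS) + path

print(next_candidate("https://bunkr.si/f/abc123"))
```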
//}") - filename_fmt = "{file[id]}.{extension}" - archive_fmt = "{file[hash]}" pattern = BASE_PATTERN + r"/models/(\d+)(?:/?\?modelVersionId=(\d+))?" example = "https://civitai.com/models/12345/TITLE" @@ -195,19 +195,25 @@ class CivitaiModelExtractor(CivitaiExtractor): ) def _extract_files_model(self, model, version, user): - return [ - { + files = [] + + for num, file in enumerate(version["files"], 1): + file["uuid"] = "model-{}-{}-{}".format( + model["id"], version["id"], file["id"]) + files.append({ "num" : num, "file" : file, "filename" : file["name"], "extension": "bin", - "url" : file["downloadUrl"], + "url" : file.get("downloadUrl") or + "{}/api/download/models/{}".format( + self.root, version["id"]), "_http_headers" : { "Authorization": self.api.headers.get("Authorization")}, "_http_validate": self._validate_file_model, - } - for num, file in enumerate(version["files"], 1) - ] + }) + + return files def _extract_files_image(self, model, version, user): if "images" in version: @@ -263,24 +269,14 @@ class CivitaiPostExtractor(CivitaiExtractor): return ({"id": int(self.groups[0])},) -class CivitaiTagModelsExtractor(CivitaiExtractor): - subcategory = "tag-models" - pattern = BASE_PATTERN + r"/(?:tag/|models\?tag=)([^/?&#]+)" +class CivitaiTagExtractor(CivitaiExtractor): + subcategory = "tag" + pattern = BASE_PATTERN + r"/tag/([^/?&#]+)" example = "https://civitai.com/tag/TAG" def models(self): tag = text.unquote(self.groups[0]) - return self.api.models({"tag": tag}) - - -class CivitaiTagImagesExtractor(CivitaiExtractor): - subcategory = "tag-images" - pattern = BASE_PATTERN + r"/images\?tags=([^&#]+)" - example = "https://civitai.com/images?tags=12345" - - def images(self): - tag = text.unquote(self.groups[0]) - return self.api.images({"tag": tag}) + return self.api.models_tag(tag) class CivitaiSearchExtractor(CivitaiExtractor): @@ -293,6 +289,26 @@ class CivitaiSearchExtractor(CivitaiExtractor): return self.api.models(params) +class CivitaiModelsExtractor(CivitaiExtractor): + subcategory = "models" + pattern = BASE_PATTERN + r"/models(?:/?\?([^#]+))?(?:$|#)" + example = "https://civitai.com/models" + + def models(self): + params = text.parse_query(self.groups[0]) + return self.api.models(params) + + +class CivitaiImagesExtractor(CivitaiExtractor): + subcategory = "images" + pattern = BASE_PATTERN + r"/images(?:/?\?([^#]+))?(?:$|#)" + example = "https://civitai.com/images" + + def images(self): + params = text.parse_query(self.groups[0]) + return self.api.images(params) + + class CivitaiUserExtractor(CivitaiExtractor): subcategory = "user" pattern = USER_PATTERN + r"/?(?:$|\?|#)" @@ -339,11 +355,35 @@ class CivitaiUserImagesExtractor(CivitaiExtractor): pattern = USER_PATTERN + r"/images/?(?:\?([^#]+))?" 
example = "https://civitai.com/user/USER/images" + def __init__(self, match): + self.params = text.parse_query_list(match.group(2)) + if self.params.get("section") == "reactions": + self.subcategory = "reactions" + self.images = self.images_reactions + CivitaiExtractor.__init__(self, match) + def images(self): - params = text.parse_query(self.groups[1]) + params = self.params params["username"] = text.unquote(self.groups[0]) return self.api.images(params) + def images_reactions(self): + if "Authorization" not in self.api.headers and \ + not self.cookies.get( + "__Secure-civitai-token", domain=".civitai.com"): + raise exception.AuthorizationError("api-key or cookies required") + + params = self.params + params["authed"] = True + params["useIndex"] = False + if "reactions" in params: + if isinstance(params["reactions"], str): + params["reactions"] = (params["reactions"],) + else: + params["reactions"] = ( + "Like", "Dislike", "Heart", "Laugh", "Cry") + return self.api.images(params) + class CivitaiRestAPI(): """Interface for the Civitai Public REST API @@ -396,6 +436,9 @@ class CivitaiRestAPI(): def models(self, params): return self._pagination("/v1/models", params) + def models_tag(self, tag): + return self.models({"tag": tag}) + def _call(self, endpoint, params=None): if endpoint[0] == "/": url = self.root + endpoint @@ -419,14 +462,14 @@ class CivitaiRestAPI(): class CivitaiTrpcAPI(): - """Interface for the Civitai TRPC API""" + """Interface for the Civitai tRPC API""" def __init__(self, extractor): self.extractor = extractor self.root = extractor.root + "/api/trpc/" self.headers = { "content-type" : "application/json", - "x-client-version": "5.0.146", + "x-client-version": "5.0.185", "x-client-date" : "", "x-client" : "web", "x-fingerprint" : "undefined", @@ -463,6 +506,7 @@ class CivitaiTrpcAPI(): "include" : ["cosmetics"], }) + params = self._type_params(params) return self._pagination(endpoint, params) def images_gallery(self, model, version, user): @@ -516,6 +560,9 @@ class CivitaiTrpcAPI(): return self._pagination(endpoint, params) + def models_tag(self, tag): + return self.models({"tagname": tag}) + def post(self, post_id): endpoint = "post.get" params = {"id": int(post_id)} @@ -580,3 +627,13 @@ class CivitaiTrpcAPI(): def _merge_params(self, params_user, params_default): params_default.update(params_user) return params_default + + def _type_params(self, params): + for key, type in ( + ("tags" , int), + ("modelId" , int), + ("modelVersionId", int), + ): + if key in params: + params[key] = type(params[key]) + return params diff --git a/gallery_dl/extractor/cohost.py b/gallery_dl/extractor/cohost.py index 4722a4f..0524239 100644 --- a/gallery_dl/extractor/cohost.py +++ b/gallery_dl/extractor/cohost.py @@ -109,7 +109,7 @@ class CohostUserExtractor(CohostExtractor): "projectHandle": self.groups[0], "page": 0, "options": { - "pinnedPostsAtTop" : bool(self.pinned), + "pinnedPostsAtTop" : True if self.pinned else False, "hideReplies" : not self.replies, "hideShares" : not self.shares, "hideAsks" : not self.asks, diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 32c8e67..2146fa6 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -185,7 +185,9 @@ class Extractor(): self._dump_response(response) if ( code < 400 or - code < 500 and (not fatal and code != 429 or fatal is None) + code < 500 and ( + not fatal and code != 429 or fatal is None) or + fatal is ... 
```diff
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 836fae7..693def9 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -401,7 +401,7 @@ class DeviantartExtractor(Extractor):
         html = content["html"]
         markup = html["markup"]
 
-        if not markup.startswith("{"):
+        if not markup or markup[0] != "{":
             return markup
 
         if html["type"] == "tiptap":
@@ -1301,7 +1301,7 @@ class DeviantartOAuthAPI():
 
         metadata = extractor.config("metadata", False)
         if not metadata:
-            metadata = bool(extractor.extra)
+            metadata = True if extractor.extra else False
 
         if metadata:
             self.metadata = True
```

```diff
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 01af7a4..3e6d537 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -260,9 +260,9 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             "torrentcount" : extr('>Torrent Download (', ')'),
         }
 
-        if data["uploader"].startswith("<"):
-            data["uploader"] = text.unescape(text.extr(
-                data["uploader"], ">", "<"))
+        uploader = data["uploader"]
+        if uploader and uploader[0] == "<":
+            data["uploader"] = text.unescape(text.extr(uploader, ">", "<"))
 
         f = data["favorites"][0]
         if f == "N":
```

```diff
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 85dd896..44c4542 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -37,7 +37,7 @@ class FoolfuukaExtractor(BaseExtractor):
             if not url and "remote_media_link" in media:
                 url = self.remote(media)
-            if url.startswith("/"):
+            if url and url[0] == "/":
                 url = self.root + url
 
             post["filename"], _, post["extension"] = \
```

```diff
diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py
index 12e8860..72a6453 100644
--- a/gallery_dl/extractor/lensdump.py
+++ b/gallery_dl/extractor/lensdump.py
@@ -17,42 +17,30 @@ class LensdumpBase():
     category = "lensdump"
     root = "https://lensdump.com"
 
-    def nodes(self, page=None):
-        if page is None:
-            page = self.request(self.url).text
-
-        # go through all pages starting from the oldest
-        page_url = text.urljoin(self.root, text.extr(
-            text.extr(page, ' id="list-most-oldest-link"', '>'),
-            'href="', '"'))
-        while page_url is not None:
-            if page_url == self.url:
-                current_page = page
-            else:
-                current_page = self.request(page_url).text
-
-            for node in text.extract_iter(
-                    current_page, ' class="list-item ', '>'):
-                yield node
-
-            # find url of next page
-            page_url = text.extr(
-                text.extr(current_page, ' data-pagination="next"', '>'),
-                'href="', '"')
-            if page_url is not None and len(page_url) > 0:
-                page_url = text.urljoin(self.root, page_url)
-            else:
-                page_url = None
+    def _pagination(self, page, begin, end):
+        while True:
+            yield from text.extract_iter(page, begin, end)
+
+            next = text.extr(page, ' data-pagination="next"', '>')
+            if not next:
+                return
+
+            url = text.urljoin(self.root, text.extr(next, 'href="', '"'))
+            page = self.request(url).text
 
 
 class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
     subcategory = "album"
-    pattern = BASE_PATTERN + r"/(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))"
+    pattern = BASE_PATTERN + r"/a/(\w+)(?:/?\?([^#]+))?"
     example = "https://lensdump.com/a/ID"
 
     def __init__(self, match):
-        GalleryExtractor.__init__(self, match, match.string)
-        self.gallery_id = match.group(1) or match.group(2)
+        self.gallery_id, query = match.groups()
+        if query:
+            url = "{}/a/{}/?{}".format(self.root, self.gallery_id, query)
+        else:
+            url = "{}/a/{}".format(self.root, self.gallery_id)
+        GalleryExtractor.__init__(self, match, url)
 
     def metadata(self, page):
         return {
@@ -62,40 +50,48 @@ class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
         }
 
     def images(self, page):
-        for node in self.nodes(page):
-            # get urls and filenames of images in current page
-            json_data = util.json_loads(text.unquote(
-                text.extr(node, "data-object='", "'") or
-                text.extr(node, 'data-object="', '"')))
-            image_id = json_data.get('name')
-            image_url = json_data.get('url')
-            image_title = json_data.get('title')
+        for image in self._pagination(page, ' class="list-item ', '>'):
+
+            data = util.json_loads(text.unquote(
+                text.extr(image, "data-object='", "'") or
+                text.extr(image, 'data-object="', '"')))
+            image_id = data.get("name")
+            image_url = data.get("url")
+            image_title = data.get("title")
             if image_title is not None:
                 image_title = text.unescape(image_title)
+
             yield (image_url, {
-                'id': image_id,
-                'url': image_url,
-                'title': image_title,
-                'name': json_data.get('filename'),
-                'filename': image_id,
-                'extension': json_data.get('extension'),
-                'height': text.parse_int(json_data.get('height')),
-                'width': text.parse_int(json_data.get('width')),
+                "id"       : image_id,
+                "url"      : image_url,
+                "title"    : image_title,
+                "name"     : data.get("filename"),
+                "filename" : image_id,
+                "extension": data.get("extension"),
+                "width"    : text.parse_int(data.get("width")),
+                "height"   : text.parse_int(data.get("height")),
             })
 
 
 class LensdumpAlbumsExtractor(LensdumpBase, Extractor):
     """Extractor for album list from lensdump.com"""
     subcategory = "albums"
-    pattern = BASE_PATTERN + r"/\w+/albums"
-    example = "https://lensdump.com/USER/albums"
+    pattern = BASE_PATTERN + r"/(?![ai]/)([^/?#]+)(?:/?\?([^#]+))?"
+    example = "https://lensdump.com/USER"
 
     def items(self):
-        for node in self.nodes():
-            album_url = text.urljoin(self.root, text.extr(
-                node, 'data-url-short="', '"'))
-            yield Message.Queue, album_url, {
-                "_extractor": LensdumpAlbumExtractor}
+        user, query = self.groups
+        url = "{}/{}/".format(self.root, user)
+        if query:
+            params = text.parse_query(query)
+        else:
+            params = {"sort": "date_asc", "page": "1"}
+        page = self.request(url, params=params).text
+
+        data = {"_extractor": LensdumpAlbumExtractor}
+        for album_path in self._pagination(page, 'data-url-short="', '"'):
+            album_url = text.urljoin(self.root, album_path)
+            yield Message.Queue, album_url, data
 
 
 class LensdumpImageExtractor(LensdumpBase, Extractor):
@@ -107,16 +103,13 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
     pattern = r"(?:https?://)?(?:(?:i\d?\.)?lensdump\.com|\w\.l3n\.co)/i/(\w+)"
     example = "https://lensdump.com/i/ID"
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
-        self.key = match.group(1)
-
     def items(self):
-        url = "{}/i/{}".format(self.root, self.key)
+        key = self.groups[0]
+        url = "{}/i/{}".format(self.root, key)
         extr = text.extract_from(self.request(url).text)
 
         data = {
-            "id"    : self.key,
+            "id"    : key,
             "title" : text.unescape(extr(
                 'property="og:title" content="', '"')),
             "url"   : extr(
```

```diff
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index 6fc0689..044f4f5 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -47,7 +47,15 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
             url = file["file"]
             file.update(data)
             text.nameext_from_url(url, file)
-            file["name"], sep, file["id"] = file["filename"].rpartition("-")
+
+            if "name" in file:
+                name = file["name"]
+                file["name"] = name.rpartition(".")[0] or name
+                file["id"] = file["filename"].rpartition("-")[2]
+            else:
+                file["name"], sep, file["id"] = \
+                    file["filename"].rpartition("-")
+
             yield Message.Url, url, file
 
     def fetch_album(self, album_id):
```
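The lolisafe change above prefers a server-supplied `name` field (which bunkr now provides) and only falls back to splitting the URL filename on its last `-`. A sketch of both branches with plain dicts standing in for gallery-dl's file objects (filenames are made up):

```python
def file_name_and_id(file):
    """Derive 'name' and 'id' the way the hunk does: lolisafe-style
    filenames look like 'title-abc123', with the server-assigned ID
    after the last '-'."""
    filename = file["filename"]
    if "name" in file:
        name = file["name"]
        # strip a trailing extension, but keep extension-less names intact
        file["name"] = name.rpartition(".")[0] or name
        file["id"] = filename.rpartition("-")[2]
    else:
        file["name"], _, file["id"] = filename.rpartition("-")
    return file

print(file_name_and_id({"filename": "holiday-pics-x7q9z2"}))
# {'filename': 'holiday-pics-x7q9z2', 'name': 'holiday-pics', 'id': 'x7q9z2'}
print(file_name_and_id({"filename": "holiday-pics-x7q9z2",
                        "name": "holiday pics.jpg"}))
# {'filename': ..., 'name': 'holiday pics', 'id': 'x7q9z2'}
```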
{"authorOrArtist": uuid_author} + return self._pagination("/manga", params) + def manga_feed(self, uuid): order = "desc" if self.extractor.config("chapter-reverse") else "asc" params = { diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py index 0183b25..9fc8681 100644 --- a/gallery_dl/extractor/mangakakalot.py +++ b/gallery_dl/extractor/mangakakalot.py @@ -19,7 +19,7 @@ BASE_PATTERN = r"(?:https?://)?(?:ww[\dw]?\.)?mangakakalot\.tv" class MangakakalotBase(): """Base class for mangakakalot extractors""" category = "mangakakalot" - root = "https://ww6.mangakakalot.tv" + root = "https://ww8.mangakakalot.tv" class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor): @@ -40,7 +40,7 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor): match = re.match( r"(?:[Vv]ol\. *(\d+) )?" r"[Cc]hapter *([^:]*)" - r"(?:: *(.+))?", info) + r"(?:: *(.+))?", info or "") volume, chapter, title = match.groups() if match else ("", "", info) chapter, sep, minor = chapter.partition(".") @@ -86,7 +86,7 @@ class MangakakalotMangaExtractor(MangakakalotBase, MangaExtractor): data["chapter"] = text.parse_int(chapter) data["chapter_minor"] = sep + minor - if url.startswith("/"): + if url[0] == "/": url = self.root + url results.append((url, data.copy())) return results diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 2928573..61ffdee 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -14,6 +14,9 @@ from ..cache import cache import itertools import re +BASE_PATTERN = r"(?:https?://)?(?:www\.)?newgrounds\.com" +USER_PATTERN = r"(?:https?://)?([\w-]+)\.newgrounds\.com" + class NewgroundsExtractor(Extractor): """Base class for newgrounds extractors""" @@ -93,7 +96,7 @@ class NewgroundsExtractor(Extractor): def posts(self): """Return URLs of all relevant post pages""" - return self._pagination(self._path) + return self._pagination(self._path, self.groups[1]) def metadata(self): """Return general metadata""" @@ -334,10 +337,10 @@ class NewgroundsExtractor(Extractor): for fmt in formats: yield fmt[1][0]["src"] - def _pagination(self, kind): + def _pagination(self, kind, pnum=1): url = "{}/{}".format(self.user_root, kind) params = { - "page": 1, + "page": text.parse_int(pnum, 1), "isAjaxRequest": "1", } headers = { @@ -400,8 +403,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor): class NewgroundsMediaExtractor(NewgroundsExtractor): """Extractor for a media file from newgrounds.com""" subcategory = "media" - pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com" - r"(/(?:portal/view|audio/listen)/\d+)") + pattern = BASE_PATTERN + r"(/(?:portal/view|audio/listen)/\d+)" example = "https://www.newgrounds.com/portal/view/12345" def __init__(self, match): @@ -416,35 +418,35 @@ class NewgroundsMediaExtractor(NewgroundsExtractor): class NewgroundsArtExtractor(NewgroundsExtractor): """Extractor for all images of a newgrounds user""" subcategory = _path = "art" - pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/art/?$" + pattern = USER_PATTERN + r"/art(?:(?:/page/|/?\?page=)(\d+))?/?$" example = "https://USER.newgrounds.com/art" class NewgroundsAudioExtractor(NewgroundsExtractor): """Extractor for all audio submissions of a newgrounds user""" subcategory = _path = "audio" - pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/audio/?$" + pattern = USER_PATTERN + r"/audio(?:(?:/page/|/?\?page=)(\d+))?/?$" example = "https://USER.newgrounds.com/audio" class 
NewgroundsMoviesExtractor(NewgroundsExtractor): """Extractor for all movies of a newgrounds user""" subcategory = _path = "movies" - pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/movies/?$" + pattern = USER_PATTERN + r"/movies(?:(?:/page/|/?\?page=)(\d+))?/?$" example = "https://USER.newgrounds.com/movies" class NewgroundsGamesExtractor(NewgroundsExtractor): """Extractor for a newgrounds user's games""" subcategory = _path = "games" - pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/games/?$" + pattern = USER_PATTERN + r"/games(?:(?:/page/|/?\?page=)(\d+))?/?$" example = "https://USER.newgrounds.com/games" class NewgroundsUserExtractor(NewgroundsExtractor): """Extractor for a newgrounds user profile""" subcategory = "user" - pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/?$" + pattern = USER_PATTERN + r"/?$" example = "https://USER.newgrounds.com" def initialize(self): @@ -464,25 +466,22 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor): """Extractor for posts favorited by a newgrounds user""" subcategory = "favorite" directory_fmt = ("{category}", "{user}", "Favorites") - pattern = (r"(?:https?://)?([\w-]+)\.newgrounds\.com" - r"/favorites(?!/following)(?:/(art|audio|movies))?/?") + pattern = (USER_PATTERN + r"/favorites(?!/following)(?:/(art|audio|movies)" + r"(?:(?:/page/|/?\?page=)(\d+))?)?") example = "https://USER.newgrounds.com/favorites" - def __init__(self, match): - NewgroundsExtractor.__init__(self, match) - self.kind = match.group(2) - def posts(self): - if self.kind: - return self._pagination(self.kind) + _, kind, pnum = self.groups + if kind: + return self._pagination_favorites(kind, pnum) return itertools.chain.from_iterable( - self._pagination(k) for k in ("art", "audio", "movies") + self._pagination_favorites(k) for k in ("art", "audio", "movies") ) - def _pagination(self, kind): + def _pagination_favorites(self, kind, pnum=1): url = "{}/favorites/{}".format(self.user_root, kind) params = { - "page": 1, + "page": text.parse_int(pnum, 1), "isAjaxRequest": "1", } headers = { @@ -514,12 +513,13 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor): class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor): """Extractor for a newgrounds user's favorited users""" subcategory = "following" - pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/favorites/(following)" + pattern = USER_PATTERN + r"/favorites/(following)" example = "https://USER.newgrounds.com/favorites/following" def items(self): + _, kind, pnum = self.groups data = {"_extractor": NewgroundsUserExtractor} - for url in self._pagination(self.kind): + for url in self._pagination_favorites(kind, pnum): yield Message.Queue, url, data @staticmethod @@ -534,13 +534,12 @@ class NewgroundsSearchExtractor(NewgroundsExtractor): """Extractor for newgrounds.com search reesults""" subcategory = "search" directory_fmt = ("{category}", "search", "{search_tags}") - pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com" - r"/search/conduct/([^/?#]+)/?\?([^#]+)") + pattern = BASE_PATTERN + r"/search/conduct/([^/?#]+)/?\?([^#]+)" example = "https://www.newgrounds.com/search/conduct/art?terms=QUERY" def __init__(self, match): NewgroundsExtractor.__init__(self, match) - self._path, query = match.groups() + self._path, query = self.groups self.query = text.parse_query(query) def posts(self): @@ -550,19 +549,20 @@ class NewgroundsSearchExtractor(NewgroundsExtractor): for s in suitabilities.split(",")} self.request(self.root + "/suitabilities", method="POST", data=data) - return 
self._pagination("/search/conduct/" + self._path, self.query) + return self._pagination_search( + "/search/conduct/" + self._path, self.query) def metadata(self): return {"search_tags": self.query.get("terms", "")} - def _pagination(self, path, params): + def _pagination_search(self, path, params): url = self.root + path + params["inner"] = "1" + params["page"] = text.parse_int(params.get("page"), 1) headers = { "Accept": "application/json, text/javascript, */*; q=0.01", "X-Requested-With": "XMLHttpRequest", } - params["inner"] = "1" - params["page"] = 1 while True: data = self.request(url, params=params, headers=headers).json() diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py index 8c7ffe5..851f663 100644 --- a/gallery_dl/extractor/nozomi.py +++ b/gallery_dl/extractor/nozomi.py @@ -63,7 +63,8 @@ class NozomiExtractor(Extractor): yield Message.Directory, post for post["num"], image in enumerate(images, 1): post["filename"] = post["dataid"] = did = image["dataid"] - post["is_video"] = video = bool(image.get("is_video")) + post["is_video"] = video = \ + True if image.get("is_video") else False ext = image["type"] if video: diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index d47ffa2..0b64ea3 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -56,6 +56,7 @@ class PatreonExtractor(Extractor): text.nameext_from_url(name, post) if text.ext_from_url(url) == "m3u8": url = "ytdl:" + url + post["_ytdl_manifest"] = "hls" post["extension"] = "mp4" yield Message.Url, url, post else: @@ -310,7 +311,7 @@ class PatreonCreatorExtractor(PatreonExtractor): subcategory = "creator" pattern = (r"(?:https?://)?(?:www\.)?patreon\.com" r"/(?!(?:home|join|posts|login|signup)(?:$|[/?#]))" - r"([^/?#]+)(?:/posts)?/?(?:\?([^#]+))?") + r"(?:c/)?([^/?#]+)(?:/posts)?/?(?:\?([^#]+))?") example = "https://www.patreon.com/USER" def posts(self): @@ -340,9 +341,9 @@ class PatreonCreatorExtractor(PatreonExtractor): user_id = query.get("u") if user_id: - url = "{}/user/posts?u={}".format(self.root, user_id) + url = "{}/user?u={}".format(self.root, user_id) else: - url = "{}/{}/posts".format(self.root, creator) + url = "{}/{}".format(self.root, creator) page = self.request(url, notfound="creator").text try: diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 8c04ed5..499c579 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -18,8 +18,8 @@ BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+" class PinterestExtractor(Extractor): """Base class for pinterest extractors""" category = "pinterest" - filename_fmt = "{category}_{id}{media_id:?_//}.{extension}" - archive_fmt = "{id}{media_id}" + filename_fmt = "{category}_{id}{media_id|page_id:?_//}.{extension}" + archive_fmt = "{id}{media_id|page_id}" root = "https://www.pinterest.com" def _init(self): @@ -30,12 +30,12 @@ class PinterestExtractor(Extractor): self.root = text.ensure_http_scheme(domain) self.api = PinterestAPI(self) + self.stories = self.config("stories", True) + self.videos = self.config("videos", True) def items(self): data = self.metadata() - videos = self.config("videos", True) - yield Message.Directory, data for pin in self.pins(): if isinstance(pin, tuple): @@ -43,40 +43,35 @@ class PinterestExtractor(Extractor): yield Message.Queue, url, data continue + try: + files = self._extract_files(pin) + except Exception as exc: + self.log.debug("", exc_info=exc) + self.log.warning( + "%s: Error when 
```diff
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 8c04ed5..499c579 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -18,8 +18,8 @@ BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
 class PinterestExtractor(Extractor):
     """Base class for pinterest extractors"""
     category = "pinterest"
-    filename_fmt = "{category}_{id}{media_id:?_//}.{extension}"
-    archive_fmt = "{id}{media_id}"
+    filename_fmt = "{category}_{id}{media_id|page_id:?_//}.{extension}"
+    archive_fmt = "{id}{media_id|page_id}"
     root = "https://www.pinterest.com"
 
     def _init(self):
@@ -30,12 +30,12 @@ class PinterestExtractor(Extractor):
             self.root = text.ensure_http_scheme(domain)
 
         self.api = PinterestAPI(self)
+        self.stories = self.config("stories", True)
+        self.videos = self.config("videos", True)
 
     def items(self):
         data = self.metadata()
-        videos = self.config("videos", True)
-
         yield Message.Directory, data
         for pin in self.pins():
 
             if isinstance(pin, tuple):
@@ -43,40 +43,35 @@ class PinterestExtractor(Extractor):
                 url, data = pin
                 yield Message.Queue, url, data
                 continue
 
+            try:
+                files = self._extract_files(pin)
+            except Exception as exc:
+                self.log.debug("", exc_info=exc)
+                self.log.warning(
+                    "%s: Error when extracting download URLs (%s: %s)",
+                    pin.get("id"), exc.__class__.__name__, exc)
+                continue
+
             pin.update(data)
+            pin["count"] = len(files)
 
-            carousel_data = pin.get("carousel_data")
-            if carousel_data:
-                pin["count"] = len(carousel_data["carousel_slots"])
-                for num, slot in enumerate(carousel_data["carousel_slots"], 1):
-                    slot["media_id"] = slot.pop("id")
-                    pin.update(slot)
-                    pin["num"] = num
-                    size, image = next(iter(slot["images"].items()))
-                    url = image["url"].replace("/" + size + "/", "/originals/")
-                    yield Message.Url, url, text.nameext_from_url(url, pin)
-
-            else:
-                try:
-                    media = self._media_from_pin(pin)
-                except Exception:
-                    self.log.debug("Unable to fetch download URL for pin %s",
-                                   pin.get("id"))
-                    continue
+            yield Message.Directory, pin
+            for pin["num"], file in enumerate(files, 1):
+                url = file["url"]
+                text.nameext_from_url(url, pin)
+                pin.update(file)
 
-                if videos or media.get("duration") is None:
-                    pin.update(media)
-                    pin["num"] = pin["count"] = 1
+                if "media_id" not in file:
                     pin["media_id"] = ""
+                if "page_id" not in file:
+                    pin["page_id"] = ""
 
-                    url = media["url"]
-                    text.nameext_from_url(url, pin)
+                if pin["extension"] == "m3u8":
+                    url = "ytdl:" + url
+                    pin["_ytdl_manifest"] = "hls"
+                    pin["extension"] = "mp4"
 
-                    if pin["extension"] == "m3u8":
-                        url = "ytdl:" + url
-                        pin["extension"] = "mp4"
-
-                    yield Message.Url, url, pin
+                yield Message.Url, url, pin
 
     def metadata(self):
         """Return general metadata"""
@@ -84,26 +79,108 @@ class PinterestExtractor(Extractor):
     def pins(self):
         """Return all relevant pin objects"""
 
-    @staticmethod
-    def _media_from_pin(pin):
+    def _extract_files(self, pin):
+        story_pin_data = pin.get("story_pin_data")
+        if story_pin_data and self.stories:
+            return self._extract_story(pin, story_pin_data)
+
+        carousel_data = pin.get("carousel_data")
+        if carousel_data:
+            return self._extract_carousel(pin, carousel_data)
+
         videos = pin.get("videos")
-        if videos:
-            video_formats = videos["video_list"]
+        if videos and self.videos:
+            return (self._extract_video(videos),)
 
-            for fmt in ("V_HLSV4", "V_HLSV3_WEB", "V_HLSV3_MOBILE"):
-                if fmt in video_formats:
-                    media = video_formats[fmt]
-                    break
-            else:
-                media = max(video_formats.values(),
-                            key=lambda x: x.get("width", 0))
+        try:
+            return (pin["images"]["orig"],)
+        except Exception:
+            self.log.debug("%s: No files found", pin.get("id"))
+            return ()
+
+    def _extract_story(self, pin, story):
+        files = []
+        story_id = story.get("id")
+
+        for page in story["pages"]:
+            page_id = page.get("id")
+
+            for block in page["blocks"]:
+                type = block.get("type")
+
+                if type == "story_pin_image_block":
+                    if 1 == len(page["blocks"]) == len(story["pages"]):
+                        try:
+                            media = pin["images"]["orig"]
+                        except Exception:
+                            media = self._extract_image(page, block)
+                    else:
+                        media = self._extract_image(page, block)
+
+                elif type == "story_pin_video_block":
+                    video = block["video"]
+                    media = self._extract_video(video)
+                    media["media_id"] = video.get("id") or ""
+
+                elif type == "story_pin_paragraph_block":
+                    media = {"url": "text:" + block["text"],
+                             "extension": "txt",
+                             "media_id": block.get("id")}
+
+                else:
+                    self.log.warning("%s: Unsupported story block '%s'",
+                                     pin.get("id"), type)
+                    continue
 
-            if "V_720P" in video_formats:
-                media["_fallback"] = (video_formats["V_720P"]["url"],)
+                media["story_id"] = story_id
+                media["page_id"] = page_id
+                files.append(media)
+
+        return files
+
+    def _extract_carousel(self, pin, carousel_data):
+        files = []
+        for slot in carousel_data["carousel_slots"]:
+            size, image = next(iter(slot["images"].items()))
+            slot["media_id"] = slot.pop("id")
+            slot["url"] = image["url"].replace(
+                "/" + size + "/", "/originals/", 1)
+            files.append(slot)
+        return files
+
+    def _extract_image(self, page, block):
+        sig = block.get("image_signature") or page["image_signature"]
+        url_base = "https://i.pinimg.com/originals/{}/{}/{}/{}.".format(
+            sig[0:2], sig[2:4], sig[4:6], sig)
+        url_jpg = url_base + "jpg"
+        url_png = url_base + "png"
+        url_webp = url_base + "webp"
 
-            return media
+        try:
+            media = block["image"]["images"]["originals"]
+        except Exception:
+            media = {"url": url_jpg, "_fallback": (url_png, url_webp,)}
 
-        return pin["images"]["orig"]
+        if media["url"] == url_jpg:
+            media["_fallback"] = (url_png, url_webp,)
+        else:
+            media["_fallback"] = (url_jpg, url_png, url_webp,)
+        media["media_id"] = sig
+
+        return media
+
+    def _extract_video(self, video):
+        video_formats = video["video_list"]
+
+        for fmt in ("V_HLSV4", "V_HLSV3_WEB", "V_HLSV3_MOBILE"):
+            if fmt in video_formats:
+                media = video_formats[fmt]
+                break
+        else:
+            media = max(video_formats.values(),
+                        key=lambda x: x.get("width", 0))
+
+        if "V_720P" in video_formats:
+            media["_fallback"] = (video_formats["V_720P"]["url"],)
+
+        return media
 
 
 class PinterestPinExtractor(PinterestExtractor):
```
subcategory = "artworks" + _warning = True pattern = (BASE_PATTERN + r"/(?:" r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)" r"(?:/([^/?#]+))?/?(?:$|[?#])" r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)") example = "https://www.pixiv.net/en/users/12345/artworks" - def __init__(self, match): - PixivExtractor.__init__(self, match) - u1, t1, u2, t2 = match.groups() + def _init(self): + PixivExtractor._init(self) + + u1, t1, u2, t2 = self.groups if t1: t1 = text.unquote(t1) elif t2: @@ -350,6 +351,14 @@ class PixivArtworksExtractor(PixivExtractor): self.user_id = u1 or u2 self.tag = t1 or t2 + if self.sanity_workaround: + self.cookies_domain = d = ".pixiv.net" + self._init_cookies() + if self._warning and not self.cookies.get("PHPSESSID", domain=d): + PixivArtworksExtractor._warning = False + self.log.warning("No 'PHPSESSID' cookie set. Can detect only " + "non R-18 'sanity_level' works.") + def metadata(self): if self.config("metadata"): self.api.user_detail(self.user_id) @@ -358,6 +367,19 @@ class PixivArtworksExtractor(PixivExtractor): def works(self): works = self.api.user_illusts(self.user_id) + if self.sanity_workaround: + body = self._request_ajax( + "/user/{}/profile/all".format(self.user_id)) + try: + ajax_ids = list(map(int, body["illusts"])) + ajax_ids.extend(map(int, body["manga"])) + ajax_ids.sort() + except Exception as exc: + self.log.warning("Unable to collect artwork IDs using AJAX " + "API (%s: %s)", exc.__class__.__name__, exc) + else: + works = self._extend_sanity(works, ajax_ids) + if self.tag: tag = self.tag.lower() works = ( @@ -367,6 +389,35 @@ class PixivArtworksExtractor(PixivExtractor): return works + def _extend_sanity(self, works, ajax_ids): + user = {"id": 1} + index = len(ajax_ids) - 1 + + for work in works: + while index >= 0: + work_id = work["id"] + ajax_id = ajax_ids[index] + + if ajax_id == work_id: + index -= 1 + break + + elif ajax_id > work_id: + index -= 1 + self.log.debug("Inserting work %s", ajax_id) + yield self._make_work(ajax_id, self.sanity_url, user) + + else: # ajax_id < work_id + break + + yield work + + while index >= 0: + ajax_id = ajax_ids[index] + self.log.debug("Inserting work %s", ajax_id) + yield self._make_work(ajax_id, self.sanity_url, user) + index -= 1 + class PixivAvatarExtractor(PixivExtractor): """Extractor for pixiv avatars""" diff --git a/gallery_dl/extractor/postmill.py b/gallery_dl/extractor/postmill.py index 29b351b..8877175 100644 --- a/gallery_dl/extractor/postmill.py +++ b/gallery_dl/extractor/postmill.py @@ -50,7 +50,7 @@ class PostmillExtractor(BaseExtractor): forum = match.group(1) id = int(match.group(2)) - is_text_post = url.startswith("/") + is_text_post = (url[0] == "/") is_image_post = self._search_image_tag(page) is not None data = { "title": title, diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index ce602f6..8577e74 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -31,6 +31,7 @@ class RedditExtractor(Extractor): parentdir = self.config("parent-directory") max_depth = self.config("recursion", 0) previews = self.config("previews", True) + embeds = self.config("embeds", True) videos = self.config("videos", True) if videos: @@ -100,7 +101,7 @@ class RedditExtractor(Extractor): for comment in comments: html = comment["body_html"] or "" href = (' href="' in html) - media = ("media_metadata" in comment) + media = (embeds and "media_metadata" in comment) if media or href: comment["date"] = text.parse_timestamp( @@ -211,8 +212,9 @@ class 
```diff
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index ce602f6..8577e74 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -31,6 +31,7 @@ class RedditExtractor(Extractor):
         parentdir = self.config("parent-directory")
         max_depth = self.config("recursion", 0)
         previews = self.config("previews", True)
+        embeds = self.config("embeds", True)
 
         videos = self.config("videos", True)
         if videos:
@@ -100,7 +101,7 @@ class RedditExtractor(Extractor):
                 for comment in comments:
                     html = comment["body_html"] or ""
                     href = (' href="' in html)
-                    media = ("media_metadata" in comment)
+                    media = (embeds and "media_metadata" in comment)
 
                     if media or href:
                         comment["date"] = text.parse_timestamp(
@@ -211,8 +212,9 @@ class RedditExtractor(Extractor):
     def _extract_video_dash(self, submission):
         submission["_ytdl_extra"] = {"title": submission["title"]}
         try:
-            return (submission["secure_media"]["reddit_video"]["dash_url"] +
-                    "#__youtubedl_smuggle=%7B%22to_generic%22%3A+1%7D")
+            url = submission["secure_media"]["reddit_video"]["dash_url"]
+            submission["_ytdl_manifest"] = "dash"
+            return url
         except Exception:
             return submission["url"]
```

```diff
diff --git a/gallery_dl/extractor/scrolller.py b/gallery_dl/extractor/scrolller.py
new file mode 100644
index 0000000..9f9f0c4
--- /dev/null
+++ b/gallery_dl/extractor/scrolller.py
@@ -0,0 +1,227 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://scrolller.com/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+from ..cache import cache
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?scrolller\.com"
+
+
+class ScrolllerExtractor(Extractor):
+    """Base class for scrolller extractors"""
+    category = "scrolller"
+    root = "https://scrolller.com"
+    directory_fmt = ("{category}", "{subredditTitle}")
+    filename_fmt = "{id}{title:? //}.{extension}"
+    archive_fmt = "{id}"
+    request_interval = (0.5, 1.5)
+
+    def _init(self):
+        self.auth_token = None
+
+    def items(self):
+        self.login()
+
+        for post in self.posts():
+
+            src = max(post["mediaSources"], key=self._sort_key)
+            post.update(src)
+            url = src["url"]
+            text.nameext_from_url(url, post)
+
+            yield Message.Directory, post
+            yield Message.Url, url, post
+
+    def posts(self):
+        return ()
+
+    def login(self):
+        username, password = self._get_auth_info()
+        if username:
+            self.auth_token = self._login_impl(username, password)
+
+    @cache(maxage=28*86400, keyarg=1)
+    def _login_impl(self, username, password):
+        self.log.info("Logging in as %s", username)
+
+        variables = {
+            "username": username,
+            "password": password,
+        }
+
+        try:
+            data = self._request_graphql("LoginQuery", variables)
+        except exception.HttpError as exc:
+            if exc.status == 403:
+                raise exception.AuthenticationError()
+            raise
+
+        return data["login"]["token"]
+
+    def _request_graphql(self, opname, variables):
+        url = "https://api.scrolller.com/api/v2/graphql"
+        headers = {
+            "Content-Type"  : "text/plain;charset=UTF-8",
+            "Origin"        : self.root,
+            "Sec-Fetch-Dest": "empty",
+            "Sec-Fetch-Mode": "cors",
+            "Sec-Fetch-Site": "same-site",
+        }
+        data = {
+            "query"        : QUERIES[opname],
+            "variables"    : variables,
+            "authorization": self.auth_token,
+        }
+        return self.request(
+            url, method="POST", headers=headers, data=util.json_dumps(data),
+        ).json()["data"]
+
+    def _pagination(self, opname, variables):
+        while True:
+            data = self._request_graphql(opname, variables)
+
+            while "items" not in data:
+                data = data.popitem()[1]
+            yield from data["items"]
+
+            if not data["iterator"]:
+                return
+            variables["iterator"] = data["iterator"]
+
+    def _sort_key(self, src):
+        return src["width"], not src["isOptimized"]
+
+
+class ScrolllerSubredditExtractor(ScrolllerExtractor):
+    """Extractor for media from a scrolller subreddit"""
+    subcategory = "subreddit"
+    pattern = BASE_PATTERN + r"(/r/[^/?#]+)(?:/?\?([^#]+))?"
+    example = "https://scrolller.com/r/SUBREDDIT"
+
+    def posts(self):
+        url, query = self.groups
+        filter = None
+
+        if query:
+            params = text.parse_query(query)
+            if "filter" in params:
+                filter = params["filter"].upper().rstrip("S")
+
+        variables = {
+            "url"      : url,
+            "iterator" : None,
+            "filter"   : filter,
+            "hostsDown": None,
+        }
+        return self._pagination("SubredditQuery", variables)
+
+
+class ScrolllerFollowingExtractor(ScrolllerExtractor):
+    """Extractor for followed scrolller subreddits"""
+    subcategory = "following"
+    pattern = BASE_PATTERN + r"/following"
+    example = "https://scrolller.com/following"
+
+    def items(self):
+        self.login()
+
+        if not self.auth_token:
+            raise exception.AuthorizationError("Login required")
+
+        variables = {
+            "iterator" : None,
+            "hostsDown": None,
+        }
+
+        for subreddit in self._pagination("FollowingQuery", variables):
+            url = self.root + subreddit["url"]
+            subreddit["_extractor"] = ScrolllerSubredditExtractor
+            yield Message.Queue, url, subreddit
+
+
+class ScrolllerPostExtractor(ScrolllerExtractor):
+    """Extractor for media from a single scrolller post"""
+    subcategory = "post"
+    pattern = BASE_PATTERN + r"/(?!r/|following$)([^/?#]+)"
+    example = "https://scrolller.com/title-slug-a1b2c3d4f5"
+
+    def posts(self):
+        url = "{}/{}".format(self.root, self.groups[0])
+        page = self.request(url).text
+        data = util.json_loads(text.extr(
+            page, '<script>window.scrolllerConfig="', '"</script>')
+            .replace('\\"', '"'))
+        return (data["item"],)
+
+
+QUERIES = {
+
+    "SubredditQuery": """\
+query SubredditQuery(
+    $url: String!
+    $filter: SubredditPostFilter
+    $iterator: String
+) {
+    getSubreddit(
+        url: $url
+    ) {
+        children(
+            limit: 50
+            iterator: $iterator
+            filter: $filter
+            disabledHosts: null
+        ) {
+            iterator items {
+                __typename id url title subredditId subredditTitle
+                subredditUrl redditPath isNsfw albumUrl hasAudio
+                fullLengthSource gfycatSource redgifsSource ownerAvatar
+                username displayName isPaid tags isFavorite
+                mediaSources { url width height isOptimized }
+                blurredMediaSources { url width height isOptimized }
+            }
+        }
+    }
+}
+""",
+
+    "FollowingQuery": """\
+query FollowingQuery(
+    $iterator: String
+) {
+    getFollowing(
+        limit: 10
+        iterator: $iterator
+    ) {
+        iterator items {
+            __typename id url title secondaryTitle description createdAt isNsfw
+            subscribers isComplete itemCount videoCount pictureCount albumCount
+            isPaid username tags isFollowing
+            banner { url width height isOptimized }
+        }
+    }
+}
+""",
+
+    "LoginQuery": """\
+query LoginQuery(
+    $username: String!,
+    $password: String!
+) {
+    login(
+        username: $username,
+        password: $password
+    ) {
+        username token expiresAt isAdmin status isPremium
+    }
+}
+""",
+
+}
```
```diff
diff --git a/gallery_dl/extractor/telegraph.py b/gallery_dl/extractor/telegraph.py
index dd5988f..468840b 100644
--- a/gallery_dl/extractor/telegraph.py
+++ b/gallery_dl/extractor/telegraph.py
@@ -49,7 +49,7 @@ class TelegraphGalleryExtractor(GalleryExtractor):
             url, pos = text.extract(figure, 'src="', '"')
             if url.startswith("/embed/"):
                 continue
-            elif url.startswith("/"):
+            elif url[0] == "/":
                 url = self.root + url
             caption, pos = text.extract(figure, "<figcaption>", "<", pos)
             num += 1
```

```diff
diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py
index bce661a..b196aeb 100644
--- a/gallery_dl/extractor/tsumino.py
+++ b/gallery_dl/extractor/tsumino.py
@@ -148,8 +148,10 @@ class TsuminoSearchExtractor(TsuminoBase, Extractor):
             data["PageNumber"] += 1
 
     def _parse(self, query):
+        if not query:
+            return {}
         try:
-            if query.startswith("?"):
+            if query[0] == "?":
                 return self._parse_simple(query)
             return self._parse_jsurl(query)
         except Exception as exc:
@@ -187,8 +189,6 @@ class TsuminoSearchExtractor(TsuminoBase, Extractor):
         Example: ~(name~'John*20Doe~age~42~children~(~'Mary~'Bill))
         Ref: https://github.com/Sage/jsurl
         """
-        if not data:
-            return {}
         i = 0
         imax = len(data)
```
```diff
diff --git a/gallery_dl/extractor/urlgalleries.py b/gallery_dl/extractor/urlgalleries.py
index b21709a..f7ce44b 100644
--- a/gallery_dl/extractor/urlgalleries.py
+++ b/gallery_dl/extractor/urlgalleries.py
@@ -7,7 +7,7 @@
 """Extractors for https://urlgalleries.net/"""
 
 from .common import GalleryExtractor, Message
-from .. import text
+from .. import text, exception
 
 
 class UrlgalleriesGalleryExtractor(GalleryExtractor):
@@ -16,27 +16,31 @@ class UrlgalleriesGalleryExtractor(GalleryExtractor):
     root = "urlgalleries.net"
     request_interval = (0.5, 1.0)
     pattern = r"(?:https?://)(?:(\w+)\.)?urlgalleries\.net/(?:[\w-]+-)?(\d+)"
-    example = "https://blog.urlgalleries.net/gallery-12345/TITLE"
+    example = "https://BLOG.urlgalleries.net/gallery-12345/TITLE"
 
-    def __init__(self, match):
-        self.blog, self.gallery_id = match.groups()
+    def items(self):
+        blog, self.gallery_id = self.groups
         url = "https://{}.urlgalleries.net/porn-gallery-{}/?a=10000".format(
-            self.blog, self.gallery_id)
-        GalleryExtractor.__init__(self, match, url)
+            blog, self.gallery_id)
+
+        with self.request(url, allow_redirects=False, fatal=...) as response:
+            if 300 <= response.status_code < 500:
+                if response.headers.get("location", "").endswith(
+                        "/not_found_adult.php"):
+                    raise exception.NotFoundError("gallery")
+                raise exception.HttpError(None, response)
+            page = response.text
 
-    def items(self):
-        page = self.request(self.gallery_url).text
         imgs = self.images(page)
         data = self.metadata(page)
         data["count"] = len(imgs)
-        del page
 
-        root = "https://{}.urlgalleries.net".format(self.blog)
+        root = "https://{}.urlgalleries.net".format(blog)
         yield Message.Directory, data
         for data["num"], img in enumerate(imgs, 1):
-            response = self.request(
-                root + img, method="HEAD", allow_redirects=False)
-            yield Message.Queue, response.headers["Location"], data
+            page = self.request(root + img).text
+            url = text.extr(page, "window.location.href = '", "'")
+            yield Message.Queue, url, data
 
     def metadata(self, page):
         extr = text.extract_from(page)
```

```diff
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 95eeafe..ea034a7 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -24,6 +24,13 @@ class VkExtractor(Extractor):
     root = "https://vk.com"
     request_interval = (0.5, 1.5)
 
+    def _init(self):
+        self.offset = text.parse_int(self.config("offset"))
+
+    def skip(self, num):
+        self.offset += num
+        return num
+
     def items(self):
         sub = re.compile(r"/imp[fg]/").sub
         sizes = "wzyxrqpo"
@@ -75,7 +82,7 @@ class VkExtractor(Extractor):
             "al"       : "1",
             "direction": "1",
             "list"     : photos_id,
-            "offset"   : 0,
+            "offset"   : self.offset,
         }
 
         while True:
```

```diff
diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py
index 116f557..4eae537 100644
--- a/gallery_dl/extractor/wikimedia.py
+++ b/gallery_dl/extractor/wikimedia.py
@@ -193,7 +193,10 @@ class WikimediaArticleExtractor(WikimediaExtractor):
     def __init__(self, match):
         WikimediaExtractor.__init__(self, match)
 
-        path = match.group(match.lastindex)
+        path = self.groups[-1]
+        if path[2] == "/":
+            self.root = self.root + "/" + path[:2]
+            path = path[3:]
 
         if path.startswith("wiki/"):
             path = path[5:]
```
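The wikimedia change recognizes language-prefixed paths such as `fr/wiki/...` and moves the two-letter code into the site root. A sketch of that path handling (example domain made up; like the hunk, it assumes the path is at least three characters long):

```python
def split_language(root, path):
    """Move a two-letter language prefix into the site root and strip
    a leading 'wiki/' segment, mirroring the hunk above."""
    if path[2] == "/":            # e.g. "fr/wiki/Paris"
        root = root + "/" + path[:2]
        path = path[3:]
    if path.startswith("wiki/"):
        path = path[5:]
    return root, path

print(split_language("https://www.wikimedia.org", "fr/wiki/Paris"))
# ('https://www.wikimedia.org/fr', 'Paris')
```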
