| field | value |
|---|---|
| author | 2024-03-25 02:57:44 -0400 |
| committer | 2024-03-25 02:57:44 -0400 |
| commit | 6e662211019a89caec44de8a57c675872b0b5498 (patch) |
| tree | 5d9d5a2b7efc3a24dd6074e99b253b639fe5af1d /gallery_dl/extractor |
| parent | 01166fa52707cc282467427cf0e65c1b8983c4be (diff) |

New upstream version 1.26.9 (upstream/1.26.9)
Diffstat (limited to 'gallery_dl/extractor')
37 files changed, 745 insertions, 317 deletions
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index a665249..591e6a8 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -194,7 +194,6 @@ modules = [
     "directlink",
     "recursive",
     "oauth",
-    "test",
     "ytdl",
     "generic",
 ]

diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index b58b3d3..49fde7b 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -29,11 +29,13 @@ class ArtstationExtractor(Extractor):
         self.user = match.group(1) or match.group(2)

     def items(self):
-        data = self.metadata()
-
-        projects = self.projects()
+        videos = self.config("videos", True)
+        previews = self.config("previews", False)
         external = self.config("external", False)
         max_posts = self.config("max-posts")
+
+        data = self.metadata()
+        projects = self.projects()
         if max_posts:
             projects = itertools.islice(projects, max_posts)
@@ -45,13 +47,29 @@ class ArtstationExtractor(Extractor):
                 asset["num"] = num
                 yield Message.Directory, asset

-                if adict["has_embedded_player"] and external:
+                if adict["has_embedded_player"]:
                     player = adict["player_embedded"]
                     url = (text.extr(player, 'src="', '"') or
                            text.extr(player, "src='", "'"))
-                    if url and not url.startswith(self.root):
-                        asset["extension"] = None
-                        yield Message.Url, "ytdl:" + url, asset
+                    if url.startswith(self.root):
+                        # video clip hosted on artstation
+                        if videos:
+                            page = self.request(url).text
+                            url = text.extr(page, ' src="', '"')
+                            text.nameext_from_url(url, asset)
+                            yield Message.Url, url, asset
+                    elif url:
+                        # external URL
+                        if external:
+                            asset["extension"] = "mp4"
+                            yield Message.Url, "ytdl:" + url, asset
+                    else:
+                        self.log.debug(player)
+                        self.log.warning(
+                            "Failed to extract embedded player URL (%s)",
+                            adict.get("id"))
+
+                    if not previews:
                         continue

                 if adict["has_image"]:
@@ -59,10 +77,11 @@ class ArtstationExtractor(Extractor):
                     text.nameext_from_url(url, asset)
                     url = self._no_cache(url)

-                    lhs, _, rhs = url.partition("/large/")
-                    if rhs:
-                        url = lhs + "/4k/" + rhs
-                        asset["_fallback"] = self._image_fallback(lhs, rhs)
+                    if "/video_clips/" not in url:
+                        lhs, _, rhs = url.partition("/large/")
+                        if rhs:
+                            url = lhs + "/4k/" + rhs
+                            asset["_fallback"] = self._image_fallback(lhs, rhs)

                     yield Message.Url, url, asset
@@ -175,7 +194,7 @@ class ArtstationUserExtractor(ArtstationExtractor):
     subcategory = "user"
     pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com"
                r"/(?!artwork|projects|search)([^/?#]+)(?:/albums/all)?"
-               r"|((?!www)\w+)\.artstation\.com(?:/projects)?)/?$")
+               r"|((?!www)[\w-]+)\.artstation\.com(?:/projects)?)/?$")
     example = "https://www.artstation.com/USER"

     def projects(self):
@@ -192,7 +211,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
     archive_fmt = "a_{album[id]}_{asset[id]}"
     pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com"
                r"/(?!artwork|projects|search)([^/?#]+)"
-               r"|((?!www)\w+)\.artstation\.com)/albums/(\d+)")
+               r"|((?!www)[\w-]+)\.artstation\.com)/albums/(\d+)")
     example = "https://www.artstation.com/USER/albums/12345"

     def __init__(self, match):
@@ -226,7 +245,7 @@ class ArtstationLikesExtractor(ArtstationExtractor):
     directory_fmt = ("{category}", "{userinfo[username]}", "Likes")
     archive_fmt = "f_{userinfo[id]}_{asset[id]}"
     pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
-               r"/(?!artwork|projects|search)([^/?#]+)/likes/?")
+               r"/(?!artwork|projects|search)([^/?#]+)/likes")
     example = "https://www.artstation.com/USER/likes"

     def projects(self):
@@ -234,6 +253,54 @@ class ArtstationLikesExtractor(ArtstationExtractor):
         return self._pagination(url)


+class ArtstationCollectionExtractor(ArtstationExtractor):
+    """Extractor for an artstation collection"""
+    subcategory = "collection"
+    directory_fmt = ("{category}", "{user}",
+                     "{collection[id]} {collection[name]}")
+    archive_fmt = "c_{collection[id]}_{asset[id]}"
+    pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
+               r"/(?!artwork|projects|search)([^/?#]+)/collections/(\d+)")
+    example = "https://www.artstation.com/USER/collections/12345"
+
+    def __init__(self, match):
+        ArtstationExtractor.__init__(self, match)
+        self.collection_id = match.group(2)
+
+    def metadata(self):
+        url = "{}/collections/{}.json".format(
+            self.root, self.collection_id)
+        params = {"username": self.user}
+        collection = self.request(
+            url, params=params, notfound="collection").json()
+        return {"collection": collection, "user": self.user}
+
+    def projects(self):
+        url = "{}/collections/{}/projects.json".format(
+            self.root, self.collection_id)
+        params = {"collection_id": self.collection_id}
+        return self._pagination(url, params)
+
+
+class ArtstationCollectionsExtractor(ArtstationExtractor):
+    """Extractor for an artstation user's collections"""
+    subcategory = "collections"
+    pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
+               r"/(?!artwork|projects|search)([^/?#]+)/collections/?$")
+    example = "https://www.artstation.com/USER/collections"
+
+    def items(self):
+        url = self.root + "/collections.json"
+        params = {"username": self.user}
+
+        for collection in self.request(
+                url, params=params, notfound="collections").json():
+            url = "{}/{}/collections/{}".format(
+                self.root, self.user, collection["id"])
+            collection["_extractor"] = ArtstationCollectionExtractor
+            yield Message.Queue, url, collection
+
+
 class ArtstationChallengeExtractor(ArtstationExtractor):
     """Extractor for submissions of artstation challenges"""
     subcategory = "challenge"
@@ -355,7 +422,7 @@ class ArtstationImageExtractor(ArtstationExtractor):
     """Extractor for images from a single artstation project"""
     subcategory = "image"
     pattern = (r"(?:https?://)?(?:"
-               r"(?:\w+\.)?artstation\.com/(?:artwork|projects|search)"
+               r"(?:[\w-]+\.)?artstation\.com/(?:artwork|projects|search)"
               r"|artstn\.co/p)/(\w+)")
     example = "https://www.artstation.com/artwork/abcde"
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index 8de0d7b..84c3187 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -39,12 +39,19 @@ class BlueskyExtractor(Extractor):
         self._metadata_facets = ("facets" in meta)

         self.api = BlueskyAPI(self)
-        self._user = None
+        self._user = self._user_did = None
+        self.instance = self.root.partition("://")[2]

     def items(self):
         for post in self.posts():
             if "post" in post:
                 post = post["post"]
+
+            pid = post["uri"].rpartition("/")[2]
+            if self._user_did and post["author"]["did"] != self._user_did:
+                self.log.debug("Skipping %s (repost)", pid)
+                continue
+
             post.update(post["record"])
             del post["record"]
@@ -75,7 +82,8 @@ class BlueskyExtractor(Extractor):
             if self._metadata_user:
                 post["user"] = self._user or post["author"]

-            post["post_id"] = post["uri"].rpartition("/")[2]
+            post["instance"] = self.instance
+            post["post_id"] = pid
             post["count"] = len(images)
             post["date"] = text.parse_datetime(
                 post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
@@ -101,10 +109,14 @@ class BlueskyExtractor(Extractor):
                 post["width"] = post["height"] = 0

             image = file["image"]
-            post["filename"] = link = image["ref"]["$link"]
+            try:
+                cid = image["ref"]["$link"]
+            except KeyError:
+                cid = image["cid"]
+            post["filename"] = cid
             post["extension"] = image["mimeType"].rpartition("/")[2]

-            yield Message.Url, base + link, post
+            yield Message.Url, base + cid, post

     def posts(self):
         return ()
@@ -230,6 +242,7 @@ class BlueskyFollowingExtractor(BlueskyExtractor):
     def items(self):
         for user in self.api.get_follows(self.user):
             url = "https://bsky.app/profile/" + user["did"]
+            user["_extractor"] = BlueskyUserExtractor
             yield Message.Queue, url, user
@@ -314,7 +327,7 @@ class BlueskyAPI():
         endpoint = "app.bsky.feed.getFeed"
         params = {
             "feed" : "at://{}/app.bsky.feed.generator/{}".format(
-                self._did_from_actor(actor), feed),
+                self._did_from_actor(actor, False), feed),
             "limit": "100",
         }
         return self._pagination(endpoint, params)
@@ -331,7 +344,7 @@ class BlueskyAPI():
         endpoint = "app.bsky.feed.getListFeed"
         params = {
             "list" : "at://{}/app.bsky.graph.list/{}".format(
-                self._did_from_actor(actor), list),
+                self._did_from_actor(actor, False), list),
             "limit": "100",
         }
         return self._pagination(endpoint, params)
@@ -378,14 +391,17 @@ class BlueskyAPI():
         }
         return self._pagination(endpoint, params, "posts")

-    def _did_from_actor(self, actor):
+    def _did_from_actor(self, actor, user_did=True):
         if actor.startswith("did:"):
             did = actor
         else:
             did = self.resolve_handle(actor)

-        if self.extractor._metadata_user:
-            self.extractor._user = self.get_profile(did)
+        extr = self.extractor
+        if user_did and not extr.config("reposts", False):
+            extr._user_did = did
+        if extr._metadata_user:
+            extr._user = self.get_profile(did)

         return did
@@ -434,13 +450,20 @@ class BlueskyAPI():
             if response.status_code < 400:
                 return response.json()
             if response.status_code == 429:
-                self.extractor.wait(seconds=60)
+                until = response.headers.get("RateLimit-Reset")
+                self.extractor.wait(until=until)
                 continue

+            try:
+                data = response.json()
+                msg = "API request failed ('{}: {}')".format(
+                    data["error"], data["message"])
+            except Exception:
+                msg = "API request failed ({} {})".format(
+                    response.status_code, response.reason)
+
             self.extractor.log.debug("Server response: %s", response.text)
-            raise exception.StopExtraction(
-                "API request failed (%s %s)",
-                response.status_code, response.reason)
+            raise exception.StopExtraction(msg)

     def _pagination(self, endpoint, params, key="feed"):
         while True:
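The 429 handling above now honors the server-provided RateLimit-Reset header instead of a fixed 60-second wait. A minimal sketch of the same idea with the standard library, assuming the header carries a Unix timestamp (which is what gallery-dl's wait(until=...) expects):

```python
import time

# Wait until the epoch given in RateLimit-Reset; fall back to a fixed
# delay if the header is missing. The fallback value is an assumption.
def wait_for_rate_limit(response, fallback=60):
    until = response.headers.get("RateLimit-Reset")
    delay = float(until) - time.time() if until else fallback
    time.sleep(max(delay, 0))
```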
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 1a0e47d..a093347 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -54,7 +54,6 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
             "album_id"   : self.album_id,
             "album_name" : text.unescape(info[0]),
             "album_size" : size[1:-1],
-            "description": text.unescape(info[2]) if len(info) > 2 else "",
             "count"      : len(urls),
         }

diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index cf0f8c9..d14e13a 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -203,9 +203,15 @@ class Extractor():
                 self.log.debug("%s (%s/%s)", msg, tries, retries+1)
                 if tries > retries:
                     break
-                self.sleep(
-                    max(tries, self._interval()) if self._interval else tries,
-                    "retry")
+
+                if self._interval:
+                    seconds = self._interval()
+                    if seconds < tries:
+                        seconds = tries
+                else:
+                    seconds = tries
+
+                self.sleep(seconds, "retry")
                 tries += 1

         raise exception.HttpError(msg, response)
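The common.py rewrite above is behavior-preserving: the retry delay is still the larger of the attempt counter and the configured interval, only spelled out as explicit branches. A sketch of the equivalence, assuming _interval() returns the configured delay in seconds:

```python
# Equivalent to both the old one-liner and the new branches: never sleep
# less than the current attempt number.
def retry_delay(tries, interval=None):
    seconds = interval() if interval else 0
    return max(seconds, tries)
```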
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 0cf4f88..ca8acaa 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -18,12 +18,12 @@ import binascii
 import time
 import re

-
 BASE_PATTERN = (
     r"(?:https?://)?(?:"
     r"(?:www\.)?(?:fx)?deviantart\.com/(?!watch/)([\w-]+)|"
     r"(?!www\.)([\w-]+)\.(?:fx)?deviantart\.com)"
 )
+DEFAULT_AVATAR = "https://a.deviantart.net/avatars/default.gif"


 class DeviantartExtractor(Extractor):
@@ -47,8 +47,9 @@ class DeviantartExtractor(Extractor):
         self.extra = self.config("extra", False)
         self.quality = self.config("quality", "100")
         self.original = self.config("original", True)
-        self.comments = self.config("comments", False)
         self.intermediary = self.config("intermediary", True)
+        self.comments_avatars = self.config("comments-avatars", False)
+        self.comments = self.comments_avatars or self.config("comments", False)

         self.api = DeviantartOAuthAPI(self)
         self.group = False
@@ -83,6 +84,16 @@ class DeviantartExtractor(Extractor):
         else:
             self.commit_journal = None

+    def request(self, url, **kwargs):
+        if "fatal" not in kwargs:
+            kwargs["fatal"] = False
+        while True:
+            response = Extractor.request(self, url, **kwargs)
+            if response.status_code != 403 or \
+                    b"Request blocked." not in response.content:
+                return response
+            self.wait(seconds=300, reason="CloudFront block")
+
     def skip(self, num):
         self.offset += num
         return num
@@ -100,9 +111,9 @@ class DeviantartExtractor(Extractor):
         if self.user:
             group = self.config("group", True)
             if group:
-                profile = self.api.user_profile(self.user)
-                if profile:
-                    self.user = profile["user"]["username"]
+                user = _user_details(self, self.user)
+                if user:
+                    self.user = user["username"]
                     self.group = False
                 elif group == "skip":
                     self.log.info("Skipping group '%s'", self.user)
@@ -172,6 +183,20 @@ class DeviantartExtractor(Extractor):
                     deviation["is_original"] = True
                 yield self.commit_journal(deviation, journal)

+            if self.comments_avatars:
+                for comment in deviation["comments"]:
+                    user = comment["user"]
+                    name = user["username"].lower()
+                    if user["usericon"] == DEFAULT_AVATAR:
+                        self.log.debug(
+                            "Skipping avatar of '%s' (default)", name)
+                        continue
+                    _user_details.update(name, user)
+
+                    url = "{}/{}/avatar/".format(self.root, name)
+                    comment["_extractor"] = DeviantartAvatarExtractor
+                    yield Message.Queue, url, comment
+
             if not self.extra:
                 continue
@@ -198,7 +223,9 @@ class DeviantartExtractor(Extractor):
         """Adjust the contents of a Deviation-object"""
         if "index" not in deviation:
             try:
-                if deviation["url"].startswith("https://sta.sh"):
+                if deviation["url"].startswith((
+                    "https://www.deviantart.com/stash/", "https://sta.sh",
+                )):
                     filename = deviation["content"]["src"].split("/")[5]
                     deviation["index_base36"] = filename.partition("-")[0][1:]
                     deviation["index"] = id_from_base36(
@@ -445,18 +472,12 @@ class DeviantartExtractor(Extractor):

     def _limited_request(self, url, **kwargs):
         """Limits HTTP requests to one every 2 seconds"""
-        kwargs["fatal"] = None
         diff = time.time() - DeviantartExtractor._last_request
         if diff < 2.0:
             self.sleep(2.0 - diff, "request")
-
-        while True:
-            response = self.request(url, **kwargs)
-            if response.status_code != 403 or \
-                    b"Request blocked." not in response.content:
-                DeviantartExtractor._last_request = time.time()
-                return response
-            self.wait(seconds=180)
+        response = self.request(url, **kwargs)
+        DeviantartExtractor._last_request = time.time()
+        return response

     def _fetch_premium(self, deviation):
         try:
@@ -569,13 +590,18 @@ class DeviantartAvatarExtractor(DeviantartExtractor):

     def deviations(self):
         name = self.user.lower()
-        profile = self.api.user_profile(name)
-        if not profile:
+        user = _user_details(self, name)
+        if not user:
             return ()

-        user = profile["user"]
         icon = user["usericon"]
-        index = icon.rpartition("?")[2]
+        if icon == DEFAULT_AVATAR:
+            self.log.debug("Skipping avatar of '%s' (default)", name)
+            return ()
+
+        _, sep, index = icon.rpartition("?")
+        if not sep:
+            index = "0"

         formats = self.config("formats")
         if not formats:
@@ -658,7 +684,8 @@ class DeviantartStashExtractor(DeviantartExtractor):
     """Extractor for sta.sh-ed deviations"""
     subcategory = "stash"
     archive_fmt = "{index}.{extension}"
-    pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)"
+    pattern = (r"(?:https?://)?(?:(?:www\.)?deviantart\.com/stash|sta\.sh)"
+               r"/([a-z0-9]+)")
     example = "https://sta.sh/abcde"

     skip = Extractor.skip
@@ -679,7 +706,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
         if uuid:
             deviation = self.api.deviation(uuid)
             deviation["index"] = text.parse_int(text.extr(
-                page, 'gmi-deviationid="', '"'))
+                page, '\\"deviationId\\":', ','))
             yield deviation
             return
@@ -1086,9 +1113,8 @@ class DeviantartOAuthAPI():
         if not isinstance(self.mature, str):
             self.mature = "true" if self.mature else "false"

-        self.folders = extractor.config("folders", False)
-        self.metadata = extractor.extra or extractor.config("metadata", False)
         self.strategy = extractor.config("pagination")
+        self.folders = extractor.config("folders", False)
         self.public = extractor.config("public", True)

         client_id = extractor.config("client-id")
@@ -1106,6 +1132,42 @@ class DeviantartOAuthAPI():
             token = None
         self.refresh_token_key = token

+        metadata = extractor.config("metadata", False)
+        if not metadata:
+            metadata = bool(extractor.extra)
+        if metadata:
+            self.metadata = True
+
+            if isinstance(metadata, str):
+                if metadata == "all":
+                    metadata = ("submission", "camera", "stats",
+                                "collection", "gallery")
+                else:
+                    metadata = metadata.replace(" ", "").split(",")
+            elif not isinstance(metadata, (list, tuple)):
+                metadata = ()
+
+            self._metadata_params = {"mature_content": self.mature}
+            self._metadata_public = None
+            if metadata:
+                # extended metadata
+                self.limit = 10
+                for param in metadata:
+                    self._metadata_params["ext_" + param] = "1"
+                if "ext_collection" in self._metadata_params or \
+                        "ext_gallery" in self._metadata_params:
+                    if token:
+                        self._metadata_public = False
+                    else:
+                        self.log.error("'collection' and 'gallery' metadata "
+                                       "require a refresh token")
+            else:
+                # base metadata
+                self.limit = 50
+        else:
+            self.metadata = False
+            self.limit = None
+
         self.log.debug(
             "Using %s API credentials (client-id %s)",
             "default" if self.client_id == self.CLIENT_ID else "custom",
@@ -1115,14 +1177,14 @@ class DeviantartOAuthAPI():
     def browse_deviantsyouwatch(self, offset=0):
         """Yield deviations from users you watch"""
         endpoint = "/browse/deviantsyouwatch"
-        params = {"limit": "50", "offset": offset,
+        params = {"limit": 50, "offset": offset,
                   "mature_content": self.mature}
         return self._pagination(endpoint, params, public=False)

     def browse_posts_deviantsyouwatch(self, offset=0):
         """Yield posts from users you watch"""
         endpoint = "/browse/posts/deviantsyouwatch"
-        params = {"limit": "50", "offset": offset,
+        params = {"limit": 50, "offset": offset,
                   "mature_content": self.mature}
         return self._pagination(endpoint, params, public=False, unpack=True)
@@ -1131,7 +1193,7 @@ class DeviantartOAuthAPI():
         endpoint = "/browse/newest"
         params = {
             "q"             : query,
-            "limit"         : 50 if self.metadata else 120,
+            "limit"         : 120,
             "offset"        : offset,
             "mature_content": self.mature,
         }
@@ -1142,7 +1204,7 @@ class DeviantartOAuthAPI():
         endpoint = "/browse/popular"
         params = {
             "q"             : query,
-            "limit"         : 50 if self.metadata else 120,
+            "limit"         : 120,
             "timerange"     : timerange,
             "offset"        : offset,
             "mature_content": self.mature,
         }
@@ -1249,8 +1311,11 @@ class DeviantartOAuthAPI():
             "deviationids[{}]={}".format(num, deviation["deviationid"])
             for num, deviation in enumerate(deviations)
         )
-        params = {"mature_content": self.mature}
-        return self._call(endpoint, params=params)["metadata"]
+        return self._call(
+            endpoint,
+            params=self._metadata_params,
+            public=self._metadata_public,
+        )["metadata"]

     def gallery(self, username, folder_id, offset=0, extend=True,
                 public=None):
         """Yield all Deviation-objects contained in a gallery folder"""
@@ -1357,9 +1422,14 @@ class DeviantartOAuthAPI():
             self.authenticate(None if public else self.refresh_token_key)
             kwargs["headers"] = self.headers
             response = self.extractor.request(url, **kwargs)
-            data = response.json()
-            status = response.status_code

+            try:
+                data = response.json()
+            except ValueError:
+                self.log.error("Unable to parse API response")
+                data = {}
+
+            status = response.status_code
             if 200 <= status < 400:
                 if self.delay > self.delay_min:
                     self.delay -= 1
@@ -1412,6 +1482,9 @@ class DeviantartOAuthAPI():
         if public is None:
             public = self.public

+        if self.limit and params["limit"] > self.limit:
+            params["limit"] = (params["limit"] // self.limit) * self.limit
+
         while True:
             data = self._call(endpoint, params=params, public=public)
             try:
@@ -1483,6 +1556,15 @@ class DeviantartOAuthAPI():

     def _metadata(self, deviations):
         """Add extended metadata to each deviation object"""
+        if len(deviations) <= self.limit:
+            self._metadata_batch(deviations)
+        else:
+            n = self.limit
+            for index in range(0, len(deviations), n):
+                self._metadata_batch(deviations[index:index+n])
+
+    def _metadata_batch(self, deviations):
+        """Fetch extended metadata for a single batch of deviations"""
         for deviation, metadata in zip(
                 deviations, self.deviation_metadata(deviations)):
             deviation.update(metadata)
@@ -1667,6 +1749,14 @@ class DeviantartEclipseAPI():
         return token


+@memcache(keyarg=1)
+def _user_details(extr, name):
+    try:
+        return extr.api.user_profile(name)["user"]
+    except Exception:
+        return None
+
+
@cache(maxage=36500*86400, keyarg=0)
def _refresh_token_cache(token):
    if token and token[0] == "#":
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index f7dc3cc..c94a110 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -77,6 +77,8 @@ class FlickrImageExtractor(FlickrExtractor):
         photo = self.api.photos_getInfo(self.item_id)
         if self.api.exif:
             photo.update(self.api.photos_getExif(self.item_id))
+        if self.api.contexts:
+            photo.update(self.api.photos_getAllContexts(self.item_id))

         if photo["media"] == "video" and self.api.videos:
             self.api._extract_video(photo)
@@ -268,6 +270,8 @@ class FlickrAPI(oauth.OAuth1API):

         self.exif = extractor.config("exif", False)
         self.videos = extractor.config("videos", True)
+        self.contexts = extractor.config("contexts", False)
+
         self.maxsize = extractor.config("size-max")
         if isinstance(self.maxsize, str):
             for fmt, fmtname, fmtwidth in self.FORMATS:
@@ -311,6 +315,13 @@ class FlickrAPI(oauth.OAuth1API):
         params = {"user_id": user_id}
         return self._pagination("people.getPhotos", params)

+    def photos_getAllContexts(self, photo_id):
+        """Returns all visible sets and pools the photo belongs to."""
+        params = {"photo_id": photo_id}
+        data = self._call("photos.getAllContexts", params)
+        del data["stat"]
+        return data
+
     def photos_getExif(self, photo_id):
         """Retrieves a list of EXIF/TIFF/GPS tags for a given photo."""
         params = {"photo_id": photo_id}
@@ -444,6 +455,8 @@ class FlickrAPI(oauth.OAuth1API):

         if self.exif:
             photo.update(self.photos_getExif(photo["id"]))
+        if self.contexts:
+            photo.update(self.photos_getAllContexts(photo["id"]))
         photo["id"] = text.parse_int(photo["id"])

         if "owner" in photo:
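A rough sketch of what enabling the new "contexts" option does per photo: one extra photos.getAllContexts call, merged into the photo dict with the "stat" field dropped. The api object and call shape mirror the diff; the response keys ("set", "pool") are assumptions based on Flickr's API:

```python
# Merge set/pool membership into a photo dict, as the new option does.
def add_contexts(api, photo):
    data = api._call("photos.getAllContexts", {"photo_id": photo["id"]})
    del data["stat"]
    photo.update(data)   # typically adds "set" and/or "pool" lists
```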
pnum params["limit"] = self.per_page while True: - favs = self._api_request(params, "favorite", True) + favs = self._api_request(params, "favorite") favs.reverse() if skip: @@ -195,7 +245,9 @@ class GelbooruFavoriteExtractor(GelbooruBase, skip = 0 for fav in favs: - yield from self._api_request({"id": fav["favorite"]}) + for post in self._api_request({"id": fav["favorite"]}): + post["date_favorited"] = text.parse_timestamp(fav["added"]) + yield post params["pid"] -= 1 if params["pid"] < 0: diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py index 289f91c..f0eb4e9 100644 --- a/gallery_dl/extractor/gofile.py +++ b/gallery_dl/extractor/gofile.py @@ -41,9 +41,13 @@ class GofileFolderExtractor(Extractor): folder = self._get_content(self.content_id, password) yield Message.Directory, folder + try: + contents = folder.pop("children") + except KeyError: + raise exception.AuthorizationError("Password required") + num = 0 - contents = folder.pop("contents") - for content_id in folder["childs"]: + for content_id in folder["childrenIds"]: content = contents[content_id] content["folder"] = folder @@ -67,31 +71,32 @@ class GofileFolderExtractor(Extractor): @memcache() def _create_account(self): self.log.debug("Creating temporary account") - return self._api_request("createAccount")["token"] + return self._api_request("accounts", method="POST")["token"] @cache(maxage=86400) def _get_website_token(self): self.log.debug("Fetching website token") page = self.request(self.root + "/dist/js/alljs.js").text - return text.extr(page, 'fetchData.wt = "', '"') + return text.extr(page, 'wt: "', '"') def _get_content(self, content_id, password=None): + headers = {"Authorization": "Bearer " + self.api_token} + params = {"wt": self.website_token} if password is not None: - password = hashlib.sha256(password.encode()).hexdigest() - return self._api_request("getContent", { - "contentId" : content_id, - "token" : self.api_token, - "wt" : self.website_token, - "password" : password, - }) - - def _api_request(self, endpoint, params=None): + params["password"] = hashlib.sha256(password.encode()).hexdigest() + return self._api_request("contents/" + content_id, params, headers) + + def _api_request(self, endpoint, params=None, headers=None, method="GET"): response = self.request( - "https://api.gofile.io/" + endpoint, params=params).json() + "https://api.gofile.io/" + endpoint, + method=method, params=params, headers=headers, + ).json() if response["status"] != "ok": if response["status"] == "error-notFound": raise exception.NotFoundError("content") + if response["status"] == "error-passwordRequired": + raise exception.AuthorizationError("Password required") raise exception.StopExtraction( "%s failed (Status: %s)", endpoint, response["status"]) diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py index 20491b5..aadce6c 100644 --- a/gallery_dl/extractor/hiperdex.py +++ b/gallery_dl/extractor/hiperdex.py @@ -25,7 +25,7 @@ class HiperdexBase(): @memcache(keyarg=1) def manga_data(self, manga, page=None): if not page: - url = "{}/manga/{}/".format(self.root, manga) + url = "{}/mangas/{}/".format(self.root, manga) page = self.request(url).text extr = text.extract_from(page) @@ -33,7 +33,7 @@ class HiperdexBase(): "url" : text.unescape(extr( 'property="og:url" content="', '"')), "manga" : text.unescape(extr( - '"headline": "', '"')), + ' property="name" title="', '"')), "score" : text.parse_float(extr( 'id="averagerate">', '<')), "author" : text.remove_html(extr( @@ -68,8 +68,8 @@ 
diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py
index 289f91c..f0eb4e9 100644
--- a/gallery_dl/extractor/gofile.py
+++ b/gallery_dl/extractor/gofile.py
@@ -41,9 +41,13 @@ class GofileFolderExtractor(Extractor):
         folder = self._get_content(self.content_id, password)
         yield Message.Directory, folder

+        try:
+            contents = folder.pop("children")
+        except KeyError:
+            raise exception.AuthorizationError("Password required")
+
         num = 0
-        contents = folder.pop("contents")
-        for content_id in folder["childs"]:
+        for content_id in folder["childrenIds"]:
             content = contents[content_id]
             content["folder"] = folder
@@ -67,31 +71,32 @@ class GofileFolderExtractor(Extractor):
     @memcache()
     def _create_account(self):
         self.log.debug("Creating temporary account")
-        return self._api_request("createAccount")["token"]
+        return self._api_request("accounts", method="POST")["token"]

     @cache(maxage=86400)
     def _get_website_token(self):
         self.log.debug("Fetching website token")
         page = self.request(self.root + "/dist/js/alljs.js").text
-        return text.extr(page, 'fetchData.wt = "', '"')
+        return text.extr(page, 'wt: "', '"')

     def _get_content(self, content_id, password=None):
+        headers = {"Authorization": "Bearer " + self.api_token}
+        params = {"wt": self.website_token}
         if password is not None:
-            password = hashlib.sha256(password.encode()).hexdigest()
-        return self._api_request("getContent", {
-            "contentId" : content_id,
-            "token"     : self.api_token,
-            "wt"        : self.website_token,
-            "password"  : password,
-        })
-
-    def _api_request(self, endpoint, params=None):
+            params["password"] = hashlib.sha256(password.encode()).hexdigest()
+        return self._api_request("contents/" + content_id, params, headers)
+
+    def _api_request(self, endpoint, params=None, headers=None, method="GET"):
         response = self.request(
-            "https://api.gofile.io/" + endpoint, params=params).json()
+            "https://api.gofile.io/" + endpoint,
+            method=method, params=params, headers=headers,
+        ).json()

         if response["status"] != "ok":
             if response["status"] == "error-notFound":
                 raise exception.NotFoundError("content")
+            if response["status"] == "error-passwordRequired":
+                raise exception.AuthorizationError("Password required")
             raise exception.StopExtraction(
                 "%s failed (Status: %s)", endpoint, response["status"])

diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index 20491b5..aadce6c 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -25,7 +25,7 @@ class HiperdexBase():
     @memcache(keyarg=1)
     def manga_data(self, manga, page=None):
         if not page:
-            url = "{}/manga/{}/".format(self.root, manga)
+            url = "{}/mangas/{}/".format(self.root, manga)
             page = self.request(url).text
         extr = text.extract_from(page)
@@ -33,7 +33,7 @@ class HiperdexBase():
             "url"   : text.unescape(extr(
                 'property="og:url" content="', '"')),
             "manga" : text.unescape(extr(
-                '"headline": "', '"')),
+                ' property="name" title="', '"')),
             "score" : text.parse_float(extr(
                 'id="averagerate">', '<')),
             "author": text.remove_html(extr(
@@ -68,8 +68,8 @@ class HiperdexBase():

 class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
     """Extractor for manga chapters from hiperdex.com"""
-    pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))"
-    example = "https://hiperdex.com/manga/MANGA/CHAPTER/"
+    pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+)/([^/?#]+))"
+    example = "https://hiperdex.com/mangas/MANGA/CHAPTER/"

     def __init__(self, match):
         root, path, self.manga, self.chapter = match.groups()
@@ -90,8 +90,8 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
 class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
     """Extractor for manga from hiperdex.com"""
     chapterclass = HiperdexChapterExtractor
-    pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$"
-    example = "https://hiperdex.com/manga/MANGA/"
+    pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+))/?$"
+    example = "https://hiperdex.com/mangas/MANGA/"

     def __init__(self, match):
         root, path, self.manga = match.groups()

diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index c249a3e..dfd9a31 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -101,9 +101,8 @@ class IdolcomplexExtractor(SankakuExtractor):
         page = self.request(url, retries=10).text
         extr = text.extract_from(page)

-        pid_alnum = extr('/posts/', '"')
-        vavg = extr('itemprop="ratingValue">', "<")
-        vcnt = extr('itemprop="reviewCount">', "<")
+        vavg = extr('id="rating"', "</ul>")
+        vcnt = extr('>Votes</strong>:', "<")
         pid = extr(">Post ID:", "<")
         created = extr(' title="', '"')
@@ -120,10 +119,10 @@ class IdolcomplexExtractor(SankakuExtractor):
         rating = extr(">Rating:", "<br")

         data = {
-            "id"          : text.parse_int(pid),
-            "id_alnum"    : pid_alnum,
+            "id"          : pid.strip(),
             "md5"         : file_url.rpartition("/")[2].partition(".")[0],
-            "vote_average": text.parse_float(vavg),
+            "vote_average": (1.0 * vavg.count('class="star-full"') +
+                             0.5 * vavg.count('class="star-half"')),
             "vote_count"  : text.parse_int(vcnt),
             "created_at"  : created,
             "date"        : text.parse_datetime(
@@ -222,8 +221,8 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
     subcategory = "pool"
     directory_fmt = ("{category}", "pool", "{pool}")
     archive_fmt = "p_{pool}_{id}"
-    pattern = BASE_PATTERN + r"/pools?/show/(\d+)"
-    example = "https://idol.sankakucomplex.com/pools/show/12345"
+    pattern = BASE_PATTERN + r"/pools?/(?:show/)?(\w+)"
+    example = "https://idol.sankakucomplex.com/pools/0123456789abcdef"
     per_page = 24

     def __init__(self, match):
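The idolcomplex change above rebuilds vote_average from rendered star icons after the site dropped its itemprop microdata. A worked sketch over an invented HTML fragment (the class names come from the diff, the markup around them is assumed):

```python
# Two full stars and one half star -> 2.5
vavg = ('<li class="star-full"></li><li class="star-full"></li>'
        '<li class="star-half"></li>')
vote_average = (1.0 * vavg.count('class="star-full"') +
                0.5 * vavg.count('class="star-half"'))
assert vote_average == 2.5
```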
"{}/profile/{}/galleries?folderid=-1".format( self.root, self.user) else: + folder_name = None url = "{}/organizer/{}/".format(self.root, folder_id) params = {"page": 0} + extr = text.extract_from(self.request(url, params=params).text) + if not folder_name: + folder_name = extr("class'blk_galleries'><b>", "</b>") + while True: - extr = text.extract_from(self.request(url, params=params).text) cnt = 0 while True: - gid = extr('<a href="/gallery/', '"') + gid = extr(' id="gid-', '"') if not gid: break - yield gid, extr("<b>", "<") + yield gid, extr("<b>", "<"), folder_name cnt += 1 if cnt < 20: break params["page"] += 1 + extr = text.extract_from(self.request(url, params=params).text) class ImagefapUserExtractor(ImagefapExtractor): diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 8884d3e..86b1edd 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -39,10 +39,15 @@ class ImgurExtractor(Extractor): image["url"] = url = "https://i.imgur.com/{}.{}".format( image["id"], image["ext"]) image["date"] = text.parse_datetime(image["created_at"]) + image["_http_validate"] = self._validate text.nameext_from_url(url, image) return url + def _validate(self, response): + return (not response.history or + not response.url.endswith("/removed.png")) + def _items_queue(self, items): album_ex = ImgurAlbumExtractor image_ex = ImgurImageExtractor diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 6eae7db..9c2b1de 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -165,7 +165,7 @@ class InstagramExtractor(Extractor): data = { "post_id" : post["pk"], "post_shortcode": post["code"], - "likes": post["like_count"], + "likes": post.get("like_count", 0), "pinned": post.get("timeline_pinned_user_ids", ()), "date": text.parse_timestamp(post.get("taken_at")), } @@ -689,7 +689,10 @@ class InstagramRestAPI(): def reels_media(self, reel_ids): endpoint = "/v1/feed/reels_media/" params = {"reel_ids": reel_ids} - return self._call(endpoint, params=params)["reels_media"] + try: + return self._call(endpoint, params=params)["reels_media"] + except KeyError: + raise exception.AuthorizationError("Login required") def tags_media(self, tag): for section in self.tags_sections(tag): @@ -733,7 +736,7 @@ class InstagramRestAPI(): not user["followed_by_viewer"]: name = user["username"] s = "" if name.endswith("s") else "s" - raise exception.StopExtraction("%s'%s posts are private", name, s) + self.extractor.log.warning("%s'%s posts are private", name, s) self.extractor._assign_user(user) return user["id"] diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index fd5a73a..9c77b7a 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -15,7 +15,7 @@ import itertools import json import re -BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.(party|su)" +BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.(su|party)" USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)" HASH_PATTERN = r"/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})" @@ -41,9 +41,12 @@ class KemonopartyExtractor(Extractor): self.revisions = self.config("revisions") if self.revisions: self.revisions_unique = (self.revisions == "unique") + order = self.config("order-revisions") + self.revisions_reverse = order[0] in ("r", "a") if order else False + self._prepare_ddosguard_cookies() self._find_inline = re.compile( - 
r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+' + r'src="(?:https?://(?:kemono|coomer)\.(?:su|party))?(/inline/[^"]+' r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall self._json_dumps = json.JSONEncoder( ensure_ascii=False, check_circular=False, @@ -232,6 +235,7 @@ class KemonopartyExtractor(Extractor): except exception.HttpError: post["revision_hash"] = self._revision_hash(post) post["revision_index"] = 1 + post["revision_count"] = 1 return (post,) revs.insert(0, post) @@ -247,22 +251,30 @@ class KemonopartyExtractor(Extractor): uniq.append(rev) revs = uniq - idx = len(revs) + cnt = idx = len(revs) for rev in revs: rev["revision_index"] = idx + rev["revision_count"] = cnt idx -= 1 + if self.revisions_reverse: + revs.reverse() + return revs def _revisions_all(self, url): revs = self.request(url + "/revisions").json() - idx = len(revs) + cnt = idx = len(revs) for rev in revs: rev["revision_hash"] = self._revision_hash(rev) rev["revision_index"] = idx + rev["revision_count"] = cnt idx -= 1 + if self.revisions_reverse: + revs.reverse() + return revs def _revision_hash(self, revision): diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py index d4ccf33..12e8860 100644 --- a/gallery_dl/extractor/lensdump.py +++ b/gallery_dl/extractor/lensdump.py @@ -104,7 +104,7 @@ class LensdumpImageExtractor(LensdumpBase, Extractor): filename_fmt = "{category}_{id}{title:?_//}.{extension}" directory_fmt = ("{category}",) archive_fmt = "{id}" - pattern = BASE_PATTERN + r"/i/(\w+)" + pattern = r"(?:https?://)?(?:(?:i\d?\.)?lensdump\.com|\w\.l3n\.co)/i/(\w+)" example = "https://lensdump.com/i/ID" def __init__(self, match): diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index 68b4196..030d7d1 100644 --- a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -70,7 +70,11 @@ class MastodonExtractor(BaseExtractor): def _check_moved(self, account): self._check_moved = None - if "moved" in account: + # Certain fediverse software (such as Iceshrimp and Sharkey) have a + # null account "moved" field instead of not having it outright. + # To handle this, check if the "moved" value is truthy instead + # if only it exists. 
+ if account.get("moved"): self.log.warning("Account '%s' moved to '%s'", account["acct"], account["moved"]["acct"]) diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py index 55faf9e..d3150e6 100644 --- a/gallery_dl/extractor/naver.py +++ b/gallery_dl/extractor/naver.py @@ -26,7 +26,8 @@ class NaverPostExtractor(NaverBase, GalleryExtractor): "{post[date]:%Y-%m-%d} {post[title]}") archive_fmt = "{blog[id]}_{post[num]}_{num}" pattern = (r"(?:https?://)?blog\.naver\.com/" - r"(?:PostView\.nhn\?blogId=(\w+)&logNo=(\d+)|(\w+)/(\d+)/?$)") + r"(?:PostView\.n(?:aver|hn)\?blogId=(\w+)&logNo=(\d+)|" + r"(\w+)/(\d+)/?$)") example = "https://blog.naver.com/BLOGID/12345" def __init__(self, match): @@ -46,8 +47,10 @@ class NaverPostExtractor(NaverBase, GalleryExtractor): extr = text.extract_from(page) data = { "post": { - "title" : extr('"og:title" content="', '"'), - "description": extr('"og:description" content="', '"'), + "title" : text.unescape(extr( + '"og:title" content="', '"')), + "description": text.unescape(extr( + '"og:description" content="', '"')).replace(" ", " "), "num" : text.parse_int(self.post_id), }, "blog": { @@ -62,10 +65,13 @@ class NaverPostExtractor(NaverBase, GalleryExtractor): return data def images(self, page): - return [ - (url.replace("://post", "://blog", 1).partition("?")[0], None) - for url in text.extract_iter(page, 'data-lazy-src="', '"') - ] + results = [] + for url in text.extract_iter(page, 'data-lazy-src="', '"'): + url = url.replace("://post", "://blog", 1).partition("?")[0] + if "\ufffd" in text.unquote(url): + url = text.unquote(url, encoding="EUC-KR") + results.append((url, None)) + return results class NaverBlogExtractor(NaverBase, Extractor): @@ -73,7 +79,8 @@ class NaverBlogExtractor(NaverBase, Extractor): subcategory = "blog" categorytransfer = True pattern = (r"(?:https?://)?blog\.naver\.com/" - r"(?:PostList.nhn\?(?:[^&#]+&)*blogId=([^&#]+)|(\w+)/?$)") + r"(?:PostList\.n(?:aver|hn)\?(?:[^&#]+&)*blogId=([^&#]+)|" + r"(\w+)/?$)") example = "https://blog.naver.com/BLOGID" def __init__(self, match): @@ -81,12 +88,11 @@ class NaverBlogExtractor(NaverBase, Extractor): self.blog_id = match.group(1) or match.group(2) def items(self): - # fetch first post number url = "{}/PostList.nhn?blogId={}".format(self.root, self.blog_id) - post_num = text.extract( + post_num = text.extr( self.request(url).text, 'gnFirstLogNo = "', '"', - )[0] + ) # setup params for API calls url = "{}/PostViewBottomTitleListAsync.nhn".format(self.root) diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index 9614513..c50c013 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -19,7 +19,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor): directory_fmt = ("{category}", "{user_id}") filename_fmt = "{image_id}_p{num}.{extension}" archive_fmt = "{image_id}_{num}" - request_interval = (1.0, 2.0) + request_interval = (2.0, 4.0) def __init__(self, match): BaseExtractor.__init__(self, match) diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py index d36f509..2bce597 100644 --- a/gallery_dl/extractor/nitter.py +++ b/gallery_dl/extractor/nitter.py @@ -219,7 +219,10 @@ class NitterExtractor(BaseExtractor): self.user_obj = self._user_from_html(tweets_html[0]) for html, quote in map(self._extract_quote, tweets_html[1:]): - yield self._tweet_from_html(html) + tweet = self._tweet_from_html(html) + if not tweet["date"]: + continue + yield tweet if quoted and quote: yield self._tweet_from_quote(quote) 
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 5226724..b21e1eb 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -26,13 +26,13 @@ class PahealExtractor(Extractor):
         data = self.get_metadata()

         for post in self.get_posts():
-            url = post["file_url"]
-            for key in ("id", "width", "height"):
-                post[key] = text.parse_int(post[key])
+            post["id"] = text.parse_int(post["id"])
             post["tags"] = text.unquote(post["tags"])
+            post["width"] = text.parse_int(post["width"])
+            post["height"] = text.parse_int(post["height"])
             post.update(data)
             yield Message.Directory, post
-            yield Message.Url, url, post
+            yield Message.Url, post["file_url"], post

     def get_metadata(self):
         """Return general metadata"""
@@ -114,17 +114,19 @@ class PahealTagExtractor(PahealExtractor):
         tags, data, date = data.split("\n")
         dimensions, size, ext = data.split(" // ")

-        tags = text.unescape(tags)
         width, _, height = dimensions.partition("x")
         height, _, duration = height.partition(", ")

         return {
-            "id": pid, "md5": md5, "file_url": url,
-            "width": width, "height": height,
-            "duration": text.parse_float(duration[:-1]),
-            "tags": tags,
-            "size": text.parse_bytes(size[:-1]),
-            "date": text.parse_datetime(date, "%B %d, %Y; %H:%M"),
+            "id"       : pid,
+            "md5"      : md5,
+            "file_url" : url,
+            "width"    : width,
+            "height"   : height,
+            "duration" : text.parse_float(duration[:-1]),
+            "tags"     : text.unescape(tags),
+            "size"     : text.parse_bytes(size[:-1]),
+            "date"     : text.parse_datetime(date, "%B %d, %Y; %H:%M"),
             "filename" : "{} - {}".format(pid, tags),
             "extension": ext,
         }

diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index b9821f2..862a7db 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -650,7 +650,7 @@ class PixivNovelExtractor(PixivExtractor):
             yield Message.Directory, novel

             try:
-                content = self.api.novel_text(novel["id"])["novel_text"]
+                content = self.api.novel_webview(novel["id"])["text"]
             except Exception:
                 self.log.warning("Unable to download novel %s", novel["id"])
                 continue
@@ -663,7 +663,7 @@ class PixivNovelExtractor(PixivExtractor):
                 illusts = {}

                 for marker in text.extract_iter(content, "[", "]"):
-                    if marker.startswith("[jumpuri:If you would like to "):
+                    if marker.startswith("uploadedimage:"):
                         desktop = True
                     elif marker.startswith("pixivimage:"):
                         illusts[marker[11:].partition("-")[0]] = None
@@ -918,6 +918,15 @@ class PixivAppAPI():
         params = {"novel_id": novel_id}
         return self._call("/v1/novel/text", params)

+    def novel_webview(self, novel_id):
+        params = {"id": novel_id, "viewer_version": "20221031_ai"}
+        return self._call(
+            "/webview/v2/novel", params, self._novel_webview_parse)
+
+    def _novel_webview_parse(self, response):
+        return util.json_loads(text.extr(
+            response.text, "novel: ", ",\n"))
+
     def search_illust(self, word, sort=None, target=None, duration=None,
                       date_start=None, date_end=None):
         params = {"word": word, "search_target": target,
@@ -962,13 +971,17 @@ class PixivAppAPI():
         params = {"illust_id": illust_id}
         return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"]

-    def _call(self, endpoint, params=None):
+    def _call(self, endpoint, params=None, parse=None):
         url = "https://app-api.pixiv.net" + endpoint

         while True:
             self.login()
             response = self.extractor.request(url, params=params, fatal=False)
-            data = response.json()
+
+            if parse:
+                data = parse(response)
+            else:
+                data = response.json()

             if "error" not in data:
                 return data
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py
index 7ff40a3..c7283fc 100644
--- a/gallery_dl/extractor/pornhub.py
+++ b/gallery_dl/extractor/pornhub.py
@@ -143,6 +143,9 @@ class PornhubGifExtractor(PornhubExtractor):
             "url"      : extr('"contentUrl": "', '"'),
             "date"     : text.parse_datetime(
                 extr('"uploadDate": "', '"'), "%Y-%m-%d"),
+            "viewkey"  : extr('From this video: '
+                              '<a href="/view_video.php?viewkey=', '"'),
+            "timestamp": extr('lass="directLink tstamp" rel="nofollow">', '<'),
             "user"     : text.remove_html(extr("Created by:", "</div>")),
         }

diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 2ef0f9f..e099c7e 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -191,6 +191,8 @@ class RedditExtractor(Extractor):
             try:
                 if "reddit_video_preview" in post["preview"]:
                     video = post["preview"]["reddit_video_preview"]
+                    if "fallback_url" in video:
+                        yield video["fallback_url"]
                     if "dash_url" in video:
                         yield "ytdl:" + video["dash_url"]
                     if "hls_url" in video:
@@ -200,6 +202,12 @@ class RedditExtractor(Extractor):

             try:
                 for image in post["preview"]["images"]:
+                    variants = image.get("variants")
+                    if variants:
+                        if "gif" in variants:
+                            yield variants["gif"]["source"]["url"]
+                        if "mp4" in variants:
+                            yield variants["mp4"]["source"]["url"]
                     yield image["source"]["url"]
             except Exception as exc:
                 self.log.debug("%s: %s", exc.__class__.__name__, exc)
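The reddit change above yields animated preview variants ahead of the still source image. A sketch of the resulting candidate order, assuming the usual preview JSON layout:

```python
# Yield preview URLs in the order the updated extractor tries them:
# GIF variant, MP4 variant, then the still source image.
def preview_urls(image):
    variants = image.get("variants") or {}
    if "gif" in variants:
        yield variants["gif"]["source"]["url"]
    if "mp4" in variants:
        yield variants["mp4"]["source"]["url"]
    yield image["source"]["url"]
```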
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 6185acb..327bcd1 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -52,23 +52,22 @@ class RedgifsExtractor(Extractor):
                 gif.update(metadata)
                 gif["count"] = cnt
+                gif["date"] = text.parse_timestamp(gif.get("createDate"))
                 yield Message.Directory, gif

             for num, gif in enumerate(gifs, enum):
-                url = self._process(gif)
+                gif["_fallback"] = formats = self._formats(gif)
+                url = next(formats, None)
+
                 if not url:
                     self.log.warning(
                         "Skipping '%s' (format not available)", gif["id"])
                     continue
+
                 gif["num"] = num
                 gif["count"] = cnt
                 yield Message.Url, url, gif

-    def _process(self, gif):
-        gif["_fallback"] = formats = self._formats(gif)
-        gif["date"] = text.parse_timestamp(gif.get("createDate"))
-        return next(formats, None)
-
     def _formats(self, gif):
         urls = gif["urls"]
         for fmt in self.formats:

diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 0b29ed0..38a2d16 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -7,7 +7,7 @@
 """Extractors for https://skeb.jp/"""

 from .common import Extractor, Message
-from .. import text
+from .. import text, exception
 import itertools
@@ -26,6 +26,19 @@ class SkebExtractor(Extractor):
     def _init(self):
         self.thumbnails = self.config("thumbnails", False)
         self.article = self.config("article", False)
+        self.headers = {"Accept": "application/json, text/plain, */*"}
+
+        if "Authorization" not in self.session.headers:
+            self.headers["Authorization"] = "Bearer null"
+
+    def request(self, url, **kwargs):
+        while True:
+            try:
+                return Extractor.request(self, url, **kwargs)
+            except exception.HttpError as exc:
+                if exc.status == 429 and "request_key" in exc.response.cookies:
+                    continue
+                raise

     def items(self):
         metadata = self.metadata()
@@ -42,6 +55,12 @@ class SkebExtractor(Extractor):
                 url = file["file_url"]
                 yield Message.Url, url, text.nameext_from_url(url, post)

+    def _items_users(self):
+        base = self.root + "/@"
+        for user in self.users():
+            user["_extractor"] = SkebUserExtractor
+            yield Message.Queue, base + user["screen_name"], user
+
     def posts(self):
         """Return post number"""

@@ -49,11 +68,11 @@ class SkebExtractor(Extractor):
         """Return additional metadata"""

     def _pagination(self, url, params):
-        headers = {"Authorization": "Bearer null"}
         params["offset"] = 0

         while True:
-            posts = self.request(url, params=params, headers=headers).json()
+            posts = self.request(
+                url, params=params, headers=self.headers).json()

             for post in posts:
                 parts = post["path"].split("/")
@@ -70,11 +89,24 @@ class SkebExtractor(Extractor):
                 return
             params["offset"] += 30

+    def _pagination_users(self, endpoint, params):
+        url = "{}/api{}".format(self.root, endpoint)
+        params["offset"] = 0
+        params["limit"] = 90
+
+        while True:
+            data = self.request(
+                url, params=params, headers=self.headers).json()
+            yield from data
+
+            if len(data) < params["limit"]:
+                return
+            params["offset"] += params["limit"]
+
     def _get_post_data(self, user_name, post_num):
         url = "{}/api/users/{}/works/{}".format(
             self.root, user_name, post_num)
-        headers = {"Authorization": "Bearer null"}
-        resp = self.request(url, headers=headers).json()
+        resp = self.request(url, headers=self.headers).json()
         creator = resp["creator"]
         post = {
             "post_id"          : resp["id"],
@@ -244,22 +276,23 @@ class SkebFollowingExtractor(SkebExtractor):
     pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/following_creators"
     example = "https://skeb.jp/@USER/following_creators"

-    def items(self):
-        for user in self.users():
-            url = "{}/@{}".format(self.root, user["screen_name"])
-            user["_extractor"] = SkebUserExtractor
-            yield Message.Queue, url, user
+    items = SkebExtractor._items_users

     def users(self):
-        url = "{}/api/users/{}/following_creators".format(
-            self.root, self.user_name)
-        params = {"sort": "date", "offset": 0, "limit": 90}
-        headers = {"Authorization": "Bearer null"}
+        endpoint = "/users/{}/following_creators".format(self.user_name)
+        params = {"sort": "date"}
+        return self._pagination_users(endpoint, params)

-        while True:
-            data = self.request(url, params=params, headers=headers).json()
-            yield from data
-            if len(data) < params["limit"]:
-                return
-            params["offset"] += params["limit"]
+
+class SkebFollowingUsersExtractor(SkebExtractor):
+    """Extractor for your followed users"""
+    subcategory = "following-users"
+    pattern = r"(?:https?://)?skeb\.jp/following_users()"
+    example = "https://skeb.jp/following_users"
+
+    items = SkebExtractor._items_users
+
+    def users(self):
+        endpoint = "/following_users"
+        params = {}
+        return self._pagination_users(endpoint, params)
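The skeb refactor above centralizes user-list fetching in _pagination_users(): offset/limit pages of 90 entries, stopping at the first short page. Its core loop as a standalone sketch (the fetch callback is an assumption standing in for the authorized API request):

```python
# Page through an offset/limit user endpoint until a short page appears.
def paginate_users(fetch, limit=90):
    offset = 0
    while True:
        data = fetch(offset=offset, limit=limit)
        yield from data
        if len(data) < limit:
            return
        offset += limit
```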
diff --git a/gallery_dl/extractor/steamgriddb.py b/gallery_dl/extractor/steamgriddb.py
index 9d46fd6..8582824 100644
--- a/gallery_dl/extractor/steamgriddb.py
+++ b/gallery_dl/extractor/steamgriddb.py
@@ -163,6 +163,9 @@ class SteamgriddbAssetExtractor(SteamgriddbExtractor):
     def assets(self):
         endpoint = "/api/public/asset/" + self.asset_type + "/" + self.asset_id
         asset = self._call(endpoint)["asset"]
+        if asset is None:
+            raise exception.NotFoundError("asset ({}:{})".format(
+                self.asset_type, self.asset_id))
         return (asset,)

diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 31fb891..d4adfed 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -175,7 +175,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
             "author_id"  : text.parse_int(extr('data-user-id="', '"')),
             "author_nick": text.unescape(extr('alt="', '"')),
             "date"       : self._parse_datetime(extr(
-                'class="section-subtitle">', '<')),
+                '<span class="star_link-types">', '<')),
             "content"    : (extr(
                 '<div class="post-content', '<div class="post-uploads')
                 .partition(">")[2]),

diff --git a/gallery_dl/extractor/test.py b/gallery_dl/extractor/test.py
deleted file mode 100644
index e3f9f74..0000000
--- a/gallery_dl/extractor/test.py
+++ /dev/null
@@ -1,81 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2016-2023 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Utility extractor to execute tests of other extractors"""
-
-from .common import Extractor, Message
-from .. import extractor, exception
-
-
-class TestExtractor(Extractor):
-    """Extractor to select and run the test URLs of other extractors
-
-    The general form is 'test:<categories>:<subcategories>:<indices>', where
-    <categories> and <subcategories> are comma-separated (sub)category names
-    and <indices> is a comma-separated list of array indices.
-    To select all possible values for a field use the star '*' character or
-    leave the field empty.
-
-    Examples:
-        - test:pixiv
-            run all pixiv tests
-
-        - test:pixiv:user,favorite:0
-            run the first test of the PixivUser- and PixivFavoriteExtractor
-
-        - test:
-            run all tests
-    """
-    category = "test"
-    pattern = r"t(?:est)?:([^:]*)(?::([^:]*)(?::(\*|[\d,]*))?)?$"
-    example = "test:CATEGORY"
-
-    def __init__(self, match):
-        Extractor.__init__(self, match)
-        categories, subcategories, indices = match.groups()
-        self.categories = self._split(categories)
-        self.subcategories = self._split(subcategories)
-        self.indices = self._split(indices) or self
-
-    def items(self):
-        extractors = extractor.extractors()
-
-        if self.categories:
-            extractors = [
-                extr for extr in extractors
-                if extr.category in self.categories
-            ]
-
-        if self.subcategories:
-            extractors = [
-                extr for extr in extractors
-                if extr.subcategory in self.subcategories
-            ]
-
-        tests = [
-            test
-            for extr in extractors
-            for index, test in enumerate(extr._get_tests())
-            if str(index) in self.indices
-        ]
-
-        if not tests:
-            raise exception.NotFoundError("test")
-
-        for test in tests:
-            yield Message.Queue, test[0], {}
-
-    @staticmethod
-    def __contains__(_):
-        return True
-
-    @staticmethod
-    def _split(value):
-        if value and value != "*":
-            return value.split(",")
-        return None
"/1.1/guest/activate.json" - self.extractor.log.info("Requesting guest token") + self.log.info("Requesting guest token") return str(self._call( endpoint, None, "POST", False, "https://api.twitter.com", )["guest_token"]) @@ -1274,17 +1290,35 @@ class TwitterAPI(): if response.status_code < 400: data = response.json() - if not data.get("errors") or not any( - (e.get("message") or "").lower().startswith("timeout") - for e in data["errors"]): - return data # success or non-timeout errors - msg = data["errors"][0].get("message") or "Unspecified" - self.extractor.log.debug("Internal Twitter error: '%s'", msg) + errors = data.get("errors") + if not errors: + return data - if self.headers["x-twitter-auth-type"]: - self.extractor.log.debug("Retrying API request") - continue # retry + retry = False + for error in errors: + msg = error.get("message") or "Unspecified" + self.log.debug("API error: '%s'", msg) + + if "this account is temporarily locked" in msg: + msg = "Account temporarily locked" + if self.extractor.config("locked") != "wait": + raise exception.AuthorizationError(msg) + self.log.warning("%s. Press ENTER to retry.", msg) + try: + input() + except (EOFError, OSError): + pass + retry = True + + elif msg.lower().startswith("timeout"): + retry = True + + if not retry: + return data + elif self.headers["x-twitter-auth-type"]: + self.log.debug("Retrying API request") + continue # fall through to "Login Required" response.status_code = 404 @@ -1374,7 +1408,7 @@ class TwitterAPI(): try: tweet = tweets[tweet_id] except KeyError: - self.extractor.log.debug("Skipping %s (deleted)", tweet_id) + self.log.debug("Skipping %s (deleted)", tweet_id) continue if "retweeted_status_id_str" in tweet: @@ -1606,8 +1640,10 @@ class TwitterAPI(): variables["cursor"] = cursor def _pagination_users(self, endpoint, variables, path=None): - params = {"variables": None, - "features" : self._json_dumps(self.features_pagination)} + params = { + "variables": None, + "features" : self._json_dumps(self.features_pagination), + } while True: cursor = entry = None @@ -1651,9 +1687,9 @@ class TwitterAPI(): if text.startswith("Age-restricted"): if self._nsfw_warning: self._nsfw_warning = False - self.extractor.log.warning('"%s"', text) + self.log.warning('"%s"', text) - self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text) + self.log.debug("Skipping %s ('%s')", tweet_id, text) @cache(maxage=365*86400, keyarg=1) diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py index 5374f1c..6dfb23c 100644 --- a/gallery_dl/extractor/vipergirls.py +++ b/gallery_dl/extractor/vipergirls.py @@ -26,17 +26,39 @@ class VipergirlsExtractor(Extractor): cookies_domain = ".vipergirls.to" cookies_names = ("vg_userid", "vg_password") + def _init(self): + domain = self.config("domain") + if domain: + self.root = text.ensure_http_scheme(domain) + def items(self): self.login() + posts = self.posts() + + like = self.config("like") + if like: + user_hash = posts[0].get("hash") + if len(user_hash) < 16: + self.log.warning("Login required to like posts") + like = False - for post in self.posts(): + posts = posts.iter("post") + if self.page: + util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15) + + for post in posts: data = post.attrib data["thread_id"] = self.thread_id yield Message.Directory, data + + image = None for image in post: yield Message.Queue, image.attrib["main_url"], data + if image is not None and like: + self.like(post, user_hash) + def login(self): if self.cookies_check(self.cookies_names): 
diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py
index 3bb635d..e91f45f 100644
--- a/gallery_dl/extractor/warosu.py
+++ b/gallery_dl/extractor/warosu.py
@@ -50,7 +50,7 @@ class WarosuThreadExtractor(Extractor):
         title = text.unescape(text.extr(page, "class=filetitle>", "<"))
         return {
             "board" : self.board,
-            "board_name": boardname.rpartition(" - ")[2],
+            "board_name": boardname.split(" - ")[1],
             "thread" : self.thread,
             "title" : title,
         }
@@ -64,8 +64,7 @@ class WarosuThreadExtractor(Extractor):
     def parse(self, post):
         """Build post object by extracting data from an HTML post"""
         data = self._extract_post(post)
-        if "<span> File:" in post:
-            self._extract_image(post, data)
+        if "<span> File:" in post and self._extract_image(post, data):
             part = data["image"].rpartition("/")[2]
             data["tim"], _, data["extension"] = part.partition(".")
             data["ext"] = "." + data["extension"]
@@ -91,6 +90,11 @@ class WarosuThreadExtractor(Extractor):
             "", "<").rstrip().rpartition(".")[0])
         extr("<br>", "")
 
-        data["image"] = url = extr("<a href=", ">")
-        if url[0] == "/":
-            data["image"] = self.root + url
+        url = extr("<a href=", ">")
+        if url:
+            if url[0] == "/":
+                data["image"] = self.root + url
+            else:
+                data["image"] = url
+            return True
+        return False
+ data["extension"] @@ -91,6 +90,11 @@ class WarosuThreadExtractor(Extractor): "", "<").rstrip().rpartition(".")[0]) extr("<br>", "") - data["image"] = url = extr("<a href=", ">") - if url[0] == "/": - data["image"] = self.root + url + url = extr("<a href=", ">") + if url: + if url[0] == "/": + data["image"] = self.root + url + else: + data["image"] = url + return True + return False diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 5b45148..83b1642 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -30,9 +30,9 @@ class WeiboExtractor(Extractor): self._prefix, self.user = match.groups() def _init(self): - self.retweets = self.config("retweets", True) - self.videos = self.config("videos", True) self.livephoto = self.config("livephoto", True) + self.retweets = self.config("retweets", False) + self.videos = self.config("videos", True) self.gifs = self.config("gifs", True) self.gifs_video = (self.gifs == "video") @@ -59,15 +59,25 @@ class WeiboExtractor(Extractor): for status in self.statuses(): - files = [] - if self.retweets and "retweeted_status" in status: + if "ori_mid" in status and not self.retweets: + self.log.debug("Skipping %s (快转 retweet)", status["id"]) + continue + + if "retweeted_status" in status: + if not self.retweets: + self.log.debug("Skipping %s (retweet)", status["id"]) + continue + + # videos of the original post are in status + # images of the original post are in status["retweeted_status"] + files = [] + self._extract_status(status, files) + self._extract_status(status["retweeted_status"], files) + if original_retweets: status = status["retweeted_status"] - self._extract_status(status, files) - else: - self._extract_status(status, files) - self._extract_status(status["retweeted_status"], files) else: + files = [] self._extract_status(status, files) status["date"] = text.parse_datetime( @@ -118,7 +128,7 @@ class WeiboExtractor(Extractor): append(pic["largest"].copy()) file = {"url": pic["video"]} - file["filehame"], _, file["extension"] = \ + file["filename"], _, file["extension"] = \ pic["video"].rpartition("%2F")[2].rpartition(".") append(file) @@ -176,23 +186,34 @@ class WeiboExtractor(Extractor): data = data["data"] statuses = data["list"] - if not statuses: - return yield from statuses - if "next_cursor" in data: # videos, newvideo - if data["next_cursor"] == -1: + # videos, newvideo + cursor = data.get("next_cursor") + if cursor: + if cursor == -1: return - params["cursor"] = data["next_cursor"] - elif "page" in params: # home, article - params["page"] += 1 - elif data["since_id"]: # album + params["cursor"] = cursor + continue + + # album + since_id = data.get("since_id") + if since_id: params["sinceid"] = data["since_id"] - else: # feed, last album page - try: - params["since_id"] = statuses[-1]["id"] - 1 - except KeyError: + continue + + # home, article + if "page" in params: + if not statuses: return + params["page"] += 1 + continue + + # feed, last album page + try: + params["since_id"] = statuses[-1]["id"] - 1 + except LookupError: + return def _sina_visitor_system(self, response): self.log.info("Sina Visitor System") diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py index c93f33f..ac00682 100644 --- a/gallery_dl/extractor/wikimedia.py +++ b/gallery_dl/extractor/wikimedia.py @@ -162,6 +162,11 @@ BASE_PATTERN = WikimediaExtractor.update({ "pattern": r"(?:www\.)?pidgi\.net", "api-path": "/wiki/api.php", }, + "azurlanewiki": { + "root": "https://azurlane.koumakan.jp", 
+ "pattern": r"azurlane\.koumakan\.jp", + "api-path": "/w/api.php", + }, }) diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py index 46e574e..da9d6b0 100644 --- a/gallery_dl/extractor/xvideos.py +++ b/gallery_dl/extractor/xvideos.py @@ -11,6 +11,9 @@ from .common import GalleryExtractor, Extractor, Message from .. import text, util +BASE_PATTERN = (r"(?:https?://)?(?:www\.)?xvideos\.com" + r"/(?:profiles|(?:amateur-|model-)?channels)") + class XvideosBase(): """Base class for xvideos extractors""" @@ -25,9 +28,7 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor): "{gallery[id]} {gallery[title]}") filename_fmt = "{category}_{gallery[id]}_{num:>03}.{extension}" archive_fmt = "{gallery[id]}_{num}" - pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com" - r"/(?:profiles|amateur-channels|model-channels)" - r"/([^/?#]+)/photos/(\d+)") + pattern = BASE_PATTERN + r"/([^/?#]+)/photos/(\d+)" example = "https://www.xvideos.com/profiles/USER/photos/12345" def __init__(self, match): @@ -58,22 +59,35 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor): }, } - @staticmethod - def images(page): - """Return a list of all image urls for this gallery""" - return [ + def images(self, page): + results = [ (url, None) for url in text.extract_iter( page, '<a class="embed-responsive-item" href="', '"') ] + if not results: + return + + while len(results) % 500 == 0: + path = text.rextract(page, ' href="', '"', page.find(">Next</"))[0] + if not path: + break + page = self.request(self.root + path).text + results.extend( + (url, None) + for url in text.extract_iter( + page, '<a class="embed-responsive-item" href="', '"') + ) + + return results + class XvideosUserExtractor(XvideosBase, Extractor): """Extractor for user profiles on xvideos.com""" subcategory = "user" categorytransfer = True - pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com" - r"/profiles/([^/?#]+)/?(?:#.*)?$") + pattern = BASE_PATTERN + r"/([^/?#]+)/?(?:#.*)?$" example = "https://www.xvideos.com/profiles/USER" def __init__(self, match): diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py index 6ee96e6..fc61dff 100644 --- a/gallery_dl/extractor/zerochan.py +++ b/gallery_dl/extractor/zerochan.py @@ -10,7 +10,7 @@ from .booru import BooruExtractor from ..cache import cache -from .. import text, exception +from .. 
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index 6ee96e6..fc61dff 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -10,7 +10,7 @@
 
 from .booru import BooruExtractor
 from ..cache import cache
-from .. import text, exception
+from .. import text, util, exception
 
 BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
 
@@ -21,8 +21,11 @@ class ZerochanExtractor(BooruExtractor):
     root = "https://www.zerochan.net"
     filename_fmt = "{id}.{extension}"
     archive_fmt = "{id}"
+    page_start = 1
+    per_page = 250
     cookies_domain = ".zerochan.net"
     cookies_names = ("z_id", "z_hash")
+    request_interval = (0.5, 1.5)
 
     def login(self):
         self._logged_in = True
@@ -86,7 +89,7 @@ class ZerochanExtractor(BooruExtractor):
 
         return data
 
-    def _parse_entry_json(self, entry_id):
+    def _parse_entry_api(self, entry_id):
         url = "{}/{}?json".format(self.root, entry_id)
         item = self.request(url).json()
 
@@ -117,14 +120,22 @@ class ZerochanTagExtractor(ZerochanExtractor):
         ZerochanExtractor.__init__(self, match)
         self.search_tag, self.query = match.groups()
 
+    def _init(self):
+        if self.config("pagination") == "html":
+            self.posts = self.posts_html
+            self.per_page = 24
+        else:
+            self.posts = self.posts_api
+            self.session.headers["User-Agent"] = util.USERAGENT
+
     def metadata(self):
         return {"search_tags": text.unquote(
             self.search_tag.replace("+", " "))}
 
-    def posts(self):
+    def posts_html(self):
         url = self.root + "/" + self.search_tag
         params = text.parse_query(self.query)
-        params["p"] = text.parse_int(params.get("p"), 1)
+        params["p"] = text.parse_int(params.get("p"), self.page_start)
         metadata = self.config("metadata")
 
         while True:
@@ -140,7 +151,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
                 if metadata:
                     entry_id = extr('href="/', '"')
                     post = self._parse_entry_html(entry_id)
-                    post.update(self._parse_entry_json(entry_id))
+                    post.update(self._parse_entry_api(entry_id))
                     yield post
                 else:
                     yield {
@@ -157,6 +168,41 @@ class ZerochanTagExtractor(ZerochanExtractor):
                 break
             params["p"] += 1
 
+    def posts_api(self):
+        url = self.root + "/" + self.search_tag
+        metadata = self.config("metadata")
+        params = {
+            "json": "1",
+            "l" : self.per_page,
+            "p" : self.page_start,
+        }
+
+        static = "https://static.zerochan.net/.full."
+
+        while True:
+            data = self.request(url, params=params).json()
+            try:
+                posts = data["items"]
+            except ValueError:
+                return
+
+            if metadata:
+                for post in posts:
+                    post_id = post["id"]
+                    post.update(self._parse_entry_html(post_id))
+                    post.update(self._parse_entry_api(post_id))
+            else:
+                for post in posts:
+                    base = static + str(post["id"])
+                    post["file_url"] = base + ".jpg"
+                    post["_fallback"] = (base + ".png",)
+
+            yield from posts
+
+            if not data.get("next"):
+                return
+            params["p"] += 1
+
 
 class ZerochanImageExtractor(ZerochanExtractor):
     subcategory = "image"
@@ -170,5 +216,5 @@ class ZerochanImageExtractor(ZerochanExtractor):
     def posts(self):
         post = self._parse_entry_html(self.image_id)
         if self.config("metadata"):
-            post.update(self._parse_entry_json(self.image_id))
+            post.update(self._parse_entry_api(self.image_id))
         return (post,)
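Note: zerochan tag searches now page through the site's JSON API by default (250 posts per page, with static.zerochan.net file URLs and a PNG fallback); the old HTML scraper remains available behind the new "pagination" option. A minimal sketch for selecting it, assuming the standard gallery-dl config layout (option name and value taken from the diff):

    {
        "extractor": {
            "zerochan": {
                "pagination": "html"
            }
        }
    }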
