| field     | value |
|-----------|-------|
| author    | 2022-08-29 02:17:16 -0400 |
| committer | 2022-08-29 02:17:16 -0400 |
| commit    | a768930761f7f20587ae40a8cacca0e55c85290a (patch) |
| tree      | 5a4163db912b93fc45f717e5e43fd5be3e66f16c /gallery_dl/extractor |
| parent    | ae2a0f5622beaa6f402526f8a7b939419283a090 (diff) |
New upstream version 1.23.0 (tag: upstream/1.23.0)
Diffstat (limited to 'gallery_dl/extractor')
34 files changed, 638 insertions, 202 deletions
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 70cebb3..9e4507a 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -26,6 +26,7 @@ modules = [
     "behance",
     "blogger",
     "bunkr",
+    "catbox",
     "comicvine",
     "cyberdrop",
     "danbooru",
@@ -150,6 +151,7 @@ modules = [
     "wikieat",
     "xhamster",
     "xvideos",
+    "zerochan",
     "booru",
     "moebooru",
     "foolfuuka",
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index 19b9d97..c0e8e67 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -32,9 +32,11 @@ class ArtstationExtractor(Extractor):
         data = self.metadata()
 
         for project in self.projects():
-            for asset in self.get_project_assets(project["hash_id"]):
+            for num, asset in enumerate(
+                    self.get_project_assets(project["hash_id"]), 1):
                 asset.update(data)
                 adict = asset["asset"]
+                asset["num"] = num
                 yield Message.Directory, asset
 
                 if adict["has_embedded_player"] and self.external:
@@ -85,6 +87,7 @@ class ArtstationExtractor(Extractor):
 
         assets = data["assets"]
         del data["assets"]
+        data["count"] = len(assets)
 
         if len(assets) == 1:
             data["asset"] = assets[0]
             yield data
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 21ca991..e0885d2 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -67,9 +67,6 @@ class BloggerExtractor(Extractor):
                     key=lambda x: x["format_id"],
                 )["play_url"])
 
-            if not files:
-                continue
-
             post["author"] = post["author"]["displayName"]
             post["replies"] = post["replies"]["totalItems"]
             post["content"] = text.remove_html(content)
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 9904d0a..3091f57 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -16,10 +16,10 @@ import json
 class BunkrAlbumExtractor(LolisafeAlbumExtractor):
     """Extractor for bunkr.is albums"""
     category = "bunkr"
-    root = "https://app.bunkr.is"
+    root = "https://bunkr.is"
     pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:is|to)/a/([^/?#]+)"
     test = (
-        ("https://app.bunkr.is/a/Lktg9Keq", {
+        ("https://bunkr.is/a/Lktg9Keq", {
             "pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png",
             "content": "0c8768055e4e20e7c7259608b67799171b691140",
             "keyword": {
@@ -33,7 +33,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
             },
         }),
         # mp4 (#2239)
-        ("https://bunkr.is/a/ptRHaCn2", {
+        ("https://app.bunkr.is/a/ptRHaCn2", {
             "pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4",
             "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
         }),
@@ -70,16 +70,16 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
             album = props["album"]
             files = props["files"]
         except Exception as exc:
-            self.log.debug(exc)
+            self.log.debug(exc.__class__.__name__, exc)
             self.root = self.root.replace("bunkr", "app.bunkr", 1)
             return self._fetch_album_api(album_id)
 
         for file in files:
             name = file["name"]
+            cdn = file["cdn"]
             if name.endswith(".mp4"):
-                file["file"] = "https://media-files.bunkr.is/" + name
-            else:
-                file["file"] = file["cdn"] + "/" + name
+                cdn = cdn.replace("//cdn", "//media-files")
+            file["file"] = cdn + "/" + name
 
         return files, {
             "album_id"  : self.album_id,
diff --git a/gallery_dl/extractor/catbox.py b/gallery_dl/extractor/catbox.py
new file mode 100644
index 0000000..509108f
--- /dev/null
+++ b/gallery_dl/extractor/catbox.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://catbox.moe/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class CatboxAlbumExtractor(GalleryExtractor):
+    """Extractor for catbox albums"""
+    category = "catbox"
+    subcategory = "album"
+    root = "https://catbox.moe"
+    filename_fmt = "{filename}.{extension}"
+    directory_fmt = ("{category}", "{album_name} ({album_id})")
+    archive_fmt = "{album_id}_{filename}"
+    pattern = r"(?:https?://)?(?:www\.)?catbox\.moe(/c/[^/?#]+)"
+    test = (
+        ("https://catbox.moe/c/1igcbe", {
+            "url": "35866a88c29462814f103bc22ec031eaeb380f8a",
+            "content": "70ddb9de3872e2d17cc27e48e6bf395e5c8c0b32",
+            "pattern": r"https://files\.catbox\.moe/\w+\.\w{3}$",
+            "count": 3,
+            "keyword": {
+                "album_id": "1igcbe",
+                "album_name": "test",
+                "date": "dt:2022-08-18 00:00:00",
+                "description": "album test &>",
+            },
+        }),
+        ("https://www.catbox.moe/c/cd90s1"),
+        ("https://catbox.moe/c/w7tm47#"),
+    )
+
+    def metadata(self, page):
+        extr = text.extract_from(page)
+        return {
+            "album_id"   : self.gallery_url.rpartition("/")[2],
+            "album_name" : text.unescape(extr("<h1>", "<")),
+            "date"       : text.parse_datetime(extr(
+                "<p>Created ", "<"), "%B %d %Y"),
+            "description": text.unescape(extr("<p>", "<")),
+        }
+
+    def images(self, page):
+        return [
+            ("https://files.catbox.moe/" + path, None)
+            for path in text.extract_iter(
+                page, ">https://files.catbox.moe/", "<")
+        ]
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 6ccae7f..1b41101 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -795,12 +795,23 @@ SSL_CIPHERS = {
 }
 
 
+urllib3 = requests.packages.urllib3
+
 # detect brotli support
 try:
-    BROTLI = requests.packages.urllib3.response.brotli is not None
+    BROTLI = urllib3.response.brotli is not None
 except AttributeError:
     BROTLI = False
 
+# set (urllib3) warnings filter
+action = config.get((), "warnings", "default")
+if action:
+    try:
+        import warnings
+        warnings.simplefilter(action, urllib3.exceptions.HTTPWarning)
+    except Exception:
+        pass
+del action
+
 # Undo automatic pyOpenSSL injection by requests
 pyopenssl = config.get((), "pyopenssl", False)
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index ec0db68..8c2ed53 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -34,6 +34,7 @@ class DanbooruExtractor(BaseExtractor):
         self.per_page = iget("per-page", 200)
         self.request_interval_min = iget("request-interval-min", 0.0)
         self._pools = iget("pools")
+        self._popular_endpoint = iget("popular", "/explore/posts/popular.json")
 
         BaseExtractor.__init__(self, match)
@@ -150,6 +151,7 @@ INSTANCES = {
         "headers": {"User-Agent": "gallery-dl/{} (by mikf)".format(
             __version__)},
         "pools": "sort",
+        "popular": "/popular.json",
         "page-limit": 750,
         "per-page": 320,
         "request-interval-min": 1.0,
@@ -308,7 +310,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
     subcategory = "popular"
     directory_fmt = ("{category}", "popular", "{scale}", "{date}")
     archive_fmt = "P_{scale[0]}_{date}_{id}"
-    pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?([^#]*))?"
+    pattern = BASE_PATTERN + r"/(?:explore/posts/)?popular(?:\?([^#]*))?"
     test = (
         ("https://danbooru.donmai.us/explore/posts/popular"),
         (("https://danbooru.donmai.us/explore/posts/popular"
@@ -316,7 +318,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
             "range": "1-120",
             "count": 120,
         }),
-        ("https://e621.net/explore/posts/popular"),
+        ("https://e621.net/popular"),
         (("https://e621.net/explore/posts/popular"
           "?date=2019-06-01&scale=month"), {
             "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
@@ -345,8 +347,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
     def posts(self):
         if self.page_start is None:
             self.page_start = 1
-        return self._pagination(
-            "/explore/posts/popular.json", self.params, True)
+        return self._pagination(self._popular_endpoint, self.params, True)
 
 
 class DanbooruFavoriteExtractor(DanbooruExtractor):
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 39ae484..60f644d 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1128,11 +1128,18 @@ class DeviantartOAuthAPI():
             self._folders((deviation,))
         return deviation
 
-    def deviation_content(self, deviation_id, public=False):
+    def deviation_content(self, deviation_id, public=True):
         """Get extended content of a single Deviation"""
         endpoint = "/deviation/content"
         params = {"deviationid": deviation_id}
-        return self._call(endpoint, params=params, public=public)
+        content = self._call(endpoint, params=params, public=public)
+        if public and content["html"].startswith(
+                ' <span class=\"username-with-symbol'):
+            if self.refresh_token_key:
+                content = self._call(endpoint, params=params, public=False)
+            else:
+                self.log.warning("Private Journal")
+        return content
 
     def deviation_download(self, deviation_id, public=True):
         """Get the original file download (if allowed)"""
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index 11436cb..8481248 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -6,6 +6,7 @@
 
 """Extractors for https://www.fanbox.cc/"""
 
+import re
 from .common import Extractor, Message
 from .. import text
@@ -78,6 +79,7 @@ class FanboxExtractor(Extractor):
         num = 0
         cover_image = post.get("coverImageUrl")
         if cover_image:
+            cover_image = re.sub("/c/[0-9a-z_]+", "", cover_image)
             final_post = post.copy()
             final_post["isCoverImage"] = True
             final_post["fileUrl"] = cover_image
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 34b52ef..5e6da5b 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -88,9 +88,13 @@ BASE_PATTERN = FoolfuukaExtractor.update({
         "root": "https://boards.fireden.net",
         "pattern": r"boards\.fireden\.net",
     },
-    "nyafuu": {
-        "root": "https://archive.nyafuu.org",
-        "pattern": r"(?:archive\.)?nyafuu\.org",
+    "rozenarcana": {
+        "root": "https://archive.alice.al",
+        "pattern": r"(?:archive\.)?alice\.al",
+    },
+    "tokyochronos": {
+        "root": "https://www.tokyochronos.net",
+        "pattern": r"(?:www\.)?tokyochronos\.net",
     },
     "rbt": {
         "root": "https://rbt.asia",
@@ -111,7 +115,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
     pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)"
     test = (
         ("https://archive.4plebs.org/tg/thread/54059290", {
-            "url": "07452944164b602502b02b24521f8cee5c484d2a",
+            "url": "fd823f17b5001442b941fddcd9ec91bafedfbc79",
         }),
         ("https://archived.moe/gd/thread/309639/", {
             "url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
@@ -133,8 +137,11 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
         ("https://boards.fireden.net/sci/thread/11264294/", {
             "url": "61cab625c95584a12a30049d054931d64f8d20aa",
         }),
-        ("https://archive.nyafuu.org/c/thread/2849220/", {
-            "url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
+        ("https://archive.alice.al/c/thread/2849220/", {
+            "url": "632e2c8de05de6b3847685f4bf1b4e5c6c9e0ed5",
+        }),
+        ("https://www.tokyochronos.net/a/thread/241664141/", {
+            "url": "ae03852cf44e3dcfce5be70274cb1828e1dbb7d6",
         }),
         ("https://rbt.asia/g/thread/61487650/", {
             "url": "fadd274b25150a1bdf03a40c58db320fa3b617c4",
@@ -180,7 +187,8 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
         ("https://arch.b4k.co/meta/"),
         ("https://desuarchive.org/a/"),
         ("https://boards.fireden.net/sci/"),
-        ("https://archive.nyafuu.org/c/"),
+        ("https://archive.alice.al/c/"),
+        ("https://www.tokyochronos.net/a/"),
         ("https://rbt.asia/g/"),
         ("https://thebarchive.com/b/"),
     )
@@ -223,7 +231,8 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
         ("https://archiveofsins.com/_/search/text/test/"),
         ("https://desuarchive.org/_/search/text/test/"),
         ("https://boards.fireden.net/_/search/text/test/"),
-        ("https://archive.nyafuu.org/_/search/text/test/"),
+        ("https://archive.alice.al/_/search/text/test/"),
+        ("https://www.tokyochronos.net/_/search/text/test/"),
         ("https://rbt.asia/_/search/text/test/"),
         ("https://thebarchive.com/_/search/text/test/"),
     )
@@ -288,7 +297,8 @@ class FoolfuukaGalleryExtractor(FoolfuukaExtractor):
         ("https://arch.b4k.co/meta/gallery/"),
         ("https://desuarchive.org/a/gallery/5"),
         ("https://boards.fireden.net/sci/gallery/6"),
-        ("https://archive.nyafuu.org/c/gallery/7"),
+        ("https://archive.alice.al/c/gallery/7"),
+        ("https://www.tokyochronos.net/a/gallery/7"),
         ("https://rbt.asia/g/gallery/8"),
         ("https://thebarchive.com/b/gallery/9"),
     )
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index e8bee37..92f7ac2 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -10,7 +10,7 @@
 
 from .common import Extractor, Message
 from . import gelbooru_v02
-from .. import text, util, exception
+from .. import text, exception
 import binascii
@@ -21,10 +21,15 @@ class GelbooruBase():
     root = "https://gelbooru.com"
 
     def _api_request(self, params):
+        params["api_key"] = self.api_key
+        params["user_id"] = self.user_id
+
         url = self.root + "/index.php?page=dapi&s=post&q=index&json=1"
         data = self.request(url, params=params).json()
+
         if "post" not in data:
             return ()
+
         posts = data["post"]
         if not isinstance(posts, list):
             return (posts,)
@@ -85,28 +90,29 @@ class GelbooruTagExtractor(GelbooruBase,
 
 class GelbooruPoolExtractor(GelbooruBase,
                             gelbooru_v02.GelbooruV02PoolExtractor):
-    """Extractor for image-pools from gelbooru.com"""
+    """Extractor for gelbooru pools"""
+    per_page = 45
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
                r"\?page=pool&s=show&id=(?P<pool>\d+)")
     test = (
         ("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
             "count": 6,
         }),
-        ("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
-            "options": (("api", False),),
-            "count": 6,
-        }),
     )
 
     def metadata(self):
-        url = "{}/index.php?page=pool&s=show&id={}".format(
-            self.root, self.pool_id)
-        page = self.request(url).text
+        url = self.root + "/index.php"
+        self._params = {
+            "page": "pool",
+            "s"   : "show",
+            "id"  : self.pool_id,
+            "pid" : self.page_start,
+        }
+        self._page = self.request(url, params=self._params).text
 
-        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
+        name, pos = text.extract(self._page, "<h3>Now Viewing: ", "</h3>")
         if not name:
             raise exception.NotFoundError("pool")
-        self.post_ids = text.extract_iter(page, 'class="" id="p', '"', pos)
 
         return {
             "pool": text.parse_int(self.pool_id),
@@ -114,9 +120,23 @@ class GelbooruPoolExtractor(GelbooruBase,
         }
 
     def posts(self):
-        params = {}
-        for params["id"] in util.advance(self.post_ids, self.page_start):
-            yield from self._api_request(params)
+        url = self.root + "/index.php"
+        params = self._params
+
+        page = self._page
+        del self._page
+        data = {}
+
+        while True:
+            num_ids = 0
+            for data["id"] in text.extract_iter(page, '" id="p', '"'):
+                num_ids += 1
+                yield from self._api_request(data)
+
+            if num_ids < self.per_page:
+                return
+            params["pid"] += self.per_page
+            page = self.request(url, params=params).text
 
 
 class GelbooruPostExtractor(GelbooruBase,
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 35a3448..8214614 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -21,6 +21,9 @@ class GelbooruV02Extractor(booru.BooruExtractor):
     def __init__(self, match):
         booru.BooruExtractor.__init__(self, match)
+        self.api_key = self.config("api-key")
+        self.user_id = self.config("user-id")
+
         try:
             self.api_root = INSTANCES[self.category]["api_root"]
         except KeyError:
@@ -59,6 +62,24 @@ class GelbooruV02Extractor(booru.BooruExtractor):
                 return
             params["pid"] += 1
 
+    def _pagination_html(self, params):
+        url = self.root + "/index.php"
+        params["pid"] = self.page_start * self.per_page
+
+        data = {}
+        while True:
+            num_ids = 0
+            page = self.request(url, params=params).text
+
+            for data["id"] in text.extract_iter(page, '" id="p', '"'):
+                num_ids += 1
+                for post in self._api_request(data):
+                    yield post.attrib
+
+            if num_ids < self.per_page:
+                return
+            params["pid"] += self.per_page
+
     @staticmethod
     def _prepare(post):
         post["date"] = text.parse_datetime(
@@ -204,7 +225,12 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
     def __init__(self, match):
         GelbooruV02Extractor.__init__(self, match)
         self.pool_id = match.group(match.lastindex)
-        self.post_ids = ()
+
+        if self.category == "rule34":
+            self.posts = self._posts_pages
+            self.per_page = 45
+        else:
+            self.post_ids = ()
 
     def skip(self, num):
         self.page_start += num
@@ -232,6 +258,13 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
             for post in self._api_request(params):
                 yield post.attrib
 
+    def _posts_pages(self):
+        return self._pagination_html({
+            "page": "pool",
+            "s"   : "show",
+            "id"  : self.pool_id,
+        })
+
 
 class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
     subcategory = "favorite"
@@ -265,27 +298,11 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
         return {"favorite_id": text.parse_int(self.favorite_id)}
 
     def posts(self):
-        url = self.root + "/index.php"
-        params = {
+        return self._pagination_html({
             "page": "favorites",
             "s"   : "view",
             "id"  : self.favorite_id,
-            "pid" : self.page_start * self.per_page,
-        }
-
-        data = {}
-        while True:
-            num_ids = 0
-            page = self.request(url, params=params).text
-
-            for data["id"] in text.extract_iter(page, '" id="p', '"'):
-                num_ids += 1
-                for post in self._api_request(data):
-                    yield post.attrib
-
-            if num_ids < self.per_page:
-                return
-            params["pid"] += self.per_page
+        })
 
 
 class GelbooruV02PostExtractor(GelbooruV02Extractor):
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index ca7e692..f8b0c3b 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -174,23 +174,27 @@ class HitomiTagExtractor(Extractor):
         }
 
         offset = 0
+        total = None
         while True:
             headers["Referer"] = "{}/{}/{}.html?page={}".format(
                 self.root, self.type, self.tag, offset // 100 + 1)
             headers["Range"] = "bytes={}-{}".format(offset, offset+99)
-            nozomi = self.request(nozomi_url, headers=headers).content
+            response = self.request(nozomi_url, headers=headers)
 
-            for gallery_id in decode_nozomi(nozomi):
+            for gallery_id in decode_nozomi(response.content):
                 gallery_url = "{}/galleries/{}.html".format(
                     self.root, gallery_id)
                 yield Message.Queue, gallery_url, data
 
-            if len(nozomi) < 100:
-                return
             offset += 100
+            if total is None:
+                total = text.parse_int(
+                    response.headers["content-range"].rpartition("/")[2])
+            if offset >= total:
+                return
 
 
-@memcache()
+@memcache(maxage=1800)
 def _parse_gg(extr):
     page = extr.request("https://ltn.hitomi.la/gg.js").text
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 4a2c3bb..d56af8b 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -270,6 +270,7 @@ class InstagramExtractor(Extractor):
             "post_shortcode": post["code"],
             "likes": post["like_count"],
             "pinned": post.get("timeline_pinned_user_ids", ()),
+            "date": text.parse_timestamp(post.get("taken_at")),
         }
 
         caption = post["caption"]
@@ -399,6 +400,8 @@ class InstagramExtractor(Extractor):
             self.log.debug("Cursor: %s", self._cursor)
 
     def _pagination_api(self, endpoint, params=None):
+        if params is None:
+            params = {}
         while True:
             data = self._request_api(endpoint, params=params)
             yield from data["items"]
@@ -509,7 +512,7 @@ class InstagramChannelExtractor(InstagramExtractor):
 class InstagramSavedExtractor(InstagramExtractor):
     """Extractor for ProfilePage saved media"""
     subcategory = "saved"
-    pattern = USER_PATTERN + r"/saved"
+    pattern = USER_PATTERN + r"/saved/?$"
     test = ("https://www.instagram.com/instagram/saved/",)
 
     def posts(self):
@@ -518,6 +521,30 @@ class InstagramSavedExtractor(InstagramExtractor):
         return self._pagination_graphql(query_hash, variables)
 
 
+class InstagramCollectionExtractor(InstagramExtractor):
+    """Extractor for ProfilePage saved collection media"""
+    subcategory = "collection"
+    pattern = USER_PATTERN + r"/saved/([^/?#]+)/([^/?#]+)"
+    test = (
+        "https://www.instagram.com/instagram/saved/collection_name/123456789/",
+    )
+
+    def __init__(self, match):
+        InstagramExtractor.__init__(self, match)
+        self.user, self.collection_name, self.collection_id = match.groups()
+
+    def metadata(self):
+        return {
+            "collection_id"  : self.collection_id,
+            "collection_name": text.unescape(self.collection_name),
+        }
+
+    def posts(self):
+        endpoint = "/v1/feed/collection/{}/posts/".format(self.collection_id)
+        for item in self._pagination_api(endpoint):
+            yield item["media"]
+
+
 class InstagramTagExtractor(InstagramExtractor):
     """Extractor for TagPage"""
     subcategory = "tag"
diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py
index 6b2cf4c..00a32cd 100644
--- a/gallery_dl/extractor/itaku.py
+++ b/gallery_dl/extractor/itaku.py
@@ -101,9 +101,9 @@ class ItakuImageExtractor(ItakuExtractor):
                      "/gallery_imgs/220504_oUNIAFT/xl.jpg",
             "liked_by_you": False,
             "maturity_rating": "SFW",
-            "num_comments": 2,
-            "num_likes": 80,
-            "num_reshares": 2,
+            "num_comments": int,
+            "num_likes": int,
+            "num_reshares": int,
             "obj_tags": 136446,
             "owner": 16775,
             "owner_avatar": "https://d1wmr8tlk3viaj.cloudfront.net"
@@ -115,8 +115,9 @@ class ItakuImageExtractor(ItakuExtractor):
             "tags": list,
             "tags_character": ["hatsune_miku"],
             "tags_copyright": ["vocaloid"],
-            "tags_general" : ["twintails", "green_hair", "flag", "gloves",
-                              "green_eyes", "female", "racing_miku"],
+            "tags_general" : ["female", "green_eyes", "twintails",
+                              "green_hair", "gloves", "flag",
+                              "racing_miku"],
             "title": "Racing Miku 2022 Ver.",
             "too_mature": False,
             "uncompressed_filesize": "0.62",
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index f1eb79f..816b561 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -440,20 +440,44 @@ class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
 class KemonopartyFavoriteExtractor(KemonopartyExtractor):
     """Extractor for kemono.party favorites"""
     subcategory = "favorite"
-    pattern = BASE_PATTERN + r"/favorites"
-    test = ("https://kemono.party/favorites", {
-        "pattern": KemonopartyUserExtractor.pattern,
-        "url": "f4b5b796979bcba824af84206578c79101c7f0e1",
-        "count": 3,
-    })
+    pattern = BASE_PATTERN + r"/favorites(?:/?\?([^#]+))?"
+    test = (
+        ("https://kemono.party/favorites", {
+            "pattern": KemonopartyUserExtractor.pattern,
+            "url": "f4b5b796979bcba824af84206578c79101c7f0e1",
+            "count": 3,
+        }),
+        ("https://kemono.party/favorites?type=post", {
+            "pattern": KemonopartyPostExtractor.pattern,
+            "url": "ecfccf5f0d50b8d14caa7bbdcf071de5c1e5b90f",
+            "count": 3,
+        }),
+    )
+
+    def __init__(self, match):
+        KemonopartyExtractor.__init__(self, match)
+        self.favorites = (text.parse_query(match.group(2)).get("type") or
+                          self.config("favorites") or
+                          "artist")
 
     def items(self):
         self._prepare_ddosguard_cookies()
         self.login()
 
-        users = self.request(self.root + "/api/favorites").json()
-        for user in users:
-            user["_extractor"] = KemonopartyUserExtractor
-            url = "{}/{}/user/{}".format(
-                self.root, user["service"], user["id"])
-            yield Message.Queue, url, user
+        if self.favorites == "artist":
+            users = self.request(
+                self.root + "/api/v1/account/favorites?type=artist").json()
+            for user in users:
+                user["_extractor"] = KemonopartyUserExtractor
+                url = "{}/{}/user/{}".format(
+                    self.root, user["service"], user["id"])
+                yield Message.Queue, url, user
+
+        elif self.favorites == "post":
+            posts = self.request(
+                self.root + "/api/v1/account/favorites?type=post").json()
+            for post in posts:
+                post["_extractor"] = KemonopartyPostExtractor
+                url = "{}/{}/user/{}/post/{}".format(
+                    self.root, post["service"], post["user"], post["id"])
+                yield Message.Queue, url, post
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index b5db3dd..57db0c9 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -49,7 +49,9 @@ class LusciousAlbumExtractor(LusciousExtractor):
                r"/(?:albums|pictures/c/[^/?#]+/album)/[^/?#]+_(\d+)")
     test = (
         ("https://luscious.net/albums/okinami-no-koigokoro_277031/", {
-            "url": "7e4984a271a1072ac6483e4228a045895aff86f3",
+            "pattern": r"https://storage\.bhs\.cloud\.ovh\.net/v1/AUTH_\w+"
+                       r"/images/NTRshouldbeillegal/277031"
+                       r"/luscious_net_\d+_\d+\.jpg$",
             # "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
             "keyword": {
                 "album": {
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 6e780e8..493a8ef 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -179,12 +179,11 @@ class MastodonAPI():
             try:
                 access_token = INSTANCES[extractor.category]["access-token"]
             except (KeyError, TypeError):
-                raise exception.StopExtraction(
-                    "Missing access token.\n"
-                    "Run 'gallery-dl oauth:mastodon:%s' to obtain one.",
-                    extractor.instance)
-
-        self.headers = {"Authorization": "Bearer " + access_token}
+                pass
+        if access_token:
+            self.headers = {"Authorization": "Bearer " + access_token}
+        else:
+            self.headers = None
 
     def account_id_by_username(self, username):
         if username.startswith("id:"):
@@ -232,6 +231,11 @@ class MastodonAPI():
         if code < 400:
             return response
 
+        if code == 401:
+            raise exception.StopExtraction(
+                "Invalid or missing access token.\n"
+                "Run 'gallery-dl oauth:mastodon:%s' to obtain one.",
+                self.extractor.instance)
         if code == 404:
             raise exception.NotFoundError()
         if code == 429:
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 122ea46..2c8e72c 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -126,7 +126,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
         username, password = self._get_auth_info()
         self._update_cookies(self._login_impl(username, password))
 
-    @cache(maxage=150*24*3600, keyarg=1)
+    @cache(maxage=90*24*3600, keyarg=1)
     def _login_impl(self, username, password):
         if not username or not password:
             raise exception.AuthenticationError(
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 653822f..d6628c4 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -41,7 +41,8 @@ class OAuthBase(Extractor):
         stdout_write("Waiting for response. (Cancel with Ctrl+c)\n")
         server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-        server.bind(("localhost", self.config("port", 6414)))
+        server.bind((self.config("host", "localhost"),
+                     self.config("port", 6414)))
         server.listen(1)
 
         # workaround for ctrl+c not working during server.accept on Windows
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index fba1312..225f0ff 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -122,7 +122,7 @@ class PhilomenaPostExtractor(PhilomenaExtractor):
             "tag_ids": list,
             "tags": list,
             "thumbnails_generated": True,
-            "updated_at": "2022-04-25T09:30:57Z",
+            "updated_at": r"re:\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ",
             "uploader": "Clover the Clever",
             "uploader_id": 211188,
             "upvotes": int,
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index e1846cc..8203885 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -51,13 +51,13 @@ class PoipikuExtractor(Extractor):
                 thumb = extr('class="IllustItemThumbImg" src="', '"')
                 if not thumb:
                     break
-                elif thumb.startswith("/img/"):
+                elif thumb.startswith(("//img.poipiku.com/img/", "/img/")):
                     continue
                 post["num"] += 1
                 url = text.ensure_http_scheme(thumb[:-8])
                 yield Message.Url, url, text.nameext_from_url(url, post)
 
-            if not extr('</i> show all', '<'):
+            if not extr('> show all', '<'):
                 continue
 
             url = self.root + "/f/ShowAppendFileF.jsp"
@@ -131,7 +131,7 @@ class PoipikuPostExtractor(PoipikuExtractor):
     pattern = BASE_PATTERN + r"/(\d+)/(\d+)"
     test = (
        ("https://poipiku.com/25049/5864576.html", {
-            "pattern": r"https://img\.poipiku\.com/user_img03/000025049"
+            "pattern": r"https://img\.poipiku\.com/user_img\d+/000025049"
                        r"/005864576_EWN1Y65gQ\.png$",
             "keyword": {
                 "count": "1",
@@ -146,7 +146,7 @@ class PoipikuPostExtractor(PoipikuExtractor):
             },
         }),
         ("https://poipiku.com/2166245/6411749.html", {
-            "pattern": r"https://img\.poipiku\.com/user_img01/002166245"
+            "pattern": r"https://img\.poipiku\.com/user_img\d+/002166245"
                        r"/006411749_\w+\.jpeg$",
             "count": 4,
             "keyword": {
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 6dfc907..cd8c238 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -16,13 +16,14 @@ class SkebExtractor(Extractor):
     category = "skeb"
     directory_fmt = ("{category}", "{creator[screen_name]}")
     filename_fmt = "{post_num}_{file_id}.{extension}"
-    archive_fmt = "{post_num}_{file_id}_{content_category}"
+    archive_fmt = "{post_num}_{_file_id}_{content_category}"
     root = "https://skeb.jp"
 
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.user_name = match.group(1)
         self.thumbnails = self.config("thumbnails", False)
+        self.article = self.config("article", False)
 
     def items(self):
         for user_name, post_num in self.posts():
@@ -64,6 +65,7 @@ class SkebExtractor(Extractor):
         resp = self.request(url, headers=headers).json()
         creator = resp["creator"]
         post = {
+            "post_id"          : resp["id"],
             "post_num"         : post_num,
             "post_url"         : self.root + resp["path"],
             "body"             : resp["body"],
@@ -102,12 +104,22 @@ class SkebExtractor(Extractor):
         if self.thumbnails and "og_image_url" in resp:
             post["content_category"] = "thumb"
             post["file_id"] = "thumb"
+            post["_file_id"] = str(resp["id"]) + "t"
             post["file_url"] = resp["og_image_url"]
             yield post
 
+        if self.article and "article_image_url" in resp:
+            url = resp["article_image_url"]
+            if url:
+                post["content_category"] = "article"
+                post["file_id"] = "article"
+                post["_file_id"] = str(resp["id"]) + "a"
+                post["file_url"] = url
+                yield post
+
         for preview in resp["previews"]:
             post["content_category"] = "preview"
-            post["file_id"] = preview["id"]
+            post["file_id"] = post["_file_id"] = preview["id"]
             post["file_url"] = preview["url"]
             info = preview["information"]
             post["original"] = {
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index b0b8f3b..506db26 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -59,7 +59,7 @@ class SlidesharePresentationExtractor(GalleryExtractor):
         # mobile URL
         (("https://www.slideshare.net"
           "/mobile/uqudent/introduction-to-fixed-prosthodontics"), {
-            "url": "59993ad7b0cb93c73011547eedcd02c622649e9d",
+            "url": "43eda2adf4dd221a251c8df794dfb82649e94647",
         }),
     )
 
@@ -72,14 +72,14 @@ class SlidesharePresentationExtractor(GalleryExtractor):
     def metadata(self, page):
         extr = text.extract_from(page)
         descr = extr('<meta name="description" content="', '"')
-        title = extr('<span class="j-title-breadcrumb">', '</span>')
-        published = extr('<div class="metadata-item">', '</div>')
         comments = extr('content="UserComments:', '"')
         likes = extr('content="UserLikes:', '"')
         views = extr('content="UserPageVisits:', '"')
+        title = extr('<span class="j-title-breadcrumb">', '</span>')
+        published = extr('<div class="metadata-item">', '</div>')
 
         if descr.endswith("…"):
-            alt_descr = extr('id="slideshow-description-text"', '</p>')
+            alt_descr = extr('slideshow-description-text"', '</p>')
             if alt_descr:
                 descr = text.remove_html(alt_descr.partition(">")[2]).strip()
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index 98e914e..4010da3 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -111,13 +111,13 @@ class SmugmugImageExtractor(SmugmugExtractor):
     test = (
         ("https://tdm.smugmug.com/Nature/Dove/i-kCsLJT6", {
             "url": "e6408fd2c64e721fd146130dceb56a971ceb4259",
-            "keyword": "460a773f5addadd3e216bda346fc524fe4eedc52",
+            "keyword": "b31a63d07c9c26eb0f79f52d60d171a98938f99b",
             "content": "ecbd9d7b4f75a637abc8d35319be9ec065a44eb0",
         }),
         # video
         ("https://tstravels.smugmug.com/Dailies/Daily-Dose-2015/i-39JFNzB", {
             "url": "04d0ab1ff829ca7d78f5acb5548953df08e9a5ee",
-            "keyword": "eb74e5cf6780d5152ab8f11b431ec1b17fa8f69b",
+            "keyword": "4cef98133ace511adc874c9d9abac5817ba0d856",
         }),
     )
diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py
index fcdf18f..545a95b 100644
--- a/gallery_dl/extractor/tapas.py
+++ b/gallery_dl/extractor/tapas.py
@@ -108,7 +108,7 @@ class TapasSeriesExtractor(TapasExtractor):
     test = (
         ("https://tapas.io/series/just-leave-me-be", {
             "pattern": r"https://\w+\.cloudfront\.net/pc/\w\w/[0-9a-f-]+\.jpg",
-            "count": 127,
+            "count": 132,
         }),
         ("https://tapas.io/series/yona", {  # mature
             "count": 26,
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index ded7fd1..b694fa0 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -64,6 +64,7 @@ class TumblrExtractor(Extractor):
         self.inline = self.config("inline", True)
         self.reblogs = self.config("reblogs", True)
         self.external = self.config("external", False)
+        self.original = self.config("original", True)
 
         if len(self.types) == 1:
             self.api.posts_type = next(iter(self.types))
@@ -101,8 +102,7 @@ class TumblrExtractor(Extractor):
                 del post["trail"]
             post["blog"] = blog
             post["date"] = text.parse_timestamp(post["timestamp"])
-            yield Message.Directory, post
-            post["num"] = 0
+            posts = []
 
             if "photos" in post:  # type "photo" or "link"
                 photos = post["photos"]
@@ -110,18 +110,31 @@ class TumblrExtractor(Extractor):
 
                 for photo in photos:
                     post["photo"] = photo
-                    photo.update(photo["original_size"])
+
+                    best_photo = photo["original_size"]
+                    for alt_photo in photo["alt_sizes"]:
+                        if (alt_photo["height"] > best_photo["height"] or
+                                alt_photo["width"] > best_photo["width"]):
+                            best_photo = alt_photo
+                    photo.update(best_photo)
+
+                    if self.original and "/s2048x3072/" in photo["url"] and (
+                            photo["width"] == 2048 or photo["height"] == 3072):
+                        photo["url"] = self._original_image(photo["url"])
+
                     del photo["original_size"]
                     del photo["alt_sizes"]
-                    yield self._prepare_image(photo["url"], post)
+                    posts.append(
+                        self._prepare_image(photo["url"], post.copy()))
+                del post["photo"]
 
             url = post.get("audio_url")  # type "audio"
             if url and url.startswith("https://a.tumblr.com/"):
-                yield self._prepare(url, post)
+                posts.append(self._prepare(url, post.copy()))
 
             url = post.get("video_url")  # type "video"
             if url:
-                yield self._prepare(_original_video(url), post)
+                posts.append(self._prepare(_original_video(url), post.copy()))
 
             if self.inline and "reblog" in post:  # inline media
                 # only "chat" posts are missing a "reblog" key in their
@@ -129,16 +142,25 @@ class TumblrExtractor(Extractor):
                 body = post["reblog"]["comment"] + post["reblog"]["tree_html"]
                 for url in re.findall('<img src="([^"]+)"', body):
                     url = _original_inline_image(url)
-                    yield self._prepare_image(url, post)
+                    posts.append(self._prepare_image(url, post.copy()))
                 for url in re.findall('<source src="([^"]+)"', body):
                     url = _original_video(url)
-                    yield self._prepare(url, post)
+                    posts.append(self._prepare(url, post.copy()))
 
             if self.external:  # external links
-                post["extension"] = None
                 url = post.get("permalink_url") or post.get("url")
                 if url:
-                    yield Message.Queue, url, post
+                    post["extension"] = None
+                    posts.append((Message.Queue, url, post.copy()))
+                    del post["extension"]
+
+            post["count"] = len(posts)
+            yield Message.Directory, post
+
+            for num, (msg, url, post) in enumerate(posts, 1):
+                post["num"] = num
+                post["count"] = len(posts)
+                yield msg, url, post
 
     def posts(self):
         """Return an iterable containing all relevant posts"""
@@ -167,14 +189,12 @@ class TumblrExtractor(Extractor):
     @staticmethod
     def _prepare(url, post):
         text.nameext_from_url(url, post)
-        post["num"] += 1
         post["hash"] = post["filename"].partition("_")[2]
         return Message.Url, url, post
 
     @staticmethod
     def _prepare_image(url, post):
         text.nameext_from_url(url, post)
-        post["num"] += 1
 
         parts = post["filename"].split("_")
         try:
@@ -188,7 +208,7 @@ class TumblrExtractor(Extractor):
     @staticmethod
     def _prepare_avatar(url, post, blog):
         text.nameext_from_url(url, post)
-        post["num"] = 1
+        post["num"] = post["count"] = 1
         post["blog"] = blog
         post["reblogged"] = False
         post["type"] = post["id"] = post["hash"] = "avatar"
@@ -200,6 +220,12 @@ class TumblrExtractor(Extractor):
     def _skip_reblog_same_blog(self, post):
         return self.blog != post.get("reblogged_root_uuid")
 
+    def _original_image(self, url):
+        url = url.replace("/s2048x3072/", "/s99999x99999/", 1)
+        headers = {"Accept": "text/html,*/*;q=0.8"}
+        response = self.request(url, headers=headers)
+        return text.extract(response.text, '" src="', '"')[0]
+
 
 class TumblrUserExtractor(TumblrExtractor):
     """Extractor for all images from a tumblr-user"""
@@ -279,6 +305,12 @@ class TumblrPostExtractor(TumblrExtractor):
         ("https://mikf123.tumblr.com/post/181022380064/chat-post", {
             "count": 0,
         }),
+        ("https://mikf123.tumblr.com/image/689860196535762944", {
+            "pattern": r"^https://\d+\.media\.tumblr\.com"
+                       r"/134791621559a79793563b636b5fe2c6"
+                       r"/8f1131551cef6e74-bc/s99999x99999"
+                       r"/188cf9b8915b0d0911c6c743d152fc62e8f38491\.png$",
+        }),
         ("http://ziemniax.tumblr.com/post/109697912859/", {
             "exception": exception.NotFoundError,  # HTML response (#297)
         }),
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 36b4806..0df4ea2 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -11,6 +11,7 @@ from .common import Extractor, Message
 from .. import text, util, exception
 from ..cache import cache
+import itertools
 import json
 
 BASE_PATTERN = (
@@ -40,7 +41,7 @@ class TwitterExtractor(Extractor):
         self.quoted = self.config("quoted", False)
         self.videos = self.config("videos", True)
         self.cards = self.config("cards", False)
-        self._user_id = None
+        self._user = self._user_obj = None
         self._user_cache = {}
         self._init_sizes()
@@ -90,8 +91,9 @@ class TwitterExtractor(Extractor):
             if "in_reply_to_user_id_str" in data and (
                 not self.replies or (
                     self.replies == "self" and
-                    (self._user_id or data["in_reply_to_user_id_str"]) !=
-                    data["user_id_str"]
+                    data["user_id_str"] !=
+                    (self._user_obj["rest_id"] if self._user else
+                     data["in_reply_to_user_id_str"])
                 )
             ):
                 self.log.debug("Skipping %s (reply)", data["id_str"])
@@ -229,11 +231,13 @@ class TwitterExtractor(Extractor):
                 files.append({"url": url})
 
     def _transform_tweet(self, tweet):
-        if "core" in tweet:
-            user = self._transform_user(
-                tweet["core"]["user_results"]["result"])
+        if "author" in tweet:
+            author = tweet["author"]
+        elif "core" in tweet:
+            author = tweet["core"]["user_results"]["result"]
         else:
-            user = self._transform_user(tweet["user"])
+            author = tweet["user"]
+        author = self._transform_user(author)
 
         if "legacy" in tweet:
             tweet = tweet["legacy"]
@@ -245,12 +249,13 @@ class TwitterExtractor(Extractor):
             "retweet_id"    : text.parse_int(
                 tget("retweeted_status_id_str")),
             "quote_id"      : text.parse_int(
-                tget("quoted_status_id_str")),
+                tget("quoted_by_id_str")),
             "reply_id"      : text.parse_int(
                 tget("in_reply_to_status_id_str")),
             "date"          : text.parse_datetime(
                 tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
-            "user"          : user,
+            "user"          : self._user or author,
+            "author"        : author,
             "lang"          : tweet["lang"],
             "favorite_count": tget("favorite_count"),
             "quote_count"   : tget("quote_count"),
@@ -280,13 +285,8 @@ class TwitterExtractor(Extractor):
         if "in_reply_to_screen_name" in tweet:
             tdata["reply_to"] = tweet["in_reply_to_screen_name"]
 
-        if "quoted_by_id_str" in tweet:
-            tdata["quote_by"] = text.parse_int(tweet["quoted_by_id_str"])
-
-        if "author" in tweet:
-            tdata["author"] = self._transform_user(tweet["author"])
-        else:
-            tdata["author"] = tdata["user"]
+        if "quoted_by" in tweet:
+            tdata["quote_by"] = tweet["quoted_by"]
 
         return tdata
@@ -336,6 +336,10 @@ class TwitterExtractor(Extractor):
 
         return udata
 
+    def _assign_user(self, user):
+        self._user_obj = user
+        self._user = self._transform_user(user)
+
     def _users_result(self, users):
         userfmt = self.config("users")
         if not userfmt or userfmt == "timeline":
@@ -455,33 +459,24 @@ class TwitterTimelineExtractor(TwitterExtractor):
         tweet = None
         for tweet in self._select_tweet_source()(self.user):
             yield tweet
-
         if tweet is None:
             return
 
-        # get username
-        if not self.user.startswith("id:"):
-            username = self.user
-        elif "core" in tweet:
-            username = (tweet["core"]["user_results"]["result"]
-                        ["legacy"]["screen_name"])
-        else:
-            username = tweet["user"]["screen_name"]
-
-        # get tweet data
-        if "legacy" in tweet:
-            tweet = tweet["legacy"]
-
         # build search query
-        query = "from:{} max_id:{}".format(username, tweet["id_str"])
+        query = "from:{} max_id:{}".format(
+            self._user["name"], tweet["rest_id"])
         if self.retweets:
             query += " include:retweets include:nativeretweets"
+
         if not self.textonly:
-            query += (" (filter:images OR"
-                      " filter:native_video OR"
-                      " card_name:animated_gif)")
+            # try to search for media-only tweets
+            tweet = None
+            for tweet in self.api.search_adaptive(query + " filter:links"):
+                yield tweet
+            if tweet is not None:
+                return
 
-        # yield search results starting from last tweet id
+        # yield unfiltered search results
        yield from self.api.search_adaptive(query)
 
     def _select_tweet_source(self):
@@ -625,7 +620,25 @@ class TwitterSearchExtractor(TwitterExtractor):
         return {"search": text.unquote(self.user)}
 
     def tweets(self):
-        return self.api.search_adaptive(text.unquote(self.user))
+        query = text.unquote(self.user.replace("+", " "))
+
+        user = None
+        for item in query.split():
+            item = item.strip("()")
+            if item.startswith("from:"):
+                if user:
+                    user = None
+                    break
+                else:
+                    user = item[5:]
+
+        if user is not None:
+            try:
+                self._assign_user(self.api.user_by_screen_name(user))
+            except KeyError:
+                pass
+
+        return self.api.search_adaptive(query)
 
 
 class TwitterEventExtractor(TwitterExtractor):
@@ -693,7 +706,7 @@ class TwitterTweetExtractor(TwitterExtractor):
         }),
         ("https://twitter.com/i/web/status/1424898916156284928", {
             "options": (("replies", "self"),),
-            "count": 0,
+            "count": 1,
         }),
         # "quoted" option (#854)
         ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
@@ -777,20 +790,38 @@ class TwitterTweetExtractor(TwitterExtractor):
 
     def tweets(self):
         if self.config("conversations", False):
-            return self.api.tweet_detail(self.tweet_id)
+            return self._tweets_conversation(self.tweet_id)
+        else:
+            return self._tweets_single(self.tweet_id)
 
+    def _tweets_single(self, tweet_id):
         tweets = []
-        tweet_id = self.tweet_id
+
         for tweet in self.api.tweet_detail(tweet_id):
             if tweet["rest_id"] == tweet_id or \
                     tweet.get("_retweet_id_str") == tweet_id:
+                self._assign_user(tweet["core"]["user_results"]["result"])
                 tweets.append(tweet)
 
                 tweet_id = tweet["legacy"].get("quoted_status_id_str")
                 if not tweet_id:
                     break
+
         return tweets
 
+    def _tweets_conversation(self, tweet_id):
+        tweets = self.api.tweet_detail(tweet_id)
+        buffer = []
+
+        for tweet in tweets:
+            buffer.append(tweet)
+            if tweet["rest_id"] == tweet_id or \
+                    tweet.get("_retweet_id_str") == tweet_id:
+                self._assign_user(tweet["core"]["user_results"]["result"])
+                break
+
+        return itertools.chain(buffer, tweets)
+
 
 class TwitterImageExtractor(Extractor):
     category = "twitter"
@@ -888,7 +919,6 @@ class TwitterAPI():
         self._nsfw_warning = True
         self._syndication = extractor.config("syndication")
         self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
-        self._user = None
 
         cookies = extractor.session.cookies
         cookiedomain = extractor.cookiedomain
@@ -1050,13 +1080,13 @@ class TwitterAPI():
 
     def _user_id_by_screen_name(self, screen_name):
         if screen_name.startswith("id:"):
-            self._user = util.SENTINEL
             user_id = screen_name[3:]
+            user = self.user_by_rest_id(user_id)
         else:
             user = ()
             try:
-                user = self._user = self.user_by_screen_name(screen_name)
+                user = self.user_by_screen_name(screen_name)
                 user_id = user["rest_id"]
             except KeyError:
                 if "unavailable_message" in user:
@@ -1066,7 +1096,7 @@ class TwitterAPI():
                 else:
                     raise exception.NotFoundError("user")
 
-        self.extractor._user_id = user_id
+        self.extractor._assign_user(user)
         return user_id
 
     @cache(maxage=3600)
@@ -1183,7 +1213,7 @@ class TwitterAPI():
             if quoted:
                 quoted = quoted.copy()
                 quoted["author"] = users[quoted["user_id_str"]]
-                quoted["user"] = tweet["user"]
+                quoted["quoted_by"] = tweet["user"]["screen_name"]
                 quoted["quoted_by_id_str"] = tweet["id_str"]
                 yield quoted
 
@@ -1226,17 +1256,10 @@ class TwitterAPI():
                 except LookupError:
                     extr.log.debug(data)
 
-                if self._user:
-                    user = self._user
-                    if user is util.SENTINEL:
-                        try:
-                            user = self.user_by_rest_id(variables["userId"])
-                        except KeyError:
-                            raise exception.NotFoundError("user")
-                        user = user.get("legacy")
-                    if not user:
-                        pass
-                    elif user.get("blocked_by"):
+                user = extr._user_obj
+                if user:
+                    user = user["legacy"]
+                    if user.get("blocked_by"):
                         if self.headers["x-twitter-auth-type"] and \
                                 extr.config("logout"):
                             guest_token = self._guest_token()
@@ -1322,7 +1345,7 @@ class TwitterAPI():
                     try:
                         legacy["retweeted_status_id_str"] = \
                             retweet["rest_id"]
-                        legacy["author"] = \
+                        tweet["author"] = \
                             retweet["core"]["user_results"]["result"]
                         if "extended_entities" in retweet["legacy"] and \
                                 "extended_entities" not in legacy:
@@ -1336,9 +1359,9 @@ class TwitterAPI():
                 if "quoted_status_result" in tweet:
                     try:
                         quoted = tweet["quoted_status_result"]["result"]
-                        quoted["legacy"]["author"] = \
-                            quoted["core"]["user_results"]["result"]
-                        quoted["core"] = tweet["core"]
+                        quoted["legacy"]["quoted_by"] = (
+                            tweet["core"]["user_results"]["result"]
+                            ["legacy"]["screen_name"])
                         quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"]
                         yield quoted
                     except KeyError:
@@ -1374,10 +1397,14 @@ class TwitterAPI():
             if instr["type"] == "TimelineAddEntries":
                 for entry in instr["entries"]:
                     if entry["entryId"].startswith("user-"):
-                        user = (entry["content"]["itemContent"]
-                                ["user_results"]["result"])
-                        if "rest_id" in user:
-                            yield user
+                        try:
+                            user = (entry["content"]["itemContent"]
+                                    ["user_results"]["result"])
+                        except KeyError:
+                            pass
+                        else:
+                            if "rest_id" in user:
+                                yield user
                     elif entry["entryId"].startswith("cursor-bottom-"):
                         cursor = entry["content"]["value"]
             elif instr["type"] == "TimelineTerminateTimeline":
@@ -1439,6 +1466,6 @@ class TwitterAPI():
         return {
             "rest_id": tweet["id_str"],
             "legacy" : tweet,
-            "user"   : tweet["user"],
+            "core"   : {"user_results": {"result": tweet["user"]}},
             "_retweet_id_str": retweet_id,
         }
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index c29d730..623ed94 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -84,7 +84,7 @@ class UnsplashImageExtractor(UnsplashExtractor):
             "blur_hash": "LZP4uQS4jboe%#o0WCa}2doJNaaz",
             "categories": list,
             "color": "#f3c08c",
-            "created_at": "2020-04-08T08:29:42-04:00",
+            "created_at": "2020-04-08T12:29:42Z",
             "date": "dt:2020-04-08 12:29:42",
             "description": "The Island",
             "downloads": int,
@@ -112,7 +112,7 @@ class UnsplashImageExtractor(UnsplashExtractor):
                 },
                 "title": "Beaver Dam, WI 53916, USA"
             },
-            "promoted_at": "2020-04-08T11:12:03-04:00",
+            "promoted_at": "2020-04-08T15:12:03Z",
             "sponsorship": None,
             "tags": list,
             "updated_at": str,
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index ab2153f..25b00fe 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -40,12 +40,17 @@ class VkExtractor(Extractor):
                 continue
 
             try:
+                photo["url"] = photo[size + "src"]
+            except KeyError:
+                self.log.warning("no photo URL found (%s)", photo.get("id"))
+                continue
+
+            try:
                 _, photo["width"], photo["height"] = photo[size]
             except ValueError:
                 # photo without width/height entries (#2535)
                 photo["width"] = photo["height"] = 0
 
-            photo["url"] = photo[size + "src"]
             photo["id"] = photo["id"].rpartition("_")[2]
             photo.update(data)
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 756384b..668be0f 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -169,7 +169,7 @@ class VscoCollectionExtractor(VscoExtractor):
         return self._pagination(url, params, tkn, "medias", (
             data["medias"]["byId"][mid["id"]]["media"]
             for mid in data
-            ["collections"]["byCollectionId"][cid]["byPage"]["1"]["collection"]
+            ["collections"]["byId"][cid]["1"]["collection"]
         ))
diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py
index 37eab24..0ad8523 100644
--- a/gallery_dl/extractor/wallhaven.py
+++ b/gallery_dl/extractor/wallhaven.py
@@ -189,7 +189,7 @@ class WallhavenAPI():
 
     def collections(self, username):
         endpoint = "/v1/collections/" + username
-        return self._pagination(endpoint)
+        return self._pagination(endpoint, metadata=False)
 
     def search(self, params):
         endpoint = "/v1/search"
@@ -200,13 +200,20 @@ class WallhavenAPI():
         return self.extractor.request(
             url, headers=self.headers, params=params).json()
 
-    def _pagination(self, endpoint, params=None):
+    def _pagination(self, endpoint, params=None, metadata=None):
         if params is None:
             params = {}
+        if metadata is None:
+            metadata = self.extractor.config("metadata")
 
         while True:
             data = self._call(endpoint, params)
-            yield from data["data"]
+
+            if metadata:
+                for wp in data["data"]:
+                    yield self.info(str(wp["id"]))
+            else:
+                yield from data["data"]
 
             meta = data.get("meta")
             if not meta or meta["current_page"] >= meta["last_page"]:
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index bdbdc8c..189c0c5 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -99,13 +99,14 @@ class WeiboExtractor(Extractor):
             else:
                 yield pic["largest"].copy()
 
-        if "page_info" in status:
-            page_info = status["page_info"]
-            if "media_info" not in page_info or not self.videos:
-                return
-            media = max(page_info["media_info"]["playback_list"],
-                        key=lambda m: m["meta"]["quality_index"])
-            yield media["play_info"].copy()
+        if "page_info" in status and self.videos:
+            try:
+                media = max(status["page_info"]["media_info"]["playback_list"],
+                            key=lambda m: m["meta"]["quality_index"])
+            except KeyError:
+                pass
+            else:
+                yield media["play_info"].copy()
 
     def _status_by_id(self, status_id):
         url = "{}/ajax/statuses/show?id={}".format(self.root, status_id)
@@ -147,14 +148,17 @@ class WeiboExtractor(Extractor):
                 return
             yield from statuses
 
-            if "next_cursor" in data:
+            if "next_cursor" in data:  # videos, newvideo
                 params["cursor"] = data["next_cursor"]
-            elif "page" in params:
+            elif "page" in params:  # home, article
                 params["page"] += 1
-            elif data["since_id"]:
+            elif data["since_id"]:  # album
                 params["sinceid"] = data["since_id"]
-            else:
-                params["since_id"] = statuses[-1]["id"] - 1
+            else:  # feed, last album page
+                try:
+                    params["since_id"] = statuses[-1]["id"] - 1
+                except KeyError:
+                    return
 
     def _sina_visitor_system(self, response):
         self.log.info("Sina Visitor System")
@@ -366,6 +370,10 @@ class WeiboStatusExtractor(WeiboExtractor):
             "pattern": r"https://g\.us\.sinaimg.cn/o0/qNZcaAAglx07Wuf921CM0104"
                        r"120005tc0E010\.mp4\?label=gif_mp4",
         }),
+        # missing 'playback_list' (#2792)
+        ("https://weibo.com/2909128931/4409545658754086", {
+            "count": 9,
+        }),
         ("https://m.weibo.cn/status/4339748116375525"),
         ("https://m.weibo.cn/5746766133/4339748116375525"),
     )
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
new file mode 100644
index 0000000..2b5acd8
--- /dev/null
+++ b/gallery_dl/extractor/zerochan.py
@@ -0,0 +1,156 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.zerochan.net/"""
+
+from .booru import BooruExtractor
+from ..cache import cache
+from .. import text, exception
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
+
+
+class ZerochanExtractor(BooruExtractor):
+    """Base class for zerochan extractors"""
+    category = "zerochan"
+    root = "https://www.zerochan.net"
+    filename_fmt = "{id}.{extension}"
+    archive_fmt = "{id}"
+    cookiedomain = ".zerochan.net"
+    cookienames = ("z_id", "z_hash")
+
+    def login(self):
+        if not self._check_cookies(self.cookienames):
+            username, password = self._get_auth_info()
+            if username:
+                self._update_cookies(self._login_impl(username, password))
+            # force legacy layout
+            self.session.cookies.set("v3", "0", domain=self.cookiedomain)
+
+    @cache(maxage=90*86400, keyarg=1)
+    def _login_impl(self, username, password):
+        self.log.info("Logging in as %s", username)
+
+        url = self.root + "/login"
+        headers = {
+            "Origin"  : self.root,
+            "Referer" : url,
+        }
+        data = {
+            "ref"     : "/",
+            "name"    : username,
+            "password": password,
+            "login"   : "Login",
+        }
+
+        response = self.request(url, method="POST", headers=headers, data=data)
+        if not response.history:
+            raise exception.AuthenticationError()
+
+        return response.cookies
+
+    def _parse_entry_page(self, entry_id):
+        url = "{}/{}".format(self.root, entry_id)
+        extr = text.extract_from(self.request(url).text)
+
+        return {
+            "id"    : entry_id,
+            "author": extr('"author": "', '"'),
+            "file_url": extr('"contentUrl": "', '"'),
+            "date"  : text.parse_datetime(extr(
+                '"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y"),
+            "width" : extr('"width": "', ' '),
+            "height": extr('"height": "', ' '),
+            "size"  : extr('"contentSize": "', 'B'),
+            "path"  : text.split_html(extr(
+                'class="breadcrumbs', '</p>'))[3::2],
+            "tags"  : extr('alt="Tags: ', '"').split(", ")
+        }
+
+
+class ZerochanTagExtractor(ZerochanExtractor):
+    subcategory = "tag"
+    directory_fmt = ("{category}", "{search_tags}")
+    pattern = BASE_PATTERN + r"/(?!\d+$)([^/?#]+)/?(?:\?([^#]+))?"
+    test = ("https://www.zerochan.net/Perth+%28Kantai+Collection%29", {
+        "pattern": r"https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
+        "count": "> 24",
+        "keywords": {
+            "extension": r"re:jpg|png",
+            "file_url": "",
+            "filename": r"re:Perth.\(Kantai.Collection\).full.\d+",
+            "height": r"re:^\d+$",
+            "id": r"re:^\d+$",
+            "name": "Perth (Kantai Collection)",
+            "search_tags": "Perth (Kantai Collection)",
+            "size": r"re:^\d+k$",
+            "width": r"re:^\d+$",
+        },
+    })
+
+    def __init__(self, match):
+        ZerochanExtractor.__init__(self, match)
+        self.search_tag, self.query = match.groups()
+
+    def metadata(self):
+        return {"search_tags": text.unquote(
+            self.search_tag.replace("+", " "))}
+
+    def posts(self):
+        url = self.root + "/" + self.search_tag
+        params = text.parse_query(self.query)
+        params["p"] = text.parse_int(params.get("p"), 1)
+
+        while True:
+            page = self.request(url, params=params).text
+            thumbs = text.extract(page, '<ul id="thumbs', '</ul>')[0]
+            extr = text.extract_from(thumbs)
+
+            while True:
+                post = extr('<li class="', '>')
+                if not post:
+                    break
+                yield {
+                    "id"    : extr('href="/', '"'),
+                    "name"  : extr('alt="', '"'),
+                    "width" : extr('title="', 'x'),
+                    "height": extr('', ' '),
+                    "size"  : extr('', 'B'),
+                    "file_url": "https://static." + extr(
+                        '<a href="https://static.', '"'),
+                }
+
+            if 'rel="next"' not in page:
+                break
+            params["p"] += 1
+
+
+class ZerochanImageExtractor(ZerochanExtractor):
+    subcategory = "image"
+    pattern = BASE_PATTERN + r"/(\d+)"
+    test = ("https://www.zerochan.net/2920445", {
+        "pattern": r"https://static\.zerochan\.net/"
+                   r"Perth\.%28Kantai\.Collection%29\.full.2920445\.jpg",
+        "keyword": {
+            "author": "YukinoTokisaki",
+            "date": "dt:2020-04-24 21:33:44",
+            "file_url": str,
+            "filename": "Perth.(Kantai.Collection).full.2920445",
+            "height": "1366",
+            "id": "2920445",
+            "size": "1929k",
+            "width": "1920",
+        },
+    })
+
+    def __init__(self, match):
+        ZerochanExtractor.__init__(self, match)
+        self.image_id = match.group(1)
+
+    def posts(self):
+        return (self._parse_entry_page(self.image_id),)
