author    Unit 193 <unit193@unit193.net>  2022-06-28 19:54:18 -0400
committer Unit 193 <unit193@unit193.net>  2022-06-28 19:54:18 -0400
commit    ce35450b5308adab049c5bd99095986d4c607027
tree      f0c2b600f8ef720941bdf615164b942c6c4a5d07 /gallery_dl/extractor
parent    25442ea49f031d4d2df3353dd7e9ad2080e332da

New upstream version 1.22.3 (tag: upstream/1.22.3)
Diffstat (limited to 'gallery_dl/extractor')
-rw-r--r--  gallery_dl/extractor/__init__.py         |   2
-rw-r--r--  gallery_dl/extractor/common.py           |  18
-rw-r--r--  gallery_dl/extractor/cyberdrop.py        |   5
-rw-r--r--  gallery_dl/extractor/instagram.py        | 223
-rw-r--r--  gallery_dl/extractor/itaku.py            | 183
-rw-r--r--  gallery_dl/extractor/lolisafe.py         |   2
-rw-r--r--  gallery_dl/extractor/nijie.py            | 194
-rw-r--r--  gallery_dl/extractor/poipiku.py          | 169
-rw-r--r--  gallery_dl/extractor/readcomiconline.py  |   9
-rw-r--r--  gallery_dl/extractor/skeb.py             |  34
-rw-r--r--  gallery_dl/extractor/twitter.py          |  88
-rw-r--r--  gallery_dl/extractor/unsplash.py         |  20
-rw-r--r--  gallery_dl/extractor/vk.py               |   4
-rw-r--r--  gallery_dl/extractor/weibo.py            |  48
14 files changed, 765 insertions(+), 234 deletions(-)
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 6d6c7ee..e273f84 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -64,6 +64,7 @@ modules = [
"inkbunny",
"instagram",
"issuu",
+ "itaku",
"kabeuchi",
"keenspot",
"kemonoparty",
@@ -106,6 +107,7 @@ modules = [
"pixiv",
"pixnet",
"plurk",
+ "poipiku",
"pornhub",
"pururin",
"reactor",
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 9cd9059..5c5e29e 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -256,7 +256,7 @@ class Extractor():
else:
headers["User-Agent"] = self.config("user-agent", (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
- "rv:91.0) Gecko/20100101 Firefox/91.0"))
+ "rv:102.0) Gecko/20100101 Firefox/102.0"))
headers["Accept"] = "*/*"
headers["Accept-Language"] = "en-US,en;q=0.5"
headers["Accept-Encoding"] = "gzip, deflate"
@@ -713,16 +713,21 @@ _browser_cookies = {}
HTTP_HEADERS = {
"firefox": (
- ("User-Agent", "Mozilla/5.0 ({}; rv:91.0) "
- "Gecko/20100101 Firefox/91.0"),
+ ("User-Agent", "Mozilla/5.0 ({}; rv:102.0) "
+ "Gecko/20100101 Firefox/102.0"),
("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
- "image/avif,*/*;q=0.8"),
+ "image/avif,image/webp,*/*;q=0.8"),
("Accept-Language", "en-US,en;q=0.5"),
- ("Accept-Encoding", "gzip, deflate"),
+ ("Accept-Encoding", "gzip, deflate, br"),
("Referer", None),
+ ("DNT", "1"),
("Connection", "keep-alive"),
("Upgrade-Insecure-Requests", "1"),
("Cookie", None),
+ ("Sec-Fetch-Dest", "empty"),
+ ("Sec-Fetch-Mode", "no-cors"),
+ ("Sec-Fetch-Site", "same-origin"),
+ ("TE", "trailers"),
),
"chrome": (
("Upgrade-Insecure-Requests", "1"),
@@ -755,8 +760,7 @@ SSL_CIPHERS = {
"AES128-GCM-SHA256:"
"AES256-GCM-SHA384:"
"AES128-SHA:"
- "AES256-SHA:"
- "DES-CBC3-SHA"
+ "AES256-SHA"
),
"chrome": (
"TLS_AES_128_GCM_SHA256:"
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index 1afaac8..7a79eca 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -48,10 +48,11 @@ class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
files = []
append = files.append
while True:
- url = extr('id="file" href="', '"')
+ url = text.unescape(extr('id="file" href="', '"'))
if not url:
break
- append({"file": text.unescape(url)})
+ append({"file": url,
+ "_fallback": (self.root + url[url.find("/", 8):],)})
return files, {
"album_id" : self.album_id,
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index e536e22..31f5b32 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -82,8 +82,12 @@ class InstagramExtractor(Extractor):
if response.history:
- url = response.request.url
+ url = response.url
if "/accounts/login/" in url:
+ if self._username:
+ self.log.debug("Invalidating cached login session for "
+ "'%s'", self._username)
+ _login_impl.invalidate(self._username)
page = "login"
elif "/challenge/" in url:
page = "challenge"
@@ -161,55 +165,15 @@ class InstagramExtractor(Extractor):
return self._pagination_api(endpoint)
def login(self):
+ self._username = None
if not self._check_cookies(self.cookienames):
username, password = self._get_auth_info()
if username:
- self._update_cookies(self._login_impl(username, password))
+ self._username = username
+ self._update_cookies(_login_impl(self, username, password))
self.session.cookies.set(
"csrftoken", self.csrf_token, domain=self.cookiedomain)
- @cache(maxage=360*24*3600, keyarg=1)
- def _login_impl(self, username, password):
- self.log.info("Logging in as %s", username)
-
- url = self.root + "/accounts/login/"
- page = self.request(url).text
-
- headers = {
- "X-Web-Device-Id" : text.extract(page, '"device_id":"', '"')[0],
- "X-IG-App-ID" : "936619743392459",
- "X-ASBD-ID" : "437806",
- "X-IG-WWW-Claim" : "0",
- "X-Requested-With": "XMLHttpRequest",
- "Referer" : url,
- }
- url = self.root + "/data/shared_data/"
- data = self.request(url, headers=headers).json()
-
- headers["X-CSRFToken"] = data["config"]["csrf_token"]
- headers["X-Instagram-AJAX"] = data["rollout_hash"]
- headers["Origin"] = self.root
- data = {
- "username" : username,
- "enc_password" : "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format(
- int(time.time()), password),
- "queryParams" : "{}",
- "optIntoOneTap" : "false",
- "stopDeletionNonce" : "",
- "trustedDeviceRecords": "{}",
- }
- url = self.root + "/accounts/login/ajax/"
- response = self.request(url, method="POST", headers=headers, data=data)
-
- if not response.json().get("authenticated"):
- raise exception.AuthenticationError()
-
- cget = self.session.cookies.get
- return {
- name: cget(name)
- for name in ("sessionid", "mid", "ig_did")
- }
-
def _parse_post_graphql(self, post):
typename = post["__typename"]
@@ -286,37 +250,51 @@ class InstagramExtractor(Extractor):
return data
def _parse_post_api(self, post):
-
- if "media" in post:
- media = post["media"]
- owner = media["user"]
+ if "items" in post:
+ items = post["items"]
+ reel_id = str(post["id"]).rpartition(":")[2]
data = {
- "post_id" : media["pk"],
- "post_shortcode": shortcode_from_id(media["pk"]),
+ "expires": text.parse_timestamp(post.get("expiring_at")),
+ "post_id": reel_id,
+ "post_shortcode": shortcode_from_id(reel_id),
+ }
+ else:
+ data = {
+ "post_id" : post["pk"],
+ "post_shortcode": post["code"],
+ "likes": post["like_count"],
}
- if "carousel_media" in media:
- post["items"] = media["carousel_media"]
+ caption = post["caption"]
+ data["description"] = caption["text"] if caption else ""
+
+ tags = self._find_tags(data["description"])
+ if tags:
+ data["tags"] = sorted(set(tags))
+
+ location = post.get("location")
+ if location:
+ slug = location["short_name"].replace(" ", "-").lower()
+ data["location_id"] = location["pk"]
+ data["location_slug"] = slug
+ data["location_url"] = "{}/explore/locations/{}/{}/".format(
+ self.root, location["pk"], slug)
+
+ if "carousel_media" in post:
+ items = post["carousel_media"]
data["sidecar_media_id"] = data["post_id"]
data["sidecar_shortcode"] = data["post_shortcode"]
else:
- post["items"] = (media,)
-
- else:
- reel_id = str(post["id"]).rpartition(":")[2]
- owner = post["user"]
- data = {
- "expires" : text.parse_timestamp(post.get("expiring_at")),
- "post_id" : reel_id,
- "post_shortcode": shortcode_from_id(reel_id),
- }
+ items = (post,)
+ owner = post["user"]
data["owner_id"] = owner["pk"]
data["username"] = owner.get("username")
data["fullname"] = owner.get("full_name")
- data["_files"] = files = []
+ data["post_url"] = "{}/p/{}/".format(self.root, data["post_shortcode"])
- for num, item in enumerate(post["items"], 1):
+ data["_files"] = files = []
+ for num, item in enumerate(items, 1):
image = item["image_versions2"]["candidates"][0]
@@ -333,7 +311,8 @@ class InstagramExtractor(Extractor):
media = {
"num" : num,
"date" : text.parse_timestamp(item.get("taken_at") or
- media.get("taken_at")),
+ media.get("taken_at") or
+ post.get("taken_at")),
"media_id" : item["pk"],
"shortcode" : (item.get("code") or
shortcode_from_id(item["pk"])),
@@ -342,6 +321,10 @@ class InstagramExtractor(Extractor):
"width" : media["width"],
"height" : media["height"],
}
+
+ if "expiring_at" in item:
+ media["expires"] = text.parse_timestamp(post["expiring_at"])
+
self._extract_tagged_users(item, media)
files.append(media)
@@ -385,31 +368,6 @@ class InstagramExtractor(Extractor):
"username" : user["username"],
"full_name": user["full_name"]})
- def _extract_shared_data(self, page):
- shared_data, pos = text.extract(
- page, "window._sharedData =", ";</script>")
- additional_data, pos = text.extract(
- page, "window.__additionalDataLoaded(", ");</script>", pos)
-
- data = json.loads(shared_data)
- if additional_data:
- next(iter(data["entry_data"].values()))[0] = \
- json.loads(additional_data.partition(",")[2])
- return data
-
- def _get_edge_data(self, user, key):
- cursor = self.config("cursor")
- if cursor or not key:
- return {
- "edges" : (),
- "page_info": {
- "end_cursor" : cursor,
- "has_next_page": True,
- "_virtual" : True,
- },
- }
- return user[key]
-
def _pagination_graphql(self, query_hash, variables):
cursor = self.config("cursor")
if cursor:
@@ -436,8 +394,7 @@ class InstagramExtractor(Extractor):
def _pagination_api(self, endpoint, params=None):
while True:
data = self._request_api(endpoint, params=params)
- for item in data["items"]:
- yield {"media": item}
+ yield from data["items"]
if not data["more_available"]:
return
@@ -446,7 +403,8 @@ class InstagramExtractor(Extractor):
def _pagination_api_post(self, endpoint, params, post=False):
while True:
data = self._request_api(endpoint, method="POST", data=params)
- yield from data["items"]
+ for item in data["items"]:
+ yield item["media"]
info = data["paging_info"]
if not info["more_available"]:
@@ -567,21 +525,7 @@ class InstagramTagExtractor(InstagramExtractor):
return {"tag": text.unquote(self.item)}
def posts(self):
- url = "{}/explore/tags/{}/".format(self.root, self.item)
- page = self._extract_shared_data(
- self.request(url).text)["entry_data"]["TagPage"][0]
-
- if "data" in page:
- return self._pagination_sections(page["data"]["recent"])
-
- hashtag = page["graphql"]["hashtag"]
- query_hash = "9b498c08113f1e09617a1703c22b2f32"
- variables = {"tag_name": hashtag["name"], "first": 50}
- edge = self._get_edge_data(hashtag, "edge_hashtag_to_media")
- return self._pagination_graphql(query_hash, variables, edge)
-
- def _pagination_sections(self, info):
- endpoint = "/v1/tags/instagram/sections/"
+ endpoint = "/v1/tags/{}/sections/".format(self.item)
data = {
"include_persistent": "0",
"max_id" : None,
@@ -591,29 +535,17 @@ class InstagramTagExtractor(InstagramExtractor):
}
while True:
+ info = self._request_api(endpoint, method="POST", data=data)
+
for section in info["sections"]:
- yield from section["layout_content"]["medias"]
+ for media in section["layout_content"]["medias"]:
+ yield media["media"]
if not info.get("more_available"):
return
data["max_id"] = info["next_max_id"]
data["page"] = info["next_page"]
- info = self._request_api(endpoint, method="POST", data=data)
-
- def _pagination_graphql(self, query_hash, variables, data):
- while True:
- for edge in data["edges"]:
- yield edge["node"]
-
- info = data["page_info"]
- if not info["has_next_page"]:
- return
-
- variables["after"] = self._cursor = info["end_cursor"]
- self.log.debug("Cursor: %s", self._cursor)
- data = self._request_graphql(
- query_hash, variables)["hashtag"]["edge_hashtag_to_media"]
class InstagramPostExtractor(InstagramExtractor):
@@ -812,6 +744,49 @@ class InstagramReelsExtractor(InstagramExtractor):
return self._pagination_api_post(endpoint, data)
+@cache(maxage=360*24*3600, keyarg=1)
+def _login_impl(extr, username, password):
+ extr.log.info("Logging in as %s", username)
+
+ url = extr.root + "/accounts/login/"
+ page = extr.request(url).text
+
+ headers = {
+ "X-Web-Device-Id" : text.extract(page, '"device_id":"', '"')[0],
+ "X-IG-App-ID" : "936619743392459",
+ "X-ASBD-ID" : "437806",
+ "X-IG-WWW-Claim" : "0",
+ "X-Requested-With": "XMLHttpRequest",
+ "Referer" : url,
+ }
+ url = extr.root + "/data/shared_data/"
+ data = extr.request(url, headers=headers).json()
+
+ headers["X-CSRFToken"] = data["config"]["csrf_token"]
+ headers["X-Instagram-AJAX"] = data["rollout_hash"]
+ headers["Origin"] = extr.root
+ data = {
+ "username" : username,
+ "enc_password" : "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format(
+ int(time.time()), password),
+ "queryParams" : "{}",
+ "optIntoOneTap" : "false",
+ "stopDeletionNonce" : "",
+ "trustedDeviceRecords": "{}",
+ }
+ url = extr.root + "/accounts/login/ajax/"
+ response = extr.request(url, method="POST", headers=headers, data=data)
+
+ if not response.json().get("authenticated"):
+ raise exception.AuthenticationError()
+
+ cget = extr.session.cookies.get
+ return {
+ name: cget(name)
+ for name in ("sessionid", "mid", "ig_did")
+ }
+
+
def id_from_shortcode(shortcode):
return util.bdecode(shortcode, _ALPHABET)
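`_login_impl` is hoisted from a method to a module-level function so that its `@cache(..., keyarg=1)` entry, keyed on the username, can be dropped via `_login_impl.invalidate(self._username)` whenever Instagram redirects back to the login page. A minimal in-memory sketch of such a decorator (assumption: gallery-dl's real `cache()` also persists entries to disk between runs):

```python
import functools
import time

def cache(maxage, keyarg=0):
    # Memoize by one positional argument, with per-key invalidation
    def decorator(func):
        store = {}
        @functools.wraps(func)
        def wrapper(*args):
            key = args[keyarg]
            value, expires = store.get(key, (None, 0))
            if expires <= time.time():
                value = func(*args)
                store[key] = (value, time.time() + maxage)
            return value
        wrapper.invalidate = lambda key: store.pop(key, None)
        return wrapper
    return decorator
```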
diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py
new file mode 100644
index 0000000..dfe4b53
--- /dev/null
+++ b/gallery_dl/extractor/itaku.py
@@ -0,0 +1,183 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://itaku.ee/"""
+
+from .common import Extractor, Message
+from ..cache import memcache
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?itaku\.ee"
+
+
+class ItakuExtractor(Extractor):
+ """Base class for itaku extractors"""
+ category = "itaku"
+ root = "https://itaku.ee"
+ directory_fmt = ("{category}", "{owner_username}")
+ filename_fmt = ("{id}{title:? //}.{extension}")
+ archive_fmt = "{id}"
+ request_interval = (0.5, 1.5)
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.api = ItakuAPI(self)
+ self.item = match.group(1)
+ self.videos = self.config("videos", True)
+
+ def items(self):
+ for post in self.posts():
+
+ post["date"] = text.parse_datetime(
+ post["date_added"], "%Y-%m-%dT%H:%M:%S.%f")
+ for category, tags in post.pop("categorized_tags").items():
+ post["tags_" + category.lower()] = [t["name"] for t in tags]
+ post["tags"] = [t["name"] for t in post["tags"]]
+ post["sections"] = [s["title"] for s in post["sections"]]
+
+ if post["video"] and self.videos:
+ url = post["video"]["video"]
+ else:
+ url = post["image"]
+
+ yield Message.Directory, post
+ yield Message.Url, url, text.nameext_from_url(url, post)
+
+
+class ItakuGalleryExtractor(ItakuExtractor):
+ """Extractor for posts from an itaku user gallery"""
+ subcategory = "gallery"
+ pattern = BASE_PATTERN + r"/profile/([^/?#]+)/gallery"
+ test = ("https://itaku.ee/profile/piku/gallery", {
+ "pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_imgs"
+ r"/[^/?#]+\.(jpg|png|gif)",
+ "range": "1-10",
+ "count": 10,
+ })
+
+ def posts(self):
+ return self.api.galleries_images(self.item)
+
+
+class ItakuImageExtractor(ItakuExtractor):
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"/images/(\d+)"
+ test = (
+ ("https://itaku.ee/images/100471", {
+ "pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_imgs"
+ r"/220504_oUNIAFT\.png",
+ "count": 1,
+ "keyword": {
+ "already_pinned": None,
+ "blacklisted": {
+ "blacklisted_tags": [],
+ "is_blacklisted": False
+ },
+ "can_reshare": True,
+ "date_added": "2022-05-05T19:21:17.674148Z",
+ "date_edited": "2022-05-25T14:37:46.220612Z",
+ "description": "sketch from drawpile",
+ "extension": "png",
+ "filename": "220504_oUNIAFT",
+ "hotness_score": 11507.4691939,
+ "id": 100471,
+ "image": "https://d1wmr8tlk3viaj.cloudfront.net/gallery_imgs"
+ "/220504_oUNIAFT.png",
+ "image_xl": "https://d1wmr8tlk3viaj.cloudfront.net"
+ "/gallery_imgs/220504_oUNIAFT/xl.jpg",
+ "liked_by_you": False,
+ "maturity_rating": "SFW",
+ "num_comments": 2,
+ "num_likes": 80,
+ "num_reshares": 2,
+ "obj_tags": 136446,
+ "owner": 16775,
+ "owner_avatar": "https://d1wmr8tlk3viaj.cloudfront.net"
+ "/profile_pics/av2022r_vKYVywc/sm.jpg",
+ "owner_displayname": "Piku",
+ "owner_username": "piku",
+ "reshared_by_you": False,
+ "sections": ["Miku"],
+ "tags": list,
+ "tags_character": ["hatsune_miku"],
+ "tags_copyright": ["vocaloid"],
+ "tags_general" : ["twintails", "green_hair", "flag", "gloves",
+ "green_eyes", "female", "racing_miku"],
+ "title": "Racing Miku 2022 Ver.",
+ "too_mature": False,
+ "uncompressed_filesize": "0.62",
+ "video": None,
+ "visibility": "PUBLIC",
+ },
+ }),
+ # video
+ ("https://itaku.ee/images/19465", {
+ "pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_vids"
+ r"/sleepy_af_OY5GHWw\.mp4",
+ }),
+ )
+
+ def posts(self):
+ return (self.api.image(self.item),)
+
+
+class ItakuAPI():
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.root = extractor.root + "/api"
+ self.headers = {
+ "Accept": "application/json, text/plain, */*",
+ "Referer": extractor.root + "/",
+ }
+
+ def galleries_images(self, username, section=None):
+ endpoint = "/galleries/images/"
+ params = {
+ "cursor" : None,
+ "owner" : self.user(username)["owner"],
+ "section" : section,
+ "date_range": "",
+ "maturity_rating": ("SFW", "Questionable", "NSFW", "Extreme"),
+ "ordering" : "-date_added",
+ "page" : "1",
+ "page_size" : "30",
+ "visibility": ("PUBLIC", "PROFILE_ONLY"),
+ }
+ return self._pagination(endpoint, params, self.image)
+
+ def image(self, image_id):
+ endpoint = "/galleries/images/" + str(image_id)
+ return self._call(endpoint)
+
+ @memcache()
+ def user(self, username):
+ return self._call("/user_profiles/{}/".format(username))
+
+ def _call(self, endpoint, params=None):
+ if not endpoint.startswith("http"):
+ endpoint = self.root + endpoint
+ response = self.extractor.request(
+ endpoint, params=params, headers=self.headers)
+ return response.json()
+
+ def _pagination(self, endpoint, params, extend):
+ data = self._call(endpoint, params)
+
+ while True:
+ if extend:
+ for result in data["results"]:
+ yield extend(result["id"])
+ else:
+ yield from data["results"]
+
+ url_next = data["links"].get("next")
+ if not url_next:
+ return
+
+ data = self._call(url_next)
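`_pagination()` follows the API's `links.next` cursor URL until it is exhausted, optionally re-fetching each result by id through `extend`. A hypothetical standalone call to the same endpoint, with field names taken from the code and test data above:

```python
import requests

resp = requests.get(
    "https://itaku.ee/api/galleries/images/",
    params={"owner": 16775, "page_size": 30, "ordering": "-date_added"},
    headers={"Accept": "application/json, text/plain, */*",
             "Referer": "https://itaku.ee/"},
)
data = resp.json()
ids = [result["id"] for result in data["results"]]
next_url = data["links"].get("next")  # cursor URL for the next page, or None
```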
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index f3bd5d8..2aea44c 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -85,6 +85,8 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
yield Message.Directory, data
for data["num"], file in enumerate(files, 1):
url = file["file"]
+ if "_fallback" in file:
+ data["_fallback"] = file["_fallback"]
text.nameext_from_url(url, data)
data["name"], sep, data["id"] = data["filename"].rpartition("-")
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 832831f..122ea46 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -6,31 +6,31 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://nijie.info/"""
+"""Extractors for nijie instances"""
-from .common import Extractor, Message, AsynchronousMixin
+from .common import BaseExtractor, Message, AsynchronousMixin
from .. import text, exception
from ..cache import cache
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?nijie\.info"
-
-
-class NijieExtractor(AsynchronousMixin, Extractor):
+class NijieExtractor(AsynchronousMixin, BaseExtractor):
"""Base class for nijie extractors"""
- category = "nijie"
+ basecategory = "Nijie"
directory_fmt = ("{category}", "{user_id}")
filename_fmt = "{image_id}_p{num}.{extension}"
archive_fmt = "{image_id}_{num}"
- cookiedomain = "nijie.info"
- cookienames = ("nemail", "nlogin")
- root = "https://nijie.info"
- view_url = "https://nijie.info/view.php?id="
- popup_url = "https://nijie.info/view_popup.php?id="
def __init__(self, match):
- Extractor.__init__(self, match)
- self.user_id = text.parse_int(match.group(1))
+ self._init_category(match)
+ self.cookiedomain = "." + self.root.rpartition("/")[2]
+ self.cookienames = (self.category + "_tok",)
+
+ if self.category == "horne":
+ self._extract_data = self._extract_data_horne
+
+ BaseExtractor.__init__(self, match)
+
+ self.user_id = text.parse_int(match.group(match.lastindex))
self.user_name = None
self.session.headers["Referer"] = self.root + "/"
@@ -39,13 +39,21 @@ class NijieExtractor(AsynchronousMixin, Extractor):
for image_id in self.image_ids():
- response = self.request(self.view_url + image_id, fatal=False)
+ url = "{}/view.php?id={}".format(self.root, image_id)
+ response = self.request(url, fatal=False)
if response.status_code >= 400:
continue
page = response.text
data = self._extract_data(page)
data["image_id"] = text.parse_int(image_id)
+
+ if self.user_name:
+ data["user_id"] = self.user_id
+ data["user_name"] = self.user_name
+ else:
+ data["user_id"] = data["artist_id"]
+ data["user_name"] = data["artist_name"]
yield Message.Directory, data
for image in self._extract_images(page):
@@ -68,24 +76,41 @@ class NijieExtractor(AsynchronousMixin, Extractor):
"description": text.unescape(extr(
'"description": "', '"').replace("&amp;", "&")),
"date" : text.parse_datetime(extr(
- '"datePublished": "', '"') + "+0900",
- "%a %b %d %H:%M:%S %Y%z"),
- "artist_id" : text.parse_int(extr(
- '"sameAs": "https://nijie.info/members.php?id=', '"')),
+ '"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y", 9),
+ "artist_id" : text.parse_int(extr('/members.php?id=', '"')),
+ "artist_name": keywords[1],
+ "tags" : keywords[2:-1],
+ }
+ return data
+
+ @staticmethod
+ def _extract_data_horne(page):
+ """Extract image metadata from 'page'"""
+ extr = text.extract_from(page)
+ keywords = text.unescape(extr(
+ 'name="keywords" content="', '" />')).split(",")
+ data = {
+ "title" : keywords[0].strip(),
+ "description": text.unescape(extr(
+ 'property="og:description" content="', '"')),
+ "artist_id" : text.parse_int(extr('members.php?id=', '"')),
"artist_name": keywords[1],
"tags" : keywords[2:-1],
+ "date" : text.parse_datetime(extr(
+ "itemprop='datePublished' content=", "<").rpartition(">")[2],
+ "%Y-%m-%d %H:%M:%S", 9),
}
- data["user_id"] = data["artist_id"]
- data["user_name"] = data["artist_name"]
return data
@staticmethod
def _extract_images(page):
"""Extract image URLs from 'page'"""
- images = text.extract_iter(page, '<a href="./view_popup.php', '</a>')
+ images = text.extract_iter(page, "/view_popup.php", "</a>")
for num, image in enumerate(images):
- url = "https:" + text.extract(image, 'src="', '"')[0]
- url = url.replace("/__rs_l120x120/", "/")
+ src = text.extract(image, 'src="', '"')[0]
+ if not src:
+ continue
+ url = ("https:" + src).replace("/__rs_l120x120/", "/")
yield text.nameext_from_url(url, {
"num": num,
"url": url,
@@ -112,7 +137,7 @@ class NijieExtractor(AsynchronousMixin, Extractor):
data = {"email": username, "password": password, "save": "on"}
response = self.request(url, method="POST", data=data)
- if "//nijie.info/login.php" in response.text:
+ if "/login.php" in response.text:
raise exception.AuthenticationError()
return self.session.cookies
@@ -132,12 +157,27 @@ class NijieExtractor(AsynchronousMixin, Extractor):
params["p"] += 1
+BASE_PATTERN = NijieExtractor.update({
+ "nijie": {
+ "root": "https://nijie.info",
+ "pattern": r"(?:www\.)?nijie\.info",
+ },
+ "horne": {
+ "root": "https://horne.red",
+ "pattern": r"(?:www\.)?horne\.red",
+ },
+})
+
+
class NijieUserExtractor(NijieExtractor):
"""Extractor for nijie user profiles"""
subcategory = "user"
cookiedomain = None
pattern = BASE_PATTERN + r"/members\.php\?id=(\d+)"
- test = ("https://nijie.info/members.php?id=44",)
+ test = (
+ ("https://nijie.info/members.php?id=44"),
+ ("https://horne.red/members.php?id=58000"),
+ )
def items(self):
fmt = "{}/{{}}.php?id={}".format(self.root, self.user_id).format
@@ -172,6 +212,25 @@ class NijieIllustrationExtractor(NijieExtractor):
"user_name": "ED",
},
}),
+ ("https://horne.red/members_illust.php?id=58000", {
+ "pattern": r"https://pic\.nijie\.net/\d+/horne/\d+/\d+/\d+"
+ r"/illust/\d+_\d+_[0-9a-f]+_[0-9a-f]+\.png",
+ "range": "1-20",
+ "count": 20,
+ "keyword": {
+ "artist_id": 58000,
+ "artist_name": "のえるわ",
+ "date": "type:datetime",
+ "description": str,
+ "image_id": int,
+ "num": int,
+ "tags": list,
+ "title": str,
+ "url": str,
+ "user_id": 58000,
+ "user_name": "のえるわ",
+ },
+ }),
("https://nijie.info/members_illust.php?id=43", {
"exception": exception.NotFoundError,
}),
@@ -182,34 +241,47 @@ class NijieIllustrationExtractor(NijieExtractor):
class NijieDoujinExtractor(NijieExtractor):
- """Extractor for doujin entries of a nijie-user"""
+ """Extractor for doujin entries of a nijie user"""
subcategory = "doujin"
pattern = BASE_PATTERN + r"/members_dojin\.php\?id=(\d+)"
- test = ("https://nijie.info/members_dojin.php?id=6782", {
- "count": ">= 18",
- "keyword": {
- "user_id" : 6782,
- "user_name": "ジョニー@アビオン村",
- },
- })
+ test = (
+ ("https://nijie.info/members_dojin.php?id=6782", {
+ "count": ">= 18",
+ "keyword": {
+ "user_id" : 6782,
+ "user_name": "ジョニー@アビオン村",
+ },
+ }),
+ ("https://horne.red/members_dojin.php?id=58000"),
+ )
def image_ids(self):
return self._pagination("members_dojin")
class NijieFavoriteExtractor(NijieExtractor):
- """Extractor for all favorites/bookmarks of a nijie-user"""
+ """Extractor for all favorites/bookmarks of a nijie user"""
subcategory = "favorite"
directory_fmt = ("{category}", "bookmarks", "{user_id}")
archive_fmt = "f_{user_id}_{image_id}_{num}"
pattern = BASE_PATTERN + r"/user_like_illust_view\.php\?id=(\d+)"
- test = ("https://nijie.info/user_like_illust_view.php?id=44", {
- "count": ">= 16",
- "keyword": {
- "user_id" : 44,
- "user_name": "ED",
- },
- })
+ test = (
+ ("https://nijie.info/user_like_illust_view.php?id=44", {
+ "count": ">= 16",
+ "keyword": {
+ "user_id" : 44,
+ "user_name": "ED",
+ },
+ }),
+ ("https://horne.red/user_like_illust_view.php?id=58000", {
+ "range": "1-5",
+ "count": 5,
+ "keyword": {
+ "user_id" : 58000,
+ "user_name": "のえるわ",
+ },
+ }),
+ )
def image_ids(self):
return self._pagination("user_like_illust_view")
@@ -227,14 +299,17 @@ class NijieNuitaExtractor(NijieExtractor):
directory_fmt = ("{category}", "nuita", "{user_id}")
archive_fmt = "n_{user_id}_{image_id}_{num}"
pattern = BASE_PATTERN + r"/history_nuita\.php\?id=(\d+)"
- test = ("https://nijie.info/history_nuita.php?id=728995", {
- "range": "1-10",
- "count": 10,
- "keyword": {
- "user_id" : 728995,
- "user_name": "莚",
- },
- })
+ test = (
+ ("https://nijie.info/history_nuita.php?id=728995", {
+ "range": "1-10",
+ "count": 10,
+ "keyword": {
+ "user_id" : 728995,
+ "user_name": "莚",
+ },
+ }),
+ ("https://horne.red/history_nuita.php?id=58000"),
+ )
def image_ids(self):
return self._pagination("history_nuita")
@@ -252,7 +327,7 @@ class NijieNuitaExtractor(NijieExtractor):
class NijieImageExtractor(NijieExtractor):
- """Extractor for a work/image from nijie.info"""
+ """Extractor for a nijie work/image"""
subcategory = "image"
pattern = BASE_PATTERN + r"/view(?:_popup)?\.php\?id=(\d+)"
test = (
@@ -265,11 +340,26 @@ class NijieImageExtractor(NijieExtractor):
"count": 0,
}),
("https://nijie.info/view_popup.php?id=70720"),
+ ("https://horne.red/view.php?id=8716", {
+ "count": 4,
+ "keyword": {
+ "artist_id": 58000,
+ "artist_name": "のえるわ",
+ "date": "dt:2018-02-04 14:47:24",
+ "description": "ノエル「そんなことしなくても、"
+ "言ってくれたら咥えるのに・・・♡」",
+ "image_id": 8716,
+ "tags": ["男の娘", "フェラ", "オリキャラ", "うちのこ"],
+ "title": "ノエル「いまどきそんな、恵方巻ネタなんてやらなくても・・・」",
+ "user_id": 58000,
+ "user_name": "のえるわ",
+ },
+ }),
)
def __init__(self, match):
NijieExtractor.__init__(self, match)
- self.image_id = match.group(1)
+ self.image_id = match.group(match.lastindex)
def image_ids(self):
return (self.image_id,)
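nijie.py is generalized from a single-site extractor to a `BaseExtractor` with two instances, nijie.info and horne.red; `NijieExtractor.update()` combines their per-instance patterns into one `BASE_PATTERN`, which is why ids are now read via `match.group(match.lastindex)`. A rough sketch of that combination (assumption: the real method also records each instance's root URL and category):

```python
import re

instances = {
    "nijie": r"(?:www\.)?nijie\.info",
    "horne": r"(?:www\.)?horne\.red",
}
BASE_PATTERN = r"(?:https?://)?(?:" + "|".join(instances.values()) + r")"

# lastindex always points at the trailing id group, no matter how many
# groups the instance alternation contributes
pattern = re.compile(BASE_PATTERN + r"/view(?:_popup)?\.php\?id=(\d+)")
match = pattern.match("https://horne.red/view.php?id=8716")
print(match.group(match.lastindex))  # '8716'
```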
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
new file mode 100644
index 0000000..e1846cc
--- /dev/null
+++ b/gallery_dl/extractor/poipiku.py
@@ -0,0 +1,169 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://poipiku.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?poipiku\.com"
+
+
+class PoipikuExtractor(Extractor):
+ """Base class for poipiku extractors"""
+ category = "poipiku"
+ root = "https://poipiku.com"
+ directory_fmt = ("{category}", "{user_id} {user_name}")
+ filename_fmt = "{post_id}_{num}.{extension}"
+ archive_fmt = "{post_id}_{num}"
+ request_interval = (0.5, 1.5)
+
+ def items(self):
+ password = self.config("password", "")
+
+ for post_url in self.posts():
+ parts = post_url.split("/")
+ if post_url[0] == "/":
+ post_url = self.root + post_url
+ page = self.request(post_url).text
+ extr = text.extract_from(page)
+
+ post = {
+ "post_category": extr("<title>[", "]"),
+ "count" : extr("(", " "),
+ "post_id" : parts[-1].partition(".")[0],
+ "user_id" : parts[-2],
+ "user_name" : text.unescape(extr(
+ '<h2 class="UserInfoUserName">', '</').rpartition(">")[2]),
+ "description": text.unescape(extr(
+ 'class="IllustItemDesc" >', '<')),
+ }
+
+ yield Message.Directory, post
+ post["num"] = 0
+
+ while True:
+ thumb = extr('class="IllustItemThumbImg" src="', '"')
+ if not thumb:
+ break
+ elif thumb.startswith("/img/"):
+ continue
+ post["num"] += 1
+ url = text.ensure_http_scheme(thumb[:-8])
+ yield Message.Url, url, text.nameext_from_url(url, post)
+
+ if not extr('</i> show all', '<'):
+ continue
+
+ url = self.root + "/f/ShowAppendFileF.jsp"
+ headers = {
+ "Accept" : "application/json, text/javascript, */*; q=0.01",
+ "X-Requested-With": "XMLHttpRequest",
+ "Origin" : self.root,
+ "Referer": post_url,
+ }
+ data = {
+ "UID": post["user_id"],
+ "IID": post["post_id"],
+ "PAS": password,
+ "MD" : "0",
+ "TWF": "-1",
+ }
+ page = self.request(
+ url, method="POST", headers=headers, data=data).json()["html"]
+
+ for thumb in text.extract_iter(
+ page, 'class="IllustItemThumbImg" src="', '"'):
+ post["num"] += 1
+ url = text.ensure_http_scheme(thumb[:-8])
+ yield Message.Url, url, text.nameext_from_url(url, post)
+
+
+class PoipikuUserExtractor(PoipikuExtractor):
+ """Extractor for posts from a poipiku user"""
+ subcategory = "user"
+ pattern = (BASE_PATTERN + r"/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?"
+ r"(\d+)/?(?:$|[?&#])")
+ test = (
+ ("https://poipiku.com/25049/", {
+ "pattern": r"https://img\.poipiku\.com/user_img\d+/000025049"
+ r"/\d+_\w+\.(jpe?g|png)$",
+ "range": "1-10",
+ "count": 10,
+ }),
+ ("https://poipiku.com/IllustListPcV.jsp?PG=1&ID=25049&KWD=")
+ )
+
+ def __init__(self, match):
+ PoipikuExtractor.__init__(self, match)
+ self._page, self.user_id = match.groups()
+
+ def posts(self):
+ url = self.root + "/IllustListPcV.jsp"
+ params = {
+ "PG" : text.parse_int(self._page, 0),
+ "ID" : self.user_id,
+ "KWD": "",
+ }
+
+ while True:
+ page = self.request(url, params=params).text
+
+ cnt = 0
+ for path in text.extract_iter(
+ page, 'class="IllustInfo" href="', '"'):
+ yield path
+ cnt += 1
+
+ if cnt < 48:
+ return
+ params["PG"] += 1
+
+
+class PoipikuPostExtractor(PoipikuExtractor):
+ """Extractor for a poipiku post"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"/(\d+)/(\d+)"
+ test = (
+ ("https://poipiku.com/25049/5864576.html", {
+ "pattern": r"https://img\.poipiku\.com/user_img03/000025049"
+ r"/005864576_EWN1Y65gQ\.png$",
+ "keyword": {
+ "count": "1",
+ "description": "",
+ "extension": "png",
+ "filename": "005864576_EWN1Y65gQ",
+ "num": 1,
+ "post_category": "DOODLE",
+ "post_id": "5864576",
+ "user_id": "25049",
+ "user_name": "ユキウサギ",
+ },
+ }),
+ ("https://poipiku.com/2166245/6411749.html", {
+ "pattern": r"https://img\.poipiku\.com/user_img01/002166245"
+ r"/006411749_\w+\.jpeg$",
+ "count": 4,
+ "keyword": {
+ "count": "4",
+ "description": "絵茶の産物ネタバレあるやつ",
+ "num": int,
+ "post_category": "SPOILER",
+ "post_id": "6411749",
+ "user_id": "2166245",
+ "user_name": "wadahito",
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ PoipikuExtractor.__init__(self, match)
+ self.user_id, self.post_id = match.groups()
+
+ def posts(self):
+ return ("/{}/{}.html".format(self.user_id, self.post_id),)
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index ca7a3c6..a477424 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -130,12 +130,13 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
def beau(url):
"""https://readcomiconline.li/Scripts/rguard.min.js"""
- if url.startswith("https"):
- return url
-
url = url.replace("_x236", "d")
url = url.replace("_x945", "g")
+ if url.startswith("https"):
+ return url
+
+ url, sep, rest = url.partition("?")
containsS0 = "=s0" in url
url = url[:-3 if containsS0 else -6]
url = url[4:22] + url[25:]
@@ -143,4 +144,4 @@ def beau(url):
url = binascii.a2b_base64(url).decode()
url = url[0:13] + url[17:]
url = url[0:-2] + ("=s0" if containsS0 else "=s1600")
- return "https://2.bp.blogspot.com/" + url
+ return "https://2.bp.blogspot.com/" + url + sep + rest
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 2af917d..2ecb4b6 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -135,10 +135,11 @@ class SkebPostExtractor(SkebExtractor):
"body": "re:はじめまして。私はYouTubeにてVTuberとして活動をしている湊ラ",
"client": {
"avatar_url": "https://pbs.twimg.com/profile_images"
- "/1471184042791895042/f0DcWFGl.jpg",
- "header_url": None,
+ "/1537488326697287680/yNUbLDgC.jpg",
+ "header_url": "https://pbs.twimg.com/profile_banners"
+ "/1375007870291300358/1655744756/1500x500",
"id": 1196514,
- "name": "湊ラギ",
+ "name": "湊ラギ♦️🎀Vtuber🎀次回6/23予定",
"screen_name": "minato_ragi",
},
"completed_at": "2022-02-27T14:03:45.442Z",
@@ -208,3 +209,30 @@ class SkebUserExtractor(SkebExtractor):
posts = itertools.chain(posts, self._pagination(url, params))
return posts
+
+
+class SkebFollowingExtractor(SkebExtractor):
+ """Extractor for all creators followed by a skeb user"""
+ subcategory = "following"
+ pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/following_creators"
+ test = ("https://skeb.jp/@user/following_creators",)
+
+ def items(self):
+ for user in self.users():
+ url = "{}/@{}".format(self.root, user["screen_name"])
+ user["_extractor"] = SkebUserExtractor
+ yield Message.Queue, url, user
+
+ def users(self):
+ url = "{}/api/users/{}/following_creators".format(
+ self.root, self.user_name)
+ headers = {"Referer": self.root, "Authorization": "Bearer null"}
+ params = {"sort": "date", "offset": 0, "limit": 90}
+
+ while True:
+ data = self.request(url, params=params, headers=headers).json()
+ yield from data
+
+ if len(data) < params["limit"]:
+ return
+ params["offset"] += params["limit"]
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 2737d34..a0d6194 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -40,6 +40,7 @@ class TwitterExtractor(Extractor):
self.quoted = self.config("quoted", False)
self.videos = self.config("videos", True)
self.cards = self.config("cards", False)
+ self._user_id = None
self._user_cache = {}
self._init_sizes()
@@ -59,6 +60,10 @@ class TwitterExtractor(Extractor):
self.api = TwitterAPI(self)
metadata = self.metadata()
+ if self.config("expand"):
+ tweets = self._expand_tweets(self.tweets())
+ self.tweets = lambda : tweets
+
for tweet in self.tweets():
if "legacy" in tweet:
@@ -75,7 +80,8 @@ class TwitterExtractor(Extractor):
if "in_reply_to_user_id_str" in data and (
not self.replies or (
self.replies == "self" and
- data["in_reply_to_user_id_str"] != data["user_id_str"]
+ (self._user_id or data["in_reply_to_user_id_str"]) !=
+ data["user_id_str"]
)
):
self.log.debug("Skipping %s (reply)", data["id_str"])
@@ -338,6 +344,22 @@ class TwitterExtractor(Extractor):
user["_extractor"] = cls
yield Message.Queue, fmt(user), user
+ def _expand_tweets(self, tweets):
+ seen = set()
+ for tweet in tweets:
+
+ if "legacy" in tweet:
+ cid = tweet["legacy"]["conversation_id_str"]
+ else:
+ cid = tweet["conversation_id_str"]
+
+ if cid not in seen:
+ seen.add(cid)
+ try:
+ yield from self.api.tweet_detail(cid)
+ except Exception:
+ yield tweet
+
def metadata(self):
"""Return general metadata"""
return {}
@@ -418,12 +440,12 @@ class TwitterTimelineExtractor(TwitterExtractor):
self.user = "id:" + user_id
def tweets(self):
- tweets = (self.api.user_tweets(self.user) if self.retweets else
- self.api.user_media(self.user))
+ tweets = (self.api.user_tweets if self.retweets else
+ self.api.user_media)
# yield initial batch of (media) tweets
tweet = None
- for tweet in tweets:
+ for tweet in tweets(self.user):
yield tweet
if tweet is None:
@@ -442,12 +464,17 @@ class TwitterTimelineExtractor(TwitterExtractor):
if "legacy" in tweet:
tweet = tweet["legacy"]
+ # build search query
+ query = "from:{} max_id:{}".format(username, tweet["id_str"])
+ if self.retweets:
+ query += " include:retweets include:nativeretweets"
+ if not self.textonly:
+ query += (" (filter:images OR"
+ " filter:native_video OR"
+ " card_name:animated_gif)")
+
# yield search results starting from last tweet id
- yield from self.api.search_adaptive(
- "from:{} include:retweets include:nativeretweets max_id:{} "
- "filter:images OR card_name:animated_gif OR filter:native_video"
- .format(username, tweet["id_str"])
- )
+ yield from self.api.search_adaptive(query)
class TwitterTweetsExtractor(TwitterExtractor):
@@ -694,10 +721,10 @@ class TwitterTweetExtractor(TwitterExtractor):
"date" : "dt:2020-08-20 04:00:28",
},
}),
- # all Tweets from a conversation (#1319)
- ("https://twitter.com/BlankArts_/status/1323314488611872769", {
+ # all Tweets from a 'conversation' (#1319)
+ ("https://twitter.com/supernaturepics/status/604341487988576256", {
"options": (("conversations", True),),
- "count": ">= 50",
+ "count": 5,
}),
# retweet with missing media entities (#1555)
("https://twitter.com/morino_ya/status/1392763691599237121", {
@@ -845,8 +872,11 @@ class TwitterAPI():
cookies = extractor.session.cookies
cookiedomain = extractor.cookiedomain
- # CSRF
- csrf_token = cookies.get("ct0", domain=cookiedomain)
+ csrf = extractor.config("csrf")
+ if csrf is None or csrf == "cookies":
+ csrf_token = cookies.get("ct0", domain=cookiedomain)
+ else:
+ csrf_token = None
if not csrf_token:
csrf_token = util.generate_token()
cookies.set("ct0", csrf_token, domain=cookiedomain)
@@ -1000,19 +1030,23 @@ class TwitterAPI():
def _user_id_by_screen_name(self, screen_name):
if screen_name.startswith("id:"):
self._user = util.SENTINEL
- return screen_name[3:]
+ user_id = screen_name[3:]
- user = ()
- try:
- user = self._user = self.user_by_screen_name(screen_name)
- return user["rest_id"]
- except KeyError:
- if "unavailable_message" in user:
- raise exception.NotFoundError("{} ({})".format(
- user["unavailable_message"].get("text"),
- user.get("reason")), False)
- else:
- raise exception.NotFoundError("user")
+ else:
+ user = ()
+ try:
+ user = self._user = self.user_by_screen_name(screen_name)
+ user_id = user["rest_id"]
+ except KeyError:
+ if "unavailable_message" in user:
+ raise exception.NotFoundError("{} ({})".format(
+ user["unavailable_message"].get("text"),
+ user.get("reason")), False)
+ else:
+ raise exception.NotFoundError("user")
+
+ self.extractor._user_id = user_id
+ return user_id
@cache(maxage=3600)
def _guest_token(self):
@@ -1228,6 +1262,8 @@ class TwitterAPI():
tweets.append(entry)
elif esw("cursor-bottom-"):
cursor = entry["content"]
+ if "itemContent" in cursor:
+ cursor = cursor["itemContent"]
if not cursor.get("stopOnEmptyResponse", True):
# keep going even if there are no tweets
tweet = True
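A new `csrf` config option controls where the `ct0` CSRF token comes from: with the default `"cookies"` it is read from the cookie jar, otherwise a random token is minted and stored there. A sketch of such token generation (assumption: gallery-dl's `util.generate_token()` behaves similarly):

```python
import os

def generate_token(size=16):
    # Random hex token; with "csrf" set to anything but "cookies",
    # a fresh value like this is written into the 'ct0' cookie
    return os.urandom(size).hex()
```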
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index ad1617c..c29d730 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -30,12 +30,16 @@ class UnsplashExtractor(Extractor):
def items(self):
fmt = self.config("format") or "raw"
+ metadata = self.metadata()
+
for photo in self.photos():
util.delete_items(
photo, ("current_user_collections", "related_collections"))
url = photo["urls"][fmt]
text.nameext_from_url(url, photo)
+ if metadata:
+ photo.update(metadata)
photo["extension"] = "jpg"
photo["date"] = text.parse_datetime(photo["created_at"])
if "tags" in photo:
@@ -44,6 +48,10 @@ class UnsplashExtractor(Extractor):
yield Message.Directory, photo
yield Message.Url, url, photo
+ @staticmethod
+ def metadata():
+ return None
+
def skip(self, num):
pages = num // self.per_page
self.page_start += pages
@@ -172,17 +180,27 @@ class UnsplashFavoriteExtractor(UnsplashExtractor):
class UnsplashCollectionExtractor(UnsplashExtractor):
"""Extractor for an unsplash collection"""
subcategory = "collection"
- pattern = BASE_PATTERN + r"/collections/([^/?#]+)"
+ pattern = BASE_PATTERN + r"/collections/([^/?#]+)(?:/([^/?#]+))?"
test = (
("https://unsplash.com/collections/3178572/winter", {
"pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ "keyword": {"collection_id": "3178572",
+ "collection_title": "winter"},
"range": "1-30",
"count": 30,
}),
+ ("https://unsplash.com/collections/3178572/"),
("https://unsplash.com/collections/_8qJQ2bCMWE/2021.05"),
)
+ def __init__(self, match):
+ UnsplashExtractor.__init__(self, match)
+ self.title = match.group(2) or ""
+
+ def metadata(self):
+ return {"collection_id": self.item, "collection_title": self.title}
+
def photos(self):
url = "{}/napi/collections/{}/photos".format(self.root, self.item)
params = {"order_by": "latest"}
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 23f6ea2..ab2153f 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -40,12 +40,12 @@ class VkExtractor(Extractor):
continue
try:
- photo["url"], photo["width"], photo["height"] = photo[size]
+ _, photo["width"], photo["height"] = photo[size]
except ValueError:
# photo without width/height entries (#2535)
- photo["url"] = photo[size + "src"]
photo["width"] = photo["height"] = 0
+ photo["url"] = photo[size + "src"]
photo["id"] = photo["id"].rpartition("_")[2]
photo.update(data)
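The reordering matters because tuple unpacking raises `ValueError` before any target is assigned; with the URL assignment hoisted out of the `try`, `photo["url"]` is now consistently taken from the `<size>src` key on both paths. A standalone illustration with hypothetical keys:

```python
photo = {"z_": ["https://example.invalid/p.jpg", 640],  # missing height (#2535)
         "z_src": "https://example.invalid/p.jpg"}
try:
    _, width, height = photo["z_"]  # too few values -> ValueError
except ValueError:
    width = height = 0
url = photo["z_src"]                # assigned on both paths
```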
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index a7068c8..68871c8 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -52,10 +52,6 @@ class WeiboExtractor(Extractor):
for status in self.statuses():
- status["date"] = text.parse_datetime(
- status["created_at"], "%a %b %d %H:%M:%S %z %Y")
- yield Message.Directory, status
-
if self.retweets and "retweeted_status" in status:
if original_retweets:
status = status["retweeted_status"]
@@ -68,6 +64,10 @@ class WeiboExtractor(Extractor):
else:
files = self._files_from_status(status)
+ status["date"] = text.parse_datetime(
+ status["created_at"], "%a %b %d %H:%M:%S %z %Y")
+ yield Message.Directory, status
+
for num, file in enumerate(files, 1):
if file["url"].startswith("http:"):
file["url"] = "https:" + file["url"][5:]
@@ -191,7 +191,9 @@ class WeiboUserExtractor(WeiboExtractor):
subcategory = "user"
pattern = USER_PATTERN + r"(?:$|#)"
test = (
- ("https://weibo.com/1758989602"),
+ ("https://weibo.com/1758989602", {
+ "pattern": r"^https://weibo\.com/u/1758989602\?tabtype=feed$",
+ }),
("https://weibo.com/u/1758989602"),
("https://weibo.com/p/1758989602"),
("https://m.weibo.cn/profile/2314621010"),
@@ -200,12 +202,13 @@ class WeiboUserExtractor(WeiboExtractor):
)
def items(self):
- base = " {}/u/{}?tabtype=".format(self.root, self._user_id())
+ base = "{}/u/{}?tabtype=".format(self.root, self._user_id())
return self._dispatch_extractors((
- (WeiboHomeExtractor , base + "home"),
- (WeiboFeedExtractor , base + "feed"),
- (WeiboVideosExtractor, base + "newVideo"),
- (WeiboAlbumExtractor , base + "album"),
+ (WeiboHomeExtractor , base + "home"),
+ (WeiboFeedExtractor , base + "feed"),
+ (WeiboVideosExtractor , base + "video"),
+ (WeiboNewvideoExtractor, base + "newVideo"),
+ (WeiboAlbumExtractor , base + "album"),
), ("feed",))
@@ -254,8 +257,27 @@ class WeiboFeedExtractor(WeiboExtractor):
class WeiboVideosExtractor(WeiboExtractor):
- """Extractor for weibo 'newVideo' listings"""
+ """Extractor for weibo 'video' listings"""
subcategory = "videos"
+ pattern = USER_PATTERN + r"\?tabtype=video"
+ test = ("https://weibo.com/1758989602?tabtype=video", {
+ "pattern": r"https://f\.(video\.weibocdn\.com|us\.sinaimg\.cn)"
+ r"/(../)?\w+\.mp4\?label=mp",
+ "range": "1-30",
+ "count": 30,
+ })
+
+ def statuses(self):
+ endpoint = "/profile/getprofilevideolist"
+ params = {"uid": self._user_id()}
+
+ for status in self._pagination(endpoint, params):
+ yield status["video_detail_vo"]
+
+
+class WeiboNewvideoExtractor(WeiboExtractor):
+ """Extractor for weibo 'newVideo' listings"""
+ subcategory = "newvideo"
pattern = USER_PATTERN + r"\?tabtype=newVideo"
test = ("https://weibo.com/1758989602?tabtype=newVideo", {
"pattern": r"https://f\.video\.weibocdn\.com/(../)?\w+\.mp4\?label=mp",
@@ -336,8 +358,8 @@ class WeiboStatusExtractor(WeiboExtractor):
}),
# type == gif
("https://weibo.com/1758989602/LvBhm5DiP", {
- "pattern": r"http://g\.us\.sinaimg.cn/o0/qNZcaAAglx07Wuf921CM01041"
- r"20005tc0E010\.mp4\?label=gif_mp4",
+ "pattern": r"https://g\.us\.sinaimg.cn/o0/qNZcaAAglx07Wuf921CM0104"
+ r"120005tc0E010\.mp4\?label=gif_mp4",
}),
("https://m.weibo.cn/status/4339748116375525"),
("https://m.weibo.cn/5746766133/4339748116375525"),