New upstream version 1.21.0. (tag: upstream/1.21.0)

author: Unit 193 <unit193@unit193.net> 2022-03-15 00:19:57 -0400
committer: Unit 193 <unit193@unit193.net> 2022-03-15 00:19:57 -0400
commit: c2e774d3f5a4499b8beb5a12ab46a0099b16b1e7 (patch)
tree: a14107397b5bcb491aa4f4fb3e0feb4582e1879b /gallery_dl/extractor
parent: 7900ee4e3692dbd8056c3e47c81bb22eda030b65 (diff)
25 files changed, 922 insertions, 209 deletions
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index b52561e..1bec48e 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -67,6 +67,7 @@ modules = [
     "keenspot",
     "kemonoparty",
     "khinsider",
+    "kissgoddess",
     "kohlchan",
     "komikcast",
     "lightroom",
@@ -81,6 +82,7 @@ modules = [
     "mangapark",
     "mangasee",
     "mangoxo",
+    "mememuseum",
     "myhentaigallery",
     "myportfolio",
     "naver",
@@ -123,9 +125,11 @@ modules = [
     "speakerdeck",
     "subscribestar",
     "tapas",
+    "toyhouse",
     "tsumino",
     "tumblr",
     "tumblrgallery",
+    "twibooru",
     "twitter",
     "unsplash",
     "vanillarock",
diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index a42ec53..12d98b1 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -41,9 +41,9 @@ class BooruExtractor(BaseExtractor):
                 page_html = self._extended_tags(post)
             if notes:
                 self._notes(post, page_html)
-            self._prepare(post)
-            post.update(data)
             text.nameext_from_url(url, post)
+            post.update(data)
+            self._prepare(post)
 
             yield Message.Directory, post
             yield Message.Url, url, post
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 5a2d3a3..e3559f9 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -55,6 +55,7 @@ class Extractor():
         self._retries = self.config("retries", 4)
         self._timeout = self.config("timeout", 30)
         self._verify = self.config("verify", True)
+        self._proxies = util.build_proxy_map(self.config("proxy"), self.log)
         self._interval = util.build_duration_func(
             self.config("sleep-request", self.request_interval),
             self.request_interval_min,
@@ -65,7 +66,6 @@ class Extractor():
 
         self._init_session()
         self._init_cookies()
-        self._init_proxies()
 
     @classmethod
     def from_url(cls, url):
@@ -104,10 +104,12 @@ class Extractor():
 
     def request(self, url, *, method="GET", session=None, retries=None,
                 encoding=None, fatal=True, notfound=None, **kwargs):
-        if retries is None:
-            retries = self._retries
         if session is None:
             session = self.session
+        if retries is None:
+            retries = self._retries
+        if "proxies" not in kwargs:
+            kwargs["proxies"] = self._proxies
         if "timeout" not in kwargs:
             kwargs["timeout"] = self._timeout
         if "verify" not in kwargs:
@@ -289,20 +291,6 @@ class Extractor():
         session.mount("https://", adapter)
         session.mount("http://", adapter)
 
-    def _init_proxies(self):
-        """Update the session's proxy map"""
-        proxies = self.config("proxy")
-        if proxies:
-            if isinstance(proxies, str):
-                proxies = {"http": proxies, "https": proxies}
-            if isinstance(proxies, dict):
-                for scheme, proxy in proxies.items():
-                    if "://" not in proxy:
-                        proxies[scheme] = "http://" + proxy.lstrip("/")
-                self.session.proxies = proxies
-            else:
-                self.log.warning("invalid proxy specifier: %s", proxies)
-
     def _init_cookies(self):
         """Populate the session's cookiejar"""
         self._cookiefile = None
@@ -371,20 +359,25 @@ class Extractor():
         for cookie in self._cookiejar:
             if cookie.name in names and (
                     not domain or cookie.domain == domain):
+
                 if cookie.expires:
                     diff = int(cookie.expires - now)
+
                     if diff <= 0:
                         self.log.warning(
                             "Cookie '%s' has expired", cookie.name)
+                        continue
+
                     elif diff <= 86400:
                         hours = diff // 3600
                         self.log.warning(
                             "Cookie '%s' will expire in less than %s hour%s",
                             cookie.name, hours + 1, "s" if hours else "")
-                else:
-                    names.discard(cookie.name)
-                    if not names:
-                        return True
+                        continue
+
+                names.discard(cookie.name)
+                if not names:
+                    return True
         return False
 
     def _prepare_ddosguard_cookies(self):
@@ -616,8 +609,7 @@ class BaseExtractor(Extractor):
                     if index:
                         self.category, self.root = self.instances[index-1]
                         if not self.root:
-                            url = text.ensure_http_scheme(match.group(0))
-                            self.root = url[:url.index("/", 8)]
+                            self.root = text.root_from_url(match.group(0))
                     else:
                         self.root = group
                         self.category = group.partition("://")[2]
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 94fec16..fda7220 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -1004,6 +1004,7 @@ class DeviantartOAuthAPI():
         self.extractor = extractor
         self.log = extractor.log
         self.headers = {"dA-minor-version": "20200519"}
+        self._warn_429 = True
 
         self.delay = extractor.config("wait-min", 0)
         self.delay_min = max(2, self.delay)
@@ -1260,6 +1261,16 @@ class DeviantartOAuthAPI():
                 if self.delay < 30:
                     self.delay += 1
                 self.log.warning("%s. Using %ds delay.", msg, self.delay)
+
+                if self._warn_429 and self.delay >= 3:
+                    self._warn_429 = False
+                    if self.client_id == self.CLIENT_ID:
+                        self.log.info(
+                            "Register your own OAuth application and use its "
+                            "credentials to prevent this error: "
+                            "https://github.com/mikf/gallery-dl/blob/master/do"
+                            "cs/configuration.rst#extractordeviantartclient-id"
+                            "--client-secret")
             else:
                 self.log.error(msg)
                 return data
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index ef79808..11436cb 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -51,19 +51,16 @@ class FanboxExtractor(Extractor):
             url = text.ensure_http_scheme(url)
             body = self.request(url, headers=headers).json()["body"]
             for item in body["items"]:
-                yield self._process_post(item)
+                yield self._get_post_data(item["id"])
 
             url = body["nextUrl"]
 
-    def _get_post_data_from_id(self, post_id):
+    def _get_post_data(self, post_id):
         """Fetch and process post data"""
         headers = {"Origin": self.root}
         url = "https://api.fanbox.cc/post.info?postId="+post_id
         post = self.request(url, headers=headers).json()["body"]
 
-        return self._process_post(post)
-
-    def _process_post(self, post):
         content_body = post.pop("body", None)
         if content_body:
             if "html" in content_body:
@@ -279,7 +276,7 @@ class FanboxPostExtractor(FanboxExtractor):
         self.post_id = match.group(3)
 
     def posts(self):
-        return (self._get_post_data_from_id(self.post_id),)
+        return (self._get_post_data(self.post_id),)
 
 
 class FanboxRedirectExtractor(Extractor):
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index 89a965f..c05ec39 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -8,6 +8,7 @@
 
 from .common import Extractor, Message
 from .. import text
+import json
 
 
 class FantiaExtractor(Extractor):
@@ -29,7 +30,9 @@ class FantiaExtractor(Extractor):
         for post_id in self.posts():
             full_response, post = self._get_post_data(post_id)
             yield Message.Directory, post
+            post["num"] = 0
             for url, url_data in self._get_urls_from_post(full_response, post):
+                post["num"] += 1
                 fname = url_data["content_filename"] or url
                 text.nameext_from_url(fname, url_data)
                 url_data["file_url"] = url
@@ -90,14 +93,39 @@ class FantiaExtractor(Extractor):
             post["content_title"] = content["title"]
             post["content_filename"] = content.get("filename", "")
             post["content_id"] = content["id"]
+
+            if "comment" in content:
+                post["content_comment"] = content["comment"]
+
             if "post_content_photos" in content:
                 for photo in content["post_content_photos"]:
                     post["file_id"] = photo["id"]
                     yield photo["url"]["original"], post
+
             if "download_uri" in content:
                 post["file_id"] = content["id"]
                 yield self.root+"/"+content["download_uri"], post
 
+            if content["category"] == "blog" and "comment" in content:
+                comment_json = json.loads(content["comment"])
+                ops = comment_json.get("ops", ())
+
+                # collect blogpost text first
+                blog_text = ""
+                for op in ops:
+                    insert = op.get("insert")
+                    if isinstance(insert, str):
+                        blog_text += insert
+                post["blogpost_text"] = blog_text
+
+                # collect images
+                for op in ops:
+                    insert = op.get("insert")
+                    if isinstance(insert, dict) and "fantiaImage" in insert:
+                        img = insert["fantiaImage"]
+                        post["file_id"] = img["id"]
+                        yield "https://fantia.jp" + img["original_url"], post
+
 
 class FantiaCreatorExtractor(FantiaExtractor):
     """Extractor for a Fantia creator's works"""
diff --git a/gallery_dl/extractor/hentaicosplays.py b/gallery_dl/extractor/hentaicosplays.py
index 7dd047c..b4f433b 100644
--- a/gallery_dl/extractor/hentaicosplays.py
+++ b/gallery_dl/extractor/hentaicosplays.py
@@ -57,6 +57,7 @@ class HentaicosplaysGalleryExtractor(GalleryExtractor):
         self.root = text.ensure_http_scheme(root)
         url = "{}/story/{}/".format(self.root, self.slug)
         GalleryExtractor.__init__(self, match, url)
+        self.session.headers["Referer"] = url
 
     def metadata(self, page):
         title = text.extract(page, "<title>", "</title>")[0]
diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py
index 9370840..7cd67d6 100644
--- a/gallery_dl/extractor/imagebam.py
+++ b/gallery_dl/extractor/imagebam.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2014-2021 Mike Fährmann
+# Copyright 2014-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -10,45 +10,40 @@
 
 from .common import Extractor, Message
 from .. import text, exception
+import re
 
 
 class ImagebamExtractor(Extractor):
     """Base class for imagebam extractors"""
     category = "imagebam"
     root = "https://www.imagebam.com"
-    cookies = None
 
     def __init__(self, match):
         Extractor.__init__(self, match)
-        self.key = match.group(1)
-        if self.cookies:
-            self.session.cookies = self.cookies
-
-    def get_image_data(self, data):
-        page_url = "{}/image/{}".format(self.root, data["image_key"])
-        page = self.request(page_url).text
-        image_url, pos = text.extract(page, '<img src="https://images', '"')
-
-        if not image_url:
-            # cache cookies
-            ImagebamExtractor.cookies = self.session.cookies
-            # repeat request to get past "Continue to your image" pages
-            page = self.request(page_url).text
-            image_url, pos = text.extract(
-                page, '<img src="https://images', '"')
+        self.path = match.group(1)
+        self.session.cookies.set("nsfw_inter", "1", domain="www.imagebam.com")
 
+    def _parse_image_page(self, path):
+        page = self.request(self.root + path).text
+        url, pos = text.extract(page, '<img src="https://images', '"')
         filename = text.unescape(text.extract(page, 'alt="', '"', pos)[0])
-        data["url"] = "https://images" + image_url
+
+        data = {
+            "url"      : "https://images" + url,
+            "image_key": path.rpartition("/")[2],
+        }
         data["filename"], _, data["extension"] = filename.rpartition(".")
+        return data
 
 
 class ImagebamGalleryExtractor(ImagebamExtractor):
-    """Extractor for image galleries from imagebam.com"""
+    """Extractor for imagebam galleries"""
     subcategory = "gallery"
     directory_fmt = ("{category}", "{title} {gallery_key}")
     filename_fmt = "{num:>03} {filename}.{extension}"
     archive_fmt = "{gallery_key}_{image_key}"
-    pattern = r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([0-9a-z]+)"
+    pattern = (r"(?:https?://)?(?:www\.)?imagebam\.com"
+               r"(/(?:gallery/|view/G)[a-zA-Z0-9]+)")
     test = (
         ("https://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
             "url": "76d976788ae2757ac81694736b07b72356f5c4c8",
@@ -63,50 +58,56 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
         ("http://www.imagebam.com/gallery/gsl8teckymt4vbvx1stjkyk37j70va2c", {
             "exception": exception.HttpError,
         }),
+        # /view/ path (#2378)
+        ("https://www.imagebam.com/view/GA3MT1", {
+            "url": "35018ce1e00a2d2825a33d3cd37857edaf804919",
+            "keyword": "3a9f98178f73694c527890c0d7ca9a92b46987ba",
+        }),
     )
 
     def items(self):
-        url = "{}/gallery/{}".format(self.root, self.key)
-        page = self.request(url).text
+        page = self.request(self.root + self.path).text
 
-        data = self.get_metadata(page)
-        keys = self.get_image_keys(page)
-        keys.reverse()
-        data["count"] = len(keys)
-        data["gallery_key"] = self.key
+        images = self.images(page)
+        images.reverse()
+
+        data = self.metadata(page)
+        data["count"] = len(images)
+        data["gallery_key"] = self.path.rpartition("/")[2]
 
         yield Message.Directory, data
-        for data["num"], data["image_key"] in enumerate(keys, 1):
-            self.get_image_data(data)
-            yield Message.Url, data["url"], data
+        for data["num"], path in enumerate(images, 1):
+            image = self._parse_image_page(path)
+            image.update(data)
+            yield Message.Url, image["url"], image
 
     @staticmethod
-    def get_metadata(page):
-        """Return gallery metadata"""
-        title = text.extract(page, 'id="gallery-name">', '<')[0]
-        return {"title": text.unescape(title.strip())}
-
-    def get_image_keys(self, page):
-        """Return a list of all image keys"""
-        keys = []
+    def metadata(page):
+        return {"title": text.unescape(text.extract(
+            page, 'id="gallery-name">', '<')[0].strip())}
+
+    def images(self, page):
+        findall = re.compile(r'<a href="https://www\.imagebam\.com'
+                             r'(/(?:image/|view/M)[a-zA-Z0-9]+)').findall
+
+        paths = []
         while True:
-            keys.extend(text.extract_iter(
-                page, '<a href="https://www.imagebam.com/image/', '"'))
+            paths += findall(page)
             pos = page.find('rel="next" aria-label="Next')
             if pos > 0:
                 url = text.rextract(page, 'href="', '"', pos)[0]
                 if url:
                     page = self.request(url).text
                     continue
-            return keys
+            return paths
 
 
 class ImagebamImageExtractor(ImagebamExtractor):
-    """Extractor for single images from imagebam.com"""
+    """Extractor for single imagebam images"""
     subcategory = "image"
     archive_fmt = "{image_key}"
     pattern = (r"(?:https?://)?(?:\w+\.)?imagebam\.com"
-               r"/(?:image/|(?:[0-9a-f]{2}/){3})([0-9a-f]+)")
+               r"(/(?:image/|view/M|(?:[0-9a-f]{2}/){3})[a-zA-Z0-9]+)")
     test = (
         ("https://www.imagebam.com/image/94d56c502511890", {
             "url": "5e9ba3b1451f8ded0ae3a1b84402888893915d4a",
@@ -118,10 +119,19 @@ class ImagebamImageExtractor(ImagebamExtractor):
         ("https://www.imagebam.com/image/0850951366904951", {
             "url": "d37297b17ed1615b4311c8ed511e50ce46e4c748",
         }),
+        # /view/ path (#2378)
+        ("https://www.imagebam.com/view/ME8JOQP", {
+            "url": "4dca72bbe61a0360185cf4ab2bed8265b49565b8",
+            "keyword": "15a494c02fd30846b41b42a26117aedde30e4ceb",
+            "content": "f81008666b17a42d8834c4749b910e1dc10a6e83",
+        }),
     )
 
     def items(self):
-        data = {"image_key": self.key}
-        self.get_image_data(data)
-        yield Message.Directory, data
-        yield Message.Url, data["url"], data
+        path = self.path
+        if path[3] == "/":
+            path = ("/view/" if path[10] == "M" else "/image/") + path[10:]
+
+        image = self._parse_image_page(path)
+        yield Message.Directory, image
+        yield Message.Url, image["url"], image
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index b898e3b..9537263 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -14,7 +14,7 @@ from ..cache import cache
 import itertools
 import re
 
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?(kemono|coomer)\.party"
+BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.party"
 USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
 
 
@@ -23,15 +23,15 @@ class KemonopartyExtractor(Extractor):
     category = "kemonoparty"
     root = "https://kemono.party"
     directory_fmt = ("{category}", "{service}", "{user}")
-    filename_fmt = "{id}_{title}_{num:>02}_{filename}.{extension}"
+    filename_fmt = "{id}_{title}_{num:>02}_{filename[:180]}.{extension}"
     archive_fmt = "{service}_{user}_{id}_{num}"
     cookiedomain = ".kemono.party"
 
     def __init__(self, match):
         if match.group(1) == "coomer":
             self.category = "coomerparty"
-            self.root = "https://coomer.party"
             self.cookiedomain = ".coomer.party"
+        self.root = text.root_from_url(match.group(0))
         Extractor.__init__(self, match)
 
     def items(self):
@@ -291,6 +291,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
         }),
         ("https://kemono.party/subscribestar/user/alcorart/post/184330"),
         ("https://www.kemono.party/subscribestar/user/alcorart/post/184330"),
+        ("https://beta.kemono.party/subscribestar/user/alcorart/post/184330"),
     )
 
     def __init__(self, match):
diff --git a/gallery_dl/extractor/kissgoddess.py b/gallery_dl/extractor/kissgoddess.py
new file mode 100644
index 0000000..85ec806
--- /dev/null
+++ b/gallery_dl/extractor/kissgoddess.py
@@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://kissgoddess.com/"""
+
+from .common import GalleryExtractor, Extractor, Message
+from .. import text, exception
+
+
+class KissgoddessGalleryExtractor(GalleryExtractor):
+    """Extractor for image galleries on kissgoddess.com"""
+    category = "kissgoddess"
+    root = "https://kissgoddess.com"
+    pattern = r"(?:https?://)?(?:www\.)?kissgoddess\.com/album/(\d+)"
+    test = ("https://kissgoddess.com/album/18285.html", {
+        "pattern": r"https://pic\.kissgoddess\.com"
+                   r"/gallery/16473/18285/s/\d+\.jpg",
+        "count": 8,
+        "keyword": {
+            "gallery_id": 18285,
+            "title": "[Young Champion Extra] 2016.02 No.03 菜乃花 安枝瞳 葉月あや",
+        },
+    })
+
+    def __init__(self, match):
+        self.gallery_id = match.group(1)
+        url = "{}/album/{}.html".format(self.root, self.gallery_id)
+        GalleryExtractor.__init__(self, match, url)
+
+    def metadata(self, page):
+        return {
+            "gallery_id": text.parse_int(self.gallery_id),
+            "title"     : text.extract(
+                page, '<title>', "<")[0].rpartition(" | ")[0],
+        }
+
+    def images(self, page):
+        pnum = 1
+
+        while page:
+            for url in text.extract_iter(page, "<img src='", "'"):
+                yield url, None
+
+            pnum += 1
+            url = "{}/album/{}_{}.html".format(
+                self.root, self.gallery_id, pnum)
+            try:
+                page = self.request(url).text
+            except exception.HttpError:
+                return
+
+
+class KissgoddessModelExtractor(Extractor):
+    """Extractor for all galleries of a model on kissgoddess.com"""
+    category = "kissgoddess"
+    subcategory = "model"
+    root = "https://kissgoddess.com"
+    pattern = r"(?:https?://)?(?:www\.)?kissgoddess\.com/people/([^./?#]+)"
+    test = ("https://kissgoddess.com/people/aya-hazuki.html", {
+        "pattern": KissgoddessGalleryExtractor.pattern,
+        "count": ">= 7",
+    })
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.model = match.group(1)
+
+    def items(self):
+        url = "{}/people/{}.html".format(self.root, self.model)
+        page = self.request(url).text
+
+        data = {"_extractor": KissgoddessGalleryExtractor}
+        for path in text.extract_iter(page, 'thumb"><a href="/album/', '"'):
+            url = self.root + "/album/" + path
+            yield Message.Queue, url, data
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index c63fa51..43377bd 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -44,7 +44,7 @@ class LolisafelbumExtractor(LolisafeExtractor):
         }),
         # mp4 (#2239)
         ("https://bunkr.is/a/ptRHaCn2", {
-            "pattern": r"https://cdn\.bunkr\.is/_-RnHoW69L\.mp4",
+            "pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4",
             "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
         }),
         ("https://bunkr.to/a/Lktg9Keq"),
@@ -73,9 +73,8 @@ class LolisafelbumExtractor(LolisafeExtractor):
             data["name"], sep, data["id"] = data["filename"].rpartition("-")
 
             if data["extension"] == "mp4":
-                data["_http_validate"] = self._check_rewrite
-            else:
-                data["_http_validate"] = None
+                url = url.replace(
+                    "//cdn.bunkr.is/", "//media-files.bunkr.is/", 1)
             yield Message.Url, url, data
 
     def fetch_album(self, album_id):
@@ -87,13 +86,3 @@ class LolisafelbumExtractor(LolisafeExtractor):
             "album_name": text.unescape(data["title"]),
             "count"     : data["count"],
         }
-
-    @staticmethod
-    def _check_rewrite(response):
-        if response.history and response.headers.get(
-                "Content-Type").startswith("text/html"):
-            # consume content to reuse connection
-            response.content
-            # rewrite to download URL
-            return response.url.replace("/v/", "/d/", 1)
-        return True
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 152da4f..7194757 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -53,7 +53,10 @@ class MangadexExtractor(Extractor):
 
         cattributes = chapter["attributes"]
         mattributes = manga["attributes"]
-        lang = cattributes["translatedLanguage"].partition("-")[0]
+
+        lang = cattributes.get("translatedLanguage")
+        if lang:
+            lang = lang.partition("-")[0]
 
         if cattributes["chapter"]:
             chnum, sep, minor = cattributes["chapter"].partition(".")
diff --git a/gallery_dl/extractor/mememuseum.py b/gallery_dl/extractor/mememuseum.py
new file mode 100644
index 0000000..1de0d76
--- /dev/null
+++ b/gallery_dl/extractor/mememuseum.py
@@ -0,0 +1,120 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://meme.museum/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class MememuseumExtractor(Extractor):
+    """Base class for meme.museum extractors"""
+    basecategory = "booru"
+    category = "mememuseum"
+    filename_fmt = "{category}_{id}_{md5}.{extension}"
+    archive_fmt = "{id}"
+    root = "https://meme.museum"
+
+    def items(self):
+        data = self.metadata()
+
+        for post in self.posts():
+            url = post["file_url"]
+            for key in ("id", "width", "height"):
+                post[key] = text.parse_int(post[key])
+            post["tags"] = text.unquote(post["tags"])
+            post.update(data)
+            yield Message.Directory, post
+            yield Message.Url, url, text.nameext_from_url(url, post)
+
+    def metadata(self):
+        """Return general metadata"""
+        return ()
+
+    def posts(self):
+        """Return an iterable containing data of all relevant posts"""
+        return ()
+
+
+class MememuseumTagExtractor(MememuseumExtractor):
+    """Extractor for images from meme.museum by search-tags"""
+    subcategory = "tag"
+    directory_fmt = ("{category}", "{search_tags}")
+    pattern = r"(?:https?://)?meme\.museum/post/list/([^/?#]+)"
+    test = ("https://meme.museum/post/list/animated/1", {
+        "pattern": r"https://meme\.museum/_images/\w+/\d+%20-%20",
+        "count": ">= 30"
+    })
+    per_page = 25
+
+    def __init__(self, match):
+        MememuseumExtractor.__init__(self, match)
+        self.tags = text.unquote(match.group(1))
+
+    def metadata(self):
+        return {"search_tags": self.tags}
+
+    def posts(self):
+        pnum = 1
+        while True:
+            url = "{}/post/list/{}/{}".format(self.root, self.tags, pnum)
+            extr = text.extract_from(self.request(url).text)
+
+            while True:
+                mime = extr("data-mime='", "'")
+                if not mime:
+                    break
+
+                pid = extr("data-post-id='", "'")
+                tags, dimensions, size = extr("title='", "'").split(" // ")
+                md5 = extr("/_thumbs/", "/")
+                width, _, height = dimensions.partition("x")
+
+                yield {
+                    "file_url": "{}/_images/{}/{}%20-%20{}.{}".format(
+                        self.root, md5, pid, text.quote(tags),
+                        mime.rpartition("/")[2]),
+                    "id": pid, "md5": md5, "tags": tags,
+                    "width": width, "height": height,
+                    "size": text.parse_bytes(size[:-1]),
+                }
+
+            if not extr(">Next<", ">"):
+                return
+            pnum += 1
+
+
+class MememuseumPostExtractor(MememuseumExtractor):
+    """Extractor for single images from meme.museum"""
+    subcategory = "post"
+    pattern = r"(?:https?://)?meme\.museum/post/view/(\d+)"
+    test = ("https://meme.museum/post/view/10243", {
+        "pattern": r"https://meme\.museum/_images/105febebcd5ca791ee332adc4997"
+                   r"1f78/10243%20-%20g%20beard%20open_source%20richard_stallm"
+                   r"an%20stallman%20tagme%20text\.jpg",
+        "keyword": "3c8009251480cf17248c08b2b194dc0c4d59580e",
+        "content": "45565f3f141fc960a8ae1168b80e718a494c52d2",
+    })
+
+    def __init__(self, match):
+        MememuseumExtractor.__init__(self, match)
+        self.post_id = match.group(1)
+
+    def posts(self):
+        url = "{}/post/view/{}".format(self.root, self.post_id)
+        extr = text.extract_from(self.request(url).text)
+
+        return ({
+            "id"      : self.post_id,
+            "tags"    : extr(": ", "<"),
+            "md5"     : extr("/_thumbs/", "/"),
+            "file_url": self.root + extr("id='main_image' src='", "'"),
+            "width"   : extr("data-width=", " ").strip("'\""),
+            "height"  : extr("data-height=", " ").strip("'\""),
+            "size"    : 0,
+        },)
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 54e2040..6d0e94b 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -103,7 +103,7 @@ class NewgroundsExtractor(Extractor):
         }
 
     def extract_post(self, post_url):
-
+        url = post_url
         if "/art/view/" in post_url:
             extract_data = self._extract_image_data
         elif "/audio/listen/" in post_url:
@@ -111,18 +111,19 @@ class NewgroundsExtractor(Extractor):
         else:
             extract_data = self._extract_media_data
             if self.flash:
-                post_url += "/format/flash"
+                url += "/format/flash"
 
-        response = self.request(post_url, fatal=False)
+        response = self.request(url, fatal=False)
         if response.status_code >= 400:
             return {}
         page = response.text
         extr = text.extract_from(page)
         data = extract_data(extr, post_url)
 
-        data["_comment"] = extr('id="author_comments"', '</div>')
+        data["_comment"] = extr(
+            'id="author_comments"', '</div>').partition(">")[2]
         data["comment"] = text.unescape(text.remove_html(
-            data["_comment"].partition(">")[2], "", ""))
+            data["_comment"], "", ""))
         data["favorites"] = text.parse_int(extr(
             'id="faves_load">', '<').replace(",", ""))
         data["score"] = text.parse_float(extr('id="score_number">', '<'))
@@ -134,6 +135,7 @@ class NewgroundsExtractor(Extractor):
 
         data["tags"].sort()
         data["user"] = self.user or data["artist"][0]
+        data["post_url"] = post_url
         return data
 
     @staticmethod
@@ -171,6 +173,7 @@ class NewgroundsExtractor(Extractor):
     def _extract_media_data(self, extr, url):
         index = url.split("/")[5]
         title = extr('"og:title" content="', '"')
+        descr = extr('"og:description" content="', '"')
         src = extr('{"url":"', '"')
 
         if src:
@@ -209,7 +212,7 @@ class NewgroundsExtractor(Extractor):
             "title"      : text.unescape(title),
             "url"        : src,
             "date"       : date,
-            "description": text.unescape(extr(
+            "description": text.unescape(descr or extr(
                 'itemprop="description" content="', '"')),
             "rating"     : extr('class="rated-', '"'),
             "index"      : text.parse_int(index),
@@ -319,6 +322,7 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
                 "artist"     : ["kickinthehead", "danpaladin", "tomfulp"],
                 "comment"    : "re:My fan trailer for Alien Hominid HD!",
                 "date"       : "dt:2013-02-01 09:50:49",
+                "description": "Fan trailer for Alien Hominid HD!",
                 "favorites"  : int,
                 "filename"   : "564957_alternate_31",
                 "index"      : 595355,
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 6812f35..428f772 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2017-2021 Mike Fährmann
+# Copyright 2017-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -399,7 +399,7 @@ class OAuthPixiv(OAuthBase):
 
         if "error" in data:
             print(data)
-            if data["error"] == "invalid_request":
+            if data["error"] in ("invalid_request", "invalid_grant"):
                 print("'code' expired, try again")
             return
 
@@ -417,6 +417,10 @@ class OAuthPixiv(OAuthBase):
 2) Login
 3) Select the last network monitor entry ('callback?state=...')
 4) Copy its 'code' query parameter, paste it below, and press Enter
+
+- This 'code' will expire 30 seconds after logging in.
+- Copy-pasting more than just the 'code' value will work as well,
+  like the entire URL or several query parameters.
 """)
         code = input("code: ")
         return code.rpartition("=")[2].strip()
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 051f1ef..35a015f 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -71,6 +71,15 @@ class PatreonExtractor(Extractor):
                 name = image.get("file_name") or self._filename(url) or url
                 yield "image", url, name
 
+    def _image_large(self, post):
+        image = post.get("image")
+        if image:
+            url = image.get("large_url")
+            if url:
+                name = image.get("file_name") or self._filename(url) or url
+                return (("image_large", url, name),)
+        return ()
+
     def _attachments(self, post):
         for attachment in post["attachments"]:
             url = self.request(
@@ -212,10 +221,11 @@ class PatreonExtractor(Extractor):
 
     def _build_file_generators(self, filetypes):
         if filetypes is None:
-            return (self._images, self._attachments,
-                    self._postfile, self._content)
+            return (self._images, self._image_large,
+                    self._attachments, self._postfile, self._content)
         genmap = {
             "images"     : self._images,
+            "image_large": self._image_large,
             "attachments": self._attachments,
             "postfile"   : self._postfile,
             "content"    : self._content,
diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py
index bf38a77..22c9487 100644
--- a/gallery_dl/extractor/seiga.py
+++ b/gallery_dl/extractor/seiga.py
@@ -1,16 +1,15 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract images from https://seiga.nicovideo.jp/"""
+"""Extractors for https://seiga.nicovideo.jp/"""
 
 from .common import Extractor, Message
 from .. import text, util, exception
-from ..cache import cache
 
 
 class SeigaExtractor(Extractor):
@@ -25,7 +24,9 @@ class SeigaExtractor(Extractor):
         self.start_image = 0
 
     def items(self):
-        self.login()
+        if not self._check_cookies(("user_session",)):
+            raise exception.StopExtraction("'user_session' cookie required")
+
         images = iter(self.get_images())
         data = next(images)
 
@@ -45,28 +46,6 @@ class SeigaExtractor(Extractor):
             url, method="HEAD", allow_redirects=False, notfound="image")
         return response.headers["Location"].replace("/o/", "/priv/", 1)
 
-    def login(self):
-        """Login and set necessary cookies"""
-        if not self._check_cookies(("user_session",)):
-            username, password = self._get_auth_info()
-            self._update_cookies(self._login_impl(username, password))
-
-    @cache(maxage=7*24*3600, keyarg=1)
-    def _login_impl(self, username, password):
-        if not username or not password:
-            raise exception.AuthenticationError(
-                "Username and password required")
-
-        self.log.info("Logging in as %s", username)
-        url = "https://account.nicovideo.jp/api/v1/login"
-        data = {"mail_tel": username, "password": password}
-
-        self.request(url, method="POST", data=data)
-        if "user_session" not in self.session.cookies:
-            raise exception.AuthenticationError()
-        del self.session.cookies["nicosid"]
-        return self.session.cookies
-
 
 class SeigaUserExtractor(SeigaExtractor):
     """Extractor for images of a user from seiga.nicovideo.jp"""
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 2c806ad..965391c 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -22,10 +22,11 @@ class SkebExtractor(Extractor):
         Extractor.__init__(self, match)
         self.user_name = match.group(1)
         self.thumbnails = self.config("thumbnails", False)
+        self.sent_requests = self.config("sent-requests", False)
 
     def items(self):
-        for post_num in self.posts():
-            response, post = self._get_post_data(post_num)
+        for user_name, post_num in self.posts():
+            response, post = self._get_post_data(user_name, post_num)
             yield Message.Directory, post
             for data in self._get_urls_from_post(response, post):
                 url = data["file_url"]
@@ -38,24 +39,33 @@ class SkebExtractor(Extractor):
         url = "{}/api/users/{}/works".format(self.root, self.user_name)
         params = {"role": "creator", "sort": "date", "offset": 0}
         headers = {"Referer": self.root, "Authorization": "Bearer null"}
+        do_requests = self.sent_requests
 
         while True:
             posts = self.request(url, params=params, headers=headers).json()
 
             for post in posts:
                 post_num = post["path"].rpartition("/")[2]
+                user_name = post["path"].split("/")[1][1:]
                 if post["private"]:
-                    self.log.debug("Skipping %s (private)", post_num)
+                    self.log.debug("Skipping @%s/%s (private)",
+                                   user_name, post_num)
                     continue
-                yield post_num
+                yield user_name, post_num
 
             if len(posts) < 30:
-                return
+                if do_requests:
+                    params["offset"] = 0
+                    params['role'] = "client"
+                    do_requests = False
+                    continue
+                else:
+                    return
             params["offset"] += 30
 
-    def _get_post_data(self, post_num):
+    def _get_post_data(self, user_name, post_num):
         url = "{}/api/users/{}/works/{}".format(
-            self.root, self.user_name, post_num)
+            self.root, user_name, post_num)
         headers = {"Referer": self.root, "Authorization": "Bearer null"}
         resp = self.request(url, headers=headers).json()
         creator = resp["creator"]
@@ -130,7 +140,7 @@ class SkebPostExtractor(SkebExtractor):
         self.post_num = match.group(2)
 
     def posts(self):
-        return (self.post_num,)
+        return ((self.user_name, self.post_num),)
 
 
 class SkebUserExtractor(SkebExtractor):
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index 91386e8..557c9fb 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2016-2021 Mike Fährmann, Leonardo Taccari
+# Copyright 2016-2022 Mike Fährmann, Leonardo Taccari
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -8,11 +8,12 @@
 
 """Extractors for https://www.slideshare.net/"""
 
-from .common import Extractor, Message
+from .common import GalleryExtractor
 from .. import text
+import json
 
 
-class SlidesharePresentationExtractor(Extractor):
+class SlidesharePresentationExtractor(GalleryExtractor):
     """Extractor for images from a presentation on slideshare.net"""
     category = "slideshare"
     subcategory = "presentation"
@@ -24,13 +25,36 @@ class SlidesharePresentationExtractor(Extractor):
     test = (
         (("https://www.slideshare.net"
           "/Slideshare/get-started-with-slide-share"), {
-            "url": "23685fb9b94b32c77a547d45dc3a82fe7579ea18",
-            "content": "2e90a01c6ca225579ebf8f98ab46f97a28a5e45c",
+            "pattern": r"https://image\.slidesharecdn\.com/getstartedwithslide"
+                       r"share-150520173821-lva1-app6892/95/get-started-with-s"
+                       r"lide-share-\d+-1024\.jpg\?cb=\d+",
+            "count": 19,
+            "content": "2b6a191eab60b3978fdacfecf2da302dd45bc108",
+            "keyword": {
+                "comments": "0",
+                "description": "Get Started with SlideShare - "
+                               "A Beginngers Guide for Creators",
+                "likes": r"re:\d{3,}",
+                "presentation": "get-started-with-slide-share",
+                "published": "dt:2015-05-20 00:00:00",
+                "title": "Getting Started With SlideShare",
+                "user": "Slideshare",
+                "views": r"re:\d{7,}",
+            },
         }),
-        # long title
+        # long title and description
         (("https://www.slideshare.net/pragmaticsolutions/warum-sie-nicht-ihren"
           "-mitarbeitenden-ndern-sollten-sondern-ihr-managementsystem"), {
             "url": "cf70ca99f57f61affab47ebf8583eb564b21e3a7",
+            "keyword": {
+                "title": "Warum Sie nicht Ihren Mitarbeitenden ändern "
+                         "sollten, sondern Ihr Managementsystem",
+                "description": "Mitarbeitende verhalten sich mehrheitlich so, "
+                               "wie das System es ihnen vorgibt. Welche Voraus"
+                               "setzungen es braucht, damit Ihre Mitarbeitende"
+                               "n ihr ganzes Herzblut einsetzen, bespricht Fre"
+                               "di Schmidli in diesem Referat.",
+            },
         }),
         # mobile URL
         (("https://www.slideshare.net"
@@ -40,48 +64,50 @@ class SlidesharePresentationExtractor(Extractor):
     )
 
     def __init__(self, match):
-        Extractor.__init__(self, match)
         self.user, self.presentation = match.groups()
+        url = "https://www.slideshare.net/{}/{}".format(
+            self.user, self.presentation)
+        GalleryExtractor.__init__(self, match, url)
 
-    def items(self):
-        page = self.request("https://www.slideshare.net/" + self.user +
-                            "/" + self.presentation).text
-        data = self.get_job_metadata(page)
-        imgs = self.get_image_urls(page)
-        data["count"] = len(imgs)
-        yield Message.Directory, data
-        for data["num"], url in enumerate(imgs, 1):
-            yield Message.Url, url, text.nameext_from_url(url, data)
+    def metadata(self, page):
+        extr = text.extract_from(page)
+        descr = extr('<meta name="description" content="', '"')
+        title = extr('<span class="j-title-breadcrumb">', '</span>')
+        published = extr('<div class="metadata-item">', '</div>')
+        comments = extr('content="UserComments:', '"')
+        likes = extr('content="UserLikes:', '"')
+        views = extr('content="UserPageVisits:', '"')
 
-    def get_job_metadata(self, page):
-        """Collect metadata for extractor-job"""
-        descr, pos = text.extract(
-            page, '<meta name="description" content="', '"')
-        category, pos = text.extract(
-            page, '<div class="metadata-item">', '</div>', pos)
-        views, pos = text.extract(
-            page, '<div class="metadata-item">', '</div>', pos)
-        published, pos = text.extract(
-            page, '<div class="metadata-item">', '</div>', pos)
-        title, pos = text.extract(
-            page, '<span class="j-title-breadcrumb">', '</span>', pos)
-        alt_descr, pos = text.extract(
-            page, '<p class="slideshow-description notranslate">', '</p>', pos)
-
-        if descr.endswith("…") and alt_descr:
-            descr = text.remove_html(alt_descr).strip()
+        if descr.endswith("…"):
+            alt_descr = extr(
+                'id="slideshow-description-text" class="notranslate">', '</p>')
+            if alt_descr:
+                descr = text.remove_html(alt_descr).strip()
 
         return {
             "user": self.user,
             "presentation": self.presentation,
             "title": text.unescape(title.strip()),
             "description": text.unescape(descr),
-            "views": text.parse_int(views.rpartition(
-                " views")[0].replace(",", "")),
-            "published": published.strip(),
+            "views": views,
+            "likes": likes,
+            "comments": comments,
+            "published": text.parse_datetime(
+                published.strip(), "%b. %d, %Y"),
         }
 
     @staticmethod
-    def get_image_urls(page):
-        """Extract and return a list of all image-urls"""
-        return list(text.extract_iter(page, 'data-full="', '"'))
+    def images(page):
+        data = json.loads(text.extract(
+            page, "xtend(true, slideshare_object.slideshow_config, ", ");")[0])
+
+        # using 'stripped_title' here is technically wrong, but it works all
+        # the same, slideshare doesn't seem to care what characters go there
+        begin = "https://image.slidesharecdn.com/{}/95/{}-".format(
+            data["ppt_location"], data["stripped_title"])
+        end = "-1024.jpg?cb=" + str(data["timestamp"])
+
+        return [
+            (begin + str(n) + end, None)
+            for n in range(1, data["slide_count"]+1)
+        ]
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 69e3854..b57013a 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2020-2021 Mike Fährmann
+# Copyright 2020-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -105,7 +105,7 @@ class SubscribestarExtractor(Extractor):
                         att, 'data-upload-id="', '"')[0]),
                     "name": text.unescape(text.extract(
                         att, 'doc_preview-title">', '<')[0] or ""),
-                    "url" : text.extract(att, 'href="', '"')[0],
+                    "url" : text.unescape(text.extract(att, 'href="', '"')[0]),
                     "type": "attachment",
                 })
 
diff --git a/gallery_dl/extractor/toyhouse.py b/gallery_dl/extractor/toyhouse.py
new file mode 100644
index 0000000..c6be38d
--- /dev/null
+++ b/gallery_dl/extractor/toyhouse.py
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://toyhou.se/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?toyhou\.se"
+
+
+class ToyhouseExtractor(Extractor):
+    """Base class for toyhouse extractors"""
+    category = "toyhouse"
+    root = "https://toyhou.se"
+    directory_fmt = ("{category}", "{user|artists!S}")
+    archive_fmt = "{id}"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.user = match.group(1)
+        self.offset = 0
+
+    def items(self):
+        metadata = self.metadata()
+
+        for post in util.advance(self.posts(), self.offset):
+            if metadata:
+                post.update(metadata)
+            text.nameext_from_url(post["url"], post)
+            post["id"], _, post["hash"] = post["filename"].partition("_")
+            yield Message.Directory, post
+            yield Message.Url, post["url"], post
+
+    def posts(self):
+        return ()
+
+    def metadata(self):
+        return None
+
+    def skip(self, num):
+        self.offset += num
+        return num
+
+    def _parse_post(self, post, needle='<a href="'):
+        extr = text.extract_from(post)
+        return {
+            "url": extr(needle, '"'),
+            "date": text.parse_datetime(extr(
+                'Credits\n</h2>\n<div class="mb-1">', '<'),
+                "%d %b %Y, %I:%M:%S %p"),
+            "artists": [
+                text.remove_html(artist)
+                for artist in extr(
+                    '<div class="artist-credit">', '</div>\n</div>').split(
+                    '<div class="artist-credit">')
+            ],
+            "characters": text.split_html(extr(
+                '<div class="image-characters', '</div>\n</div>'))[2:],
+        }
+
+    def _pagination(self, path):
+        url = self.root + path
+        params = {"page": 1}
+
+        while True:
+            page = self.request(url, params=params).text
+
+            cnt = 0
+            for post in text.extract_iter(
+                    page, '<li class="gallery-item">', '</li>'):
+                cnt += 1
+                yield self._parse_post(post)
+
+            if cnt == 0 and params["page"] == 1:
+                token, pos = text.extract(
+                    page, '<input name="_token" type="hidden" value="', '"')
+                if not token:
+                    return
+                data = {
+                    "_token": token,
+                    "user"  : text.extract(page, 'value="', '"', pos)[0],
+                }
+                self.request(self.root + "/~account/warnings/accept",
+                             method="POST", data=data, allow_redirects=False)
+                continue
+
+            if cnt < 18:
+                return
+            params["page"] += 1
+
+
+class ToyhouseArtExtractor(ToyhouseExtractor):
+    """Extractor for artworks of a toyhouse user"""
+    subcategory = "art"
+    pattern = BASE_PATTERN + r"/([^/?#]+)/art"
+
+    test = (
+        ("https://www.toyhou.se/d-floe/art", {
+            "range": "1-30",
+            "count": 30,
+            "pattern": r"https://f\d+\.toyhou\.se/file/f\d+-toyhou-se"
+                       r"/images/\d+_\w+\.\w+$",
+            "keyword": {
+                "artists": list,
+                "characters": list,
+                "date": "type:datetime",
+                "hash": r"re:\w+",
+                "id": r"re:\d+",
+                "url": str,
+                "user": "d-floe",
+            },
+        }),
+        # protected by Content Warning
+        ("https://www.toyhou.se/kroksoc/art", {
+            "count": ">= 19",
+        }),
+    )
+
+    def posts(self):
+        return self._pagination("/{}/art".format(self.user))
+
+    def metadata(self):
+        return {"user": self.user}
+
+
+class ToyhouseImageExtractor(ToyhouseExtractor):
+    """Extractor for individual toyhouse images"""
+    subcategory = "image"
+    pattern = (r"(?:https?://)?(?:"
+               r"(?:www\.)?toyhou\.se/~images|"
+               r"f\d+\.toyhou\.se/file/[^/?#]+/(?:image|watermark)s"
+               r")/(\d+)")
+    test = (
+        ("https://toyhou.se/~images/40587320", {
+            "content": "058ec8427977ab432c4cc5be5a6dd39ce18713ef",
+            "keyword": {
+                "artists": ["d-floe"],
+                "characters": ["Sumi"],
+                "date": "dt:2021-10-08 01:32:47",
+                "extension": "png",
+                "filename": "40587320_TT1NaBUr3FLkS1p",
+                "hash": "TT1NaBUr3FLkS1p",
+                "id": "40587320",
+                "url": "https://f2.toyhou.se/file/f2-toyhou-se/images"
+                       "/40587320_TT1NaBUr3FLkS1p.png",
+            },
+        }),
+        # direct link, multiple artists
+        (("https://f2.toyhou.se/file/f2-toyhou-se"
+          "/watermarks/36817425_bqhGcwcnU.png?1625561467"), {
+            "keyword": {
+                "artists": [
+                    "http://aminoapps.com/p/92sf3z",
+                    "kroksoc (Color)"],
+                "characters": ["❀Reiichi❀"],
+                "date": "dt:2021-07-03 20:02:02",
+                "hash": "bqhGcwcnU",
+                "id": "36817425",
+            },
+        }),
+        ("https://f2.toyhou.se/file/f2-toyhou-se"
+         "/images/40587320_TT1NaBUr3FLkS1p.png"),
+    )
+
+    def posts(self):
+        url = "{}/~images/{}".format(self.root, self.user)
+        return (self._parse_post(self.request(url).text, '<img src="'),)
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 358bc95..fbe641d 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -400,6 +400,15 @@ class TumblrAPI(oauth.OAuth1API):
                 t = (datetime.now() + timedelta(seconds=float(reset))).time()
 
                 self.log.error("Daily API rate limit exceeded")
+
+                api_key = self.api_key or self.session.auth.consumer_key
+                if api_key == self.API_KEY:
+                    self.log.info("Register your own OAuth application and "
+                                  "use its credentials to prevent this error: "
+                                  "https://github.com/mikf/gallery-dl/blob/mas"
+                                  "ter/docs/configuration.rst#extractortumblra"
+                                  "pi-key--api-secret")
+
                 raise exception.StopExtraction(
                     "Aborting - Rate limit will reset at %s",
                     "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second))
diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py
new file mode 100644
index 0000000..ec8ab35
--- /dev/null
+++ b/gallery_dl/extractor/twibooru.py
@@ -0,0 +1,241 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://twibooru.org/"""
+
+from .booru import BooruExtractor
+from .. import text, exception
+import operator
+
+BASE_PATTERN = r"(?:https?://)?twibooru\.org"
+
+
+class TwibooruExtractor(BooruExtractor):
+    """Base class for twibooru extractors"""
+    category = "twibooru"
+    basecategory = "philomena"
+    filename_fmt = "{id}_{filename}.{extension}"
+    archive_fmt = "{id}"
+    request_interval = 6.05
+    per_page = 50
+    root = "https://twibooru.org"
+
+    def __init__(self, match):
+        BooruExtractor.__init__(self, match)
+        self.api = TwibooruAPI(self)
+
+    _file_url = operator.itemgetter("view_url")
+
+    @staticmethod
+    def _prepare(post):
+        post["date"] = text.parse_datetime(
+            post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
+
+        name, sep, rest = post["name"].rpartition(".")
+        post["filename"] = name if sep else rest
+
+
+class TwibooruPostExtractor(TwibooruExtractor):
+    """Extractor for single twibooru posts"""
+    subcategory = "post"
+    request_interval = 1.0
+    pattern = BASE_PATTERN + r"/(\d+)"
+    test = ("https://twibooru.org/1", {
+        "pattern": r"https://cdn.twibooru.org/img/2020/7/8/1/full.png",
+        "content": "aac4d1dba611883ac701aaa8f0b2b322590517ae",
+        "keyword": {
+            "animated": False,
+            "aspect_ratio": 1.0,
+            "comment_count": int,
+            "created_at": "2020-07-08T22:26:55.743Z",
+            "date": "dt:2020-07-08 22:26:55",
+            "description": "Why have I done this?",
+            "downvotes": 0,
+            "duration": 0.0,
+            "faves": int,
+            "first_seen_at": "2020-07-08T22:26:55.743Z",
+            "format": "png",
+            "height": 576,
+            "hidden_from_users": False,
+            "id": 1,
+            "intensities": dict,
+            "locations": [],
+            "media_type": "image",
+            "mime_type": "image/png",
+            "name": "1676547__safe_artist-colon-scraggleman_oc_oc-colon-"
+                    "floor+bored_oc+only_bags+under+eyes_bust_earth+pony_"
+                    "female_goggles_helmet_mare_meme_neet_neet+home+g.png",
+            "orig_sha512_hash": "re:8b4c00d2[0-9a-f]{120}",
+            "processed": True,
+            "representations": dict,
+            "score": int,
+            "sha512_hash": "8b4c00d2eff52d51ad9647e14738944ab306fd1d8e1bf6"
+                           "34fbb181b32f44070aa588938e26c4eb072b1eb61489aa"
+                           "f3062fb644a76c79f936b97723a2c3e0e5d3",
+            "size": 70910,
+            "source_url": "",
+            "tag_ids": list,
+            "tags": list,
+            "thumbnails_generated": True,
+            "updated_at": "2022-02-03T15:49:07.110Z",
+            "upvotes": int,
+            "view_url": "https://cdn.twibooru.org/img/2020/7/8/1/full.png",
+            "width": 576,
+            "wilson_score": float,
+        },
+    })
+
+    def __init__(self, match):
+        TwibooruExtractor.__init__(self, match)
+        self.post_id = match.group(1)
+
+    def posts(self):
+        return (self.api.post(self.post_id),)
+
+
+class TwibooruSearchExtractor(TwibooruExtractor):
+    """Extractor for twibooru search results"""
+    subcategory = "search"
+    directory_fmt = ("{category}", "{search_tags}")
+    pattern = BASE_PATTERN + r"/(?:search/?\?([^#]+)|tags/([^/?#]+))"
+    test = (
+        ("https://twibooru.org/search?q=cute", {
+            "range": "40-60",
+            "count": 21,
+        }),
+        ("https://twibooru.org/tags/cute", {
+            "range": "1-20",
+            "count": 20,
+        }),
+    )
+
+    def __init__(self, match):
+        TwibooruExtractor.__init__(self, match)
+        query, tag = match.groups()
+        if tag:
+            q = tag.replace("+", " ")
+            for old, new in (
+                ("-colon-"  , ":"),
+                ("-dash-"   , "-"),
+                ("-dot-"    , "."),
+                ("-plus-"   , "+"),
+                ("-fwslash-", "/"),
+                ("-bwslash-", "\\"),
+            ):
+                if old in q:
+                    q = q.replace(old, new)
+            self.params = {"q": text.unquote(text.unquote(q))}
+        else:
+            self.params = text.parse_query(query)
+
+    def metadata(self):
+        return {"search_tags": self.params.get("q", "")}
+
+    def posts(self):
+        return self.api.search(self.params)
+
+
+class TwibooruGalleryExtractor(TwibooruExtractor):
+    """Extractor for twibooru galleries"""
+    subcategory = "gallery"
+    directory_fmt = ("{category}", "galleries",
+                     "{gallery[id]} {gallery[title]}")
+    pattern = BASE_PATTERN + r"/galleries/(\d+)"
+    test = ("https://twibooru.org/galleries/1", {
+        "range": "1-20",
+        "keyword": {
+            "gallery": {
+                "description": "Best nation pone and "
+                               "russian related pics.",
+                "id": 1,
+                "spoiler_warning": "Russia",
+                "thumbnail_id": 694923,
+                "title": "Marussiaverse",
+            },
+        },
+    })
+
+    def __init__(self, match):
+        TwibooruExtractor.__init__(self, match)
+        self.gallery_id = match.group(1)
+
+    def metadata(self):
+        return {"gallery": self.api.gallery(self.gallery_id)}
+
+    def posts(self):
+        gallery_id = "gallery_id:" + self.gallery_id
+        params = {"sd": "desc", "sf": gallery_id, "q" : gallery_id}
+        return self.api.search(params)
+
+
+class TwibooruAPI():
+    """Interface for the Twibooru API
+
+    https://twibooru.org/pages/api
+    """
+
+    def __init__(self, extractor):
+        self.extractor = extractor
+        self.root = "https://twibooru.org/api"
+
+    def gallery(self, gallery_id):
+        endpoint = "/v3/galleries/" + gallery_id
+        return self._call(endpoint)["gallery"]
+
+    def post(self, post_id):
+        endpoint = "/v3/posts/" + post_id
+        return self._call(endpoint)["post"]
+
+    def search(self, params):
+        endpoint = "/v3/search/posts"
+        return self._pagination(endpoint, params)
+
+    def _call(self, endpoint, params=None):
+        url = self.root + endpoint
+
+        while True:
+            response = self.extractor.request(url, params=params, fatal=None)
+
+            if response.status_code < 400:
+                return response.json()
+
+            if response.status_code == 429:
+                until = text.parse_datetime(
+                    response.headers["X-RL-Reset"], "%Y-%m-%d %H:%M:%S %Z")
+                # wait an extra minute, just to be safe
+                self.extractor.wait(until=until, adjust=60.0)
+                continue
+
+            # error
+            self.extractor.log.debug(response.content)
+            raise exception.StopExtraction(
+                "%s %s", response.status_code, response.reason)
+
+    def _pagination(self, endpoint, params):
+        extr = self.extractor
+
+        api_key = extr.config("api-key")
+        if api_key:
+            params["key"] = api_key
+
+        filter_id = extr.config("filter")
+        if filter_id:
+            params["filter_id"] = filter_id
+        elif not api_key:
+            params["filter_id"] = "2"
+
+        params["page"] = 1
+        params["per_page"] = per_page = extr.per_page
+
+        while True:
+            data = self._call(endpoint, params)
+            yield from data["posts"]
+
+            if len(data["posts"]) < per_page:
+                return
+            params["page"] += 1
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 46b06c2..6d51834 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -649,6 +649,10 @@ class TwitterTweetExtractor(TwitterExtractor):
         ("https://twitter.com/i/web/status/1460044411165888515", {
             "count": 0,
         }),
+        # "Misleading" content
+        ("https://twitter.com/i/web/status/1486373748911575046", {
+            "count": 4,
+        }),
     )
 
     def __init__(self, match):
@@ -765,7 +769,7 @@ class TwitterAPI():
             "__fs_dont_mention_me_view_api_enabled": False,
         }
 
-        self._log_warnings = extractor.config("warnings")
+        self._nsfw_warning = True
         self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
         self._user = None
 
@@ -789,7 +793,7 @@ class TwitterAPI():
             self.headers["x-guest-token"] = guest_token
 
     def tweet_detail(self, tweet_id):
-        endpoint = "/graphql/aD0-HB47XIOxiBl5kTkX5Q/TweetDetail"
+        endpoint = "/graphql/ItejhtHVxU7ksltgMmyaLA/TweetDetail"
         variables = {
             "focalTweetId": tweet_id,
             "with_rux_injections": False,
@@ -801,7 +805,7 @@ class TwitterAPI():
             endpoint, variables, ("threaded_conversation_with_injections",))
 
     def user_tweets(self, screen_name):
-        endpoint = "/graphql/LNhjy8t3XpIrBYM-ms7sPQ/UserTweets"
+        endpoint = "/graphql/WZT7sCTrLvSOaWOXLDsWbQ/UserTweets"
         variables = {
             "userId": self._user_id_by_screen_name(screen_name),
             "count": 100,
@@ -810,7 +814,7 @@ class TwitterAPI():
         return self._pagination_tweets(endpoint, variables)
 
     def user_tweets_and_replies(self, screen_name):
-        endpoint = "/graphql/Vg5aF036K40ST3FWvnvRGA/UserTweetsAndReplies"
+        endpoint = "/graphql/t4wEKVulW4Mbv1P0kgxTEw/UserTweetsAndReplies"
         variables = {
             "userId": self._user_id_by_screen_name(screen_name),
             "count": 100,
@@ -819,7 +823,7 @@ class TwitterAPI():
         return self._pagination_tweets(endpoint, variables)
 
     def user_media(self, screen_name):
-        endpoint = "/graphql/Hl6C7ac051l_QBe3HjGz_A/UserMedia"
+        endpoint = "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia"
         variables = {
             "userId": self._user_id_by_screen_name(screen_name),
             "count": 100,
@@ -827,7 +831,7 @@ class TwitterAPI():
         return self._pagination_tweets(endpoint, variables)
 
     def user_likes(self, screen_name):
-        endpoint = "/graphql/smISlRVSnz-GaU_XpU_akw/Likes"
+        endpoint = "/graphql/9MSTt44HoGjVFSg_u3rHDw/Likes"
         variables = {
             "userId": self._user_id_by_screen_name(screen_name),
             "count": 100,
@@ -835,7 +839,7 @@ class TwitterAPI():
         return self._pagination_tweets(endpoint, variables)
 
     def user_bookmarks(self):
-        endpoint = "/graphql/yKNebSjZKbo2tOd-Qdc7Xg/Bookmarks"
+        endpoint = "/graphql/uKP9v_I31k0_VSBmlpq2Xg/Bookmarks"
         variables = {
             "count": 100,
         }
@@ -843,7 +847,7 @@ class TwitterAPI():
             endpoint, variables, ("bookmark_timeline", "timeline"))
 
     def list_latest_tweets_timeline(self, list_id):
-        endpoint = "/graphql/RxUL5UHi4Msxt_P9O1729w/ListLatestTweetsTimeline"
+        endpoint = "/graphql/z3l-EHlx-fyg8OvGO4JN8A/ListLatestTweetsTimeline"
         variables = {
             "listId": list_id,
             "count": 100,
@@ -889,7 +893,7 @@ class TwitterAPI():
             raise exception.NotFoundError("list")
 
     def list_members(self, list_id):
-        endpoint = "/graphql/kk9RQtSa2sc-4_9figZVBw/ListMembers"
+        endpoint = "/graphql/snESM0DPs3c7M1SBm4rvVw/ListMembers"
         variables = {
             "listId": list_id,
             "count": 100,
@@ -899,7 +903,7 @@ class TwitterAPI():
             endpoint, variables, ("list", "members_timeline", "timeline"))
 
     def user_following(self, screen_name):
-        endpoint = "/graphql/kz464_e4MAOXc3bGOA9kow/Following"
+        endpoint = "/graphql/mIwX8GogcobVlRwlgpHNYA/Following"
         variables = {
             "userId": self._user_id_by_screen_name(screen_name),
             "count": 100,
@@ -961,20 +965,9 @@ class TwitterAPI():
             if csrf_token:
                 self.headers["x-csrf-token"] = csrf_token
 
-            data = response.json()
-            if "errors" in data:
-                try:
-                    errors = ", ".join(e["message"] for e in data["errors"])
-                except Exception:
-                    errors = data["errors"]
-            else:
-                errors = ""
-
             if response.status_code < 400:
                 # success
-                if errors and self._log_warnings:
-                    self.extractor.log.warning(errors)
-                return data
+                return response.json()
 
             if response.status_code == 429:
                 # rate limit exceeded
@@ -984,6 +977,14 @@ class TwitterAPI():
                 continue
 
             # error
+            try:
+                data = response.json()
+                errors = ", ".join(e["message"] for e in data["errors"])
+            except ValueError:
+                errors = response.text
+            except Exception:
+                errors = data.get("errors", "")
+
             raise exception.StopExtraction(
                 "%s %s (%s)", response.status_code, response.reason, errors)
 
@@ -1151,6 +1152,10 @@ class TwitterAPI():
                     tweets.extend(entry["content"]["items"])
                 elif esw("conversationthread-"):
                     tweets.extend(entry["content"]["items"])
+                elif esw("tombstone-"):
+                    self._report_tombstone(
+                        entry,
+                        entry["content"]["itemContent"]["tombstoneInfo"])
                 elif esw("cursor-bottom-"):
                     cursor = entry["content"]
                     if not cursor.get("stopOnEmptyResponse", True):
@@ -1162,6 +1167,11 @@ class TwitterAPI():
                 try:
                     tweet = ((entry.get("content") or entry["item"])
                              ["itemContent"]["tweet_results"]["result"])
+                    if "tombstone" in tweet:
+                        self._report_tombstone(entry, tweet["tombstone"])
+                        continue
+                    if "tweet" in tweet:
+                        tweet = tweet["tweet"]
                     legacy = tweet["legacy"]
                 except KeyError:
                     extr.log.debug(
@@ -1248,3 +1258,11 @@ class TwitterAPI():
             if stop or not cursor or not entry:
                 return
             variables["cursor"] = cursor
+
+    def _report_tombstone(self, entry, tombstone):
+        log = self.extractor.log  # shared by both log calls below
+        msg = (tombstone.get("richText") or tombstone["text"])["text"]
+        if self._nsfw_warning and msg.startswith("Age-restricted"):
+            log.warning(msg)
+            self._nsfw_warning = False  # emit this warning only once
+        log.debug("Skipping %s (%s)", entry["entryId"].rpartition("-")[2], msg)
diff --git a/gallery_dl/extractor/ytdl.py b/gallery_dl/extractor/ytdl.py
index 8f3ef9a..b3a1652 100644
--- a/gallery_dl/extractor/ytdl.py
+++ b/gallery_dl/extractor/ytdl.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -64,6 +64,9 @@ class YoutubeDLExtractor(Extractor):
             "nocheckcertificate"     : not self._verify,
         }
 
+        if self._proxies:
+            user_opts["proxy"] = self._proxies.get("http")
+
         username, password = self._get_auth_info()
         if username:
             user_opts["username"], user_opts["password"] = username, password
author	Unit 193 <unit193@unit193.net>	2022-03-15 00:19:57 -0400
committer	Unit 193 <unit193@unit193.net>	2022-03-15 00:19:57 -0400
commit	c2e774d3f5a4499b8beb5a12ab46a0099b16b1e7 (patch)
tree	a14107397b5bcb491aa4f4fb3e0feb4582e1879b /gallery_dl/extractor
parent	7900ee4e3692dbd8056c3e47c81bb22eda030b65 (diff)