From a5aecc343fd2886e7ae09bb3e2afeec38f175755 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Wed, 1 Dec 2021 14:44:00 -0500 Subject: New upstream version 1.19.3. --- gallery_dl/downloader/ytdl.py | 87 +++--- gallery_dl/extractor/__init__.py | 1 - gallery_dl/extractor/dynastyscans.py | 25 +- gallery_dl/extractor/exhentai.py | 8 +- gallery_dl/extractor/foolfuuka.py | 6 +- gallery_dl/extractor/gelbooru_v02.py | 15 +- gallery_dl/extractor/instagram.py | 20 +- gallery_dl/extractor/kemonoparty.py | 125 +++++++-- gallery_dl/extractor/mangadex.py | 42 ++- gallery_dl/extractor/mangoxo.py | 12 +- gallery_dl/extractor/philomena.py | 12 + gallery_dl/extractor/reactor.py | 228 +++++++-------- gallery_dl/extractor/seisoparty.py | 201 ------------- gallery_dl/extractor/shopify.py | 6 + gallery_dl/extractor/skeb.py | 3 +- gallery_dl/extractor/subscribestar.py | 14 +- gallery_dl/extractor/twitter.py | 37 ++- gallery_dl/extractor/webtoons.py | 5 +- gallery_dl/extractor/xvideos.py | 4 +- gallery_dl/extractor/ytdl.py | 79 +++--- gallery_dl/formatter.py | 12 + gallery_dl/job.py | 69 ++--- gallery_dl/util.py | 70 +++++ gallery_dl/version.py | 2 +- gallery_dl/ytdl.py | 513 ++++++++++++++++++++++++++++++++++ 25 files changed, 1024 insertions(+), 572 deletions(-) delete mode 100644 gallery_dl/extractor/seisoparty.py create mode 100644 gallery_dl/ytdl.py (limited to 'gallery_dl') diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index f4d3e05..8416ca0 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -9,7 +9,7 @@ """Downloader module for URLs requiring youtube-dl support""" from .common import DownloaderBase -from .. import text +from .. import ytdl, text import os @@ -17,70 +17,53 @@ class YoutubeDLDownloader(DownloaderBase): scheme = "ytdl" def __init__(self, job): - module = __import__(self.config("module") or "youtube_dl") - DownloaderBase.__init__(self, job) - extractor = job.extractor + extractor = job.extractor retries = self.config("retries", extractor._retries) - options = { - "format": self.config("format") or None, - "ratelimit": text.parse_bytes(self.config("rate"), None), + self.ytdl_opts = { "retries": retries+1 if retries >= 0 else float("inf"), "socket_timeout": self.config("timeout", extractor._timeout), "nocheckcertificate": not self.config("verify", extractor._verify), - "nopart": not self.part, - "updatetime": self.config("mtime", True), - "proxy": extractor.session.proxies.get("http"), - "min_filesize": text.parse_bytes( - self.config("filesize-min"), None), - "max_filesize": text.parse_bytes( - self.config("filesize-max"), None), } - raw_options = self.config("raw-options") - if raw_options: - options.update(raw_options) - - self.progress = self.config("progress", 3.0) - if self.progress is not None: - options["progress_hooks"] = (self._progress_hook,) - - if self.config("logging", True): - options["logger"] = self.log + self.ytdl_instance = None self.forward_cookies = self.config("forward-cookies", False) - + self.progress = self.config("progress", 3.0) self.outtmpl = self.config("outtmpl") - if self.outtmpl == "default": - self.outtmpl = module.DEFAULT_OUTTMPL - - self.ytdl = module.YoutubeDL(options) def download(self, url, pathfmt): kwdict = pathfmt.kwdict - ytdl = kwdict.pop("_ytdl_instance", None) - if ytdl: - if self.progress is not None and not ytdl._progress_hooks: - ytdl.add_progress_hook(self._progress_hook) - else: - ytdl = self.ytdl + ytdl_instance = kwdict.pop("_ytdl_instance", None) + if not ytdl_instance: + ytdl_instance = 
self.ytdl_instance + if not ytdl_instance: + module = __import__(self.config("module") or "youtube_dl") + self.ytdl_instance = ytdl_instance = ytdl.construct_YoutubeDL( + module, self, self.ytdl_opts) + if self.outtmpl == "default": + self.outtmpl = module.DEFAULT_OUTTMPL if self.forward_cookies: - set_cookie = ytdl.cookiejar.set_cookie + set_cookie = ytdl_instance.cookiejar.set_cookie for cookie in self.session.cookies: set_cookie(cookie) + if self.progress is not None and not ytdl_instance._progress_hooks: + ytdl_instance.add_progress_hook(self._progress_hook) + info_dict = kwdict.pop("_ytdl_info_dict", None) if not info_dict: try: - info_dict = ytdl.extract_info(url[5:], download=False) + info_dict = ytdl_instance.extract_info(url[5:], download=False) except Exception: return False if "entries" in info_dict: index = kwdict.get("_ytdl_index") if index is None: - return self._download_playlist(ytdl, pathfmt, info_dict) + return self._download_playlist( + ytdl_instance, pathfmt, info_dict) else: info_dict = info_dict["entries"][index] @@ -88,9 +71,9 @@ class YoutubeDLDownloader(DownloaderBase): if extra: info_dict.update(extra) - return self._download_video(ytdl, pathfmt, info_dict) + return self._download_video(ytdl_instance, pathfmt, info_dict) - def _download_video(self, ytdl, pathfmt, info_dict): + def _download_video(self, ytdl_instance, pathfmt, info_dict): if "url" in info_dict: text.nameext_from_url(info_dict["url"], pathfmt.kwdict) @@ -99,8 +82,9 @@ class YoutubeDLDownloader(DownloaderBase): info_dict["ext"] = "mkv" if self.outtmpl: - self._set_outtmpl(ytdl, self.outtmpl) - pathfmt.filename = filename = ytdl.prepare_filename(info_dict) + self._set_outtmpl(ytdl_instance, self.outtmpl) + pathfmt.filename = filename = \ + ytdl_instance.prepare_filename(info_dict) pathfmt.extension = info_dict["ext"] pathfmt.path = pathfmt.directory + filename pathfmt.realpath = pathfmt.temppath = ( @@ -115,40 +99,41 @@ class YoutubeDLDownloader(DownloaderBase): pathfmt.temppath = os.path.join( self.partdir, pathfmt.filename) - self._set_outtmpl(ytdl, pathfmt.temppath.replace("%", "%%")) + self._set_outtmpl(ytdl_instance, pathfmt.temppath.replace("%", "%%")) self.out.start(pathfmt.path) try: - ytdl.process_info(info_dict) + ytdl_instance.process_info(info_dict) except Exception: self.log.debug("Traceback", exc_info=True) return False return True - def _download_playlist(self, ytdl, pathfmt, info_dict): + def _download_playlist(self, ytdl_instance, pathfmt, info_dict): pathfmt.set_extension("%(playlist_index)s.%(ext)s") - self._set_outtmpl(ytdl, pathfmt.realpath) + self._set_outtmpl(ytdl_instance, pathfmt.realpath) for entry in info_dict["entries"]: - ytdl.process_info(entry) + ytdl_instance.process_info(entry) return True def _progress_hook(self, info): if info["status"] == "downloading" and \ info["elapsed"] >= self.progress: total = info.get("total_bytes") or info.get("total_bytes_estimate") + speed = info.get("speed") self.out.progress( None if total is None else int(total), info["downloaded_bytes"], - int(info["speed"]), + int(speed) if speed else 0, ) @staticmethod - def _set_outtmpl(ytdl, outtmpl): + def _set_outtmpl(ytdl_instance, outtmpl): try: - ytdl.outtmpl_dict["default"] = outtmpl + ytdl_instance.outtmpl_dict["default"] = outtmpl except AttributeError: - ytdl.params["outtmpl"] = outtmpl + ytdl_instance.params["outtmpl"] = outtmpl def compatible_formats(formats): diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 79fe971..dd9da01 100644 --- 
a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -111,7 +111,6 @@ modules = [ "sankaku", "sankakucomplex", "seiga", - "seisoparty", "senmanga", "sexcom", "simplyhentai", diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py index 4541d25..ab1044f 100644 --- a/gallery_dl/extractor/dynastyscans.py +++ b/gallery_dl/extractor/dynastyscans.py @@ -8,7 +8,7 @@ """Extractors for https://dynasty-scans.com/""" -from .common import ChapterExtractor, Extractor, Message +from .common import ChapterExtractor, MangaExtractor, Extractor, Message from .. import text import json import re @@ -48,12 +48,12 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor): (("http://dynasty-scans.com/chapters/" "hitoribocchi_no_oo_seikatsu_ch33"), { "url": "dce64e8c504118f1ab4135c00245ea12413896cb", - "keyword": "1564965671ac69bb7fbc340538397f6bd0aa269b", + "keyword": "b67599703c27316a2fe4f11c3232130a1904e032", }), (("http://dynasty-scans.com/chapters/" "new_game_the_spinoff_special_13"), { "url": "dbe5bbb74da2edcfb1832895a484e2a40bc8b538", - "keyword": "22b35029bc65d6d95db2e2c147b0a37f2d290f29", + "keyword": "6b674eb3a274999153f6be044973b195008ced2f", }), ) @@ -76,7 +76,8 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor): "author" : text.remove_html(author), "group" : (text.remove_html(group) or text.extract(group, ' alt="', '"')[0] or ""), - "date" : extr('"icon-calendar"> ', '<'), + "date" : text.parse_datetime(extr( + '"icon-calendar"> ', '<'), "%b %d, %Y"), "lang" : "en", "language": "English", } @@ -89,6 +90,22 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor): ] +class DynastyscansMangaExtractor(DynastyscansBase, MangaExtractor): + chapterclass = DynastyscansChapterExtractor + reverse = False + pattern = BASE_PATTERN + r"(/series/[^/?#]+)" + test = ("https://dynasty-scans.com/series/hitoribocchi_no_oo_seikatsu", { + "pattern": DynastyscansChapterExtractor.pattern, + "count": ">= 100", + }) + + def chapters(self, page): + return [ + (self.root + path, {}) + for path in text.extract_iter(page, '
<dd>\n<a href="', '"')
+        ]
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
             ', '')),
             "_"        : extr('<div id="gdc">', '<'),
-            "uploader" : text.unquote(extr('/uploader/', '"')),
+            "uploader" : extr('<div id="gdn">', '</div>
'), "date" : text.parse_datetime(extr( '>Posted:', ''), "%Y-%m-%d %H:%M"), "parent" : extr( @@ -255,6 +255,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): "torrentcount" : extr('>Torrent Download (', ')'), } + if data["uploader"].startswith("<"): + data["uploader"] = text.unescape(text.extract( + data["uploader"], ">", "<")[0]) + f = data["favorites"][0] if f == "N": data["favorites"] = "0" diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index d2c5e8f..6ddd689 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -122,7 +122,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): "url": "d309713d2f838797096b3e9cb44fe514a9c9d07a", }), ("https://desuarchive.org/a/thread/159542679/", { - "url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406", + "url": "2bddbe03b01b4630337f6916f6df36d1d443b7b8", }), ("https://boards.fireden.net/sci/thread/11264294/", { "url": "61cab625c95584a12a30049d054931d64f8d20aa", @@ -131,10 +131,10 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): "url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f", }), ("https://rbt.asia/g/thread/61487650/", { - "url": "61896d9d9a2edb556b619000a308a984307b6d30", + "url": "b4692707cddb4ad1c9ba1cde77c4703025cb86e5", }), ("https://archive.rebeccablacktech.com/g/thread/61487650/", { - "url": "61896d9d9a2edb556b619000a308a984307b6d30", + "url": "b4692707cddb4ad1c9ba1cde77c4703025cb86e5", }), ("https://thebarchive.com/b/thread/739772332/", { "url": "e8b18001307d130d67db31740ce57c8561b5d80c", diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py index e09e190..a42a202 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -27,8 +27,21 @@ class GelbooruV02Extractor(booru.BooruExtractor): params["pid"] = self.page_start params["limit"] = self.per_page + post = None while True: - root = self._api_request(params) + try: + root = self._api_request(params) + except ElementTree.ParseError: + if "tags" not in params or post is None: + raise + taglist = [tag for tag in params["tags"].split() + if not tag.startswith("id:<")] + taglist.append("id:<" + str(post.attrib["id"])) + params["tags"] = " ".join(taglist) + params["pid"] = 0 + continue + + post = None for post in root: yield post.attrib diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index bf479ab..a1dd465 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -439,15 +439,27 @@ class InstagramTaggedExtractor(InstagramExtractor): test = ("https://www.instagram.com/instagram/tagged/", { "range": "1-16", "count": ">= 16", + "keyword": { + "tagged_owner_id" : "25025320", + "tagged_username" : "instagram", + "tagged_full_name": "Instagram", + }, }) - def posts(self): + def metadata(self): url = "{}/{}/".format(self.root, self.item) - user = self._extract_profile_page(url) + self.user = user = self._extract_profile_page(url) + + return { + "tagged_owner_id" : user["id"], + "tagged_username" : user["username"], + "tagged_full_name": user["full_name"], + } + def posts(self): query_hash = "be13233562af2d229b008d2976b998b5" - variables = {"id": user["id"], "first": 50} - edge = self._get_edge_data(user, None) + variables = {"id": self.user["id"], "first": 50} + edge = self._get_edge_data(self.user, None) return self._pagination_graphql(query_hash, variables, edge) diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 2e1d0b2..6483278 100644 --- 
a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -14,7 +14,7 @@ from ..cache import cache import itertools import re -BASE_PATTERN = r"(?:https?://)?kemono\.party" +BASE_PATTERN = r"(?:https?://)?(?:www\.)?kemono\.party" USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)" @@ -30,19 +30,20 @@ class KemonopartyExtractor(Extractor): def items(self): self._prepare_ddosguard_cookies() - find_inline = re.compile( + self._find_inline = re.compile( r'src="(?:https?://kemono\.party)?(/inline/[^"]+' r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall - skip_service = \ - "patreon" if self.config("patreon-skip-file", True) else None + find_hash = re.compile("/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})").match + generators = self._build_file_generators(self.config("files")) comments = self.config("comments") + username = dms = None if self.config("metadata"): username = text.unescape(text.extract( self.request(self.user_url).text, '"): + dms.append({ + "body": text.unescape(text.extract( + dm, '
<pre>', '</pre>
', + )[0].strip()), + "date": text.extract(dm, 'datetime="', '"')[0], + }) + return dms + class KemonopartyUserExtractor(KemonopartyExtractor): """Extractor for all posts from a kemono.party user listing""" @@ -175,6 +226,8 @@ class KemonopartyPostExtractor(KemonopartyExtractor): "embed": dict, "extension": "jpeg", "filename": "P058kDFYus7DbqAkGlfWTlOr", + "hash": "210f35388e28bbcf756db18dd516e2d8" + "2ce758e0d32881eeee76d43e1716d382", "id": "506575", "num": 1, "published": "Sun, 11 Aug 2019 02:09:04 GMT", @@ -188,25 +241,39 @@ class KemonopartyPostExtractor(KemonopartyExtractor): }), # inline image (#1286) ("https://kemono.party/fanbox/user/7356311/post/802343", { - "pattern": r"https://kemono\.party/data/inline/fanbox" - r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg", + "pattern": r"https://kemono\.party/data/47/b5/47b5c014ecdcfabdf2c8" + r"5eec53f1133a76336997ae8596f332e97d956a460ad2\.jpg", + "keyword": {"hash": "47b5c014ecdcfabdf2c85eec53f1133a" + "76336997ae8596f332e97d956a460ad2"}, }), # kemono.party -> data.kemono.party ("https://kemono.party/gumroad/user/trylsc/post/IURjT", { - "pattern": r"https://kemono\.party/data/(file|attachment)s" - r"/gumroad/trylsc/IURjT/", + "pattern": r"https://kemono\.party/data/(" + r"files/gumroad/trylsc/IURjT/reward8\.jpg|" + r"c6/04/c6048f5067fd9dbfa7a8be565ac194efdfb6e4.+\.zip)", }), # username (#1548, #1652) ("https://kemono.party/gumroad/user/3252870377455/post/aJnAH", { "options": (("metadata", True),), "keyword": {"username": "Kudalyn's Creations"}, }), - # skip patreon main file (#1667, #1689) + # skip patreon duplicates ("https://kemono.party/patreon/user/4158582/post/32099982", { "count": 2, - "keyword": {"type": "attachment"}, + }), + # DMs (#2008) + ("https://kemono.party/patreon/user/34134344/post/38129255", { + "options": (("dms", True),), + "keyword": {"dms": [{ + "body": r"re:Hi! Thank you very much for supporting the work I" + r" did in May. Here's your reward pack! I hope you fin" + r"d something you enjoy in it. 
:\)\n\nhttps://www.medi" + r"afire.com/file/\w+/Set13_tier_2.zip/file", + "date": "2021-07-31 02:47:51.327865", + }]}, }), ("https://kemono.party/subscribestar/user/alcorart/post/184330"), + ("https://www.kemono.party/subscribestar/user/alcorart/post/184330"), ) def __init__(self, match): diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index ff1d7c3..393f4e2 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -46,10 +46,10 @@ class MangadexExtractor(Extractor): def _transform(self, chapter): relationships = defaultdict(list) for item in chapter["relationships"]: - relationships[item["type"]].append(item["id"]) - manga = self.api.manga(relationships["manga"][0]) + relationships[item["type"]].append(item) + manga = self.api.manga(relationships["manga"][0]["id"]) for item in manga["relationships"]: - relationships[item["type"]].append(item["id"]) + relationships[item["type"]].append(item) cattributes = chapter["attributes"] mattributes = manga["attributes"] @@ -75,16 +75,12 @@ class MangadexExtractor(Extractor): "count" : len(cattributes["data"]), } - if self.config("metadata"): - data["artist"] = [ - self.api.author(uuid)["attributes"]["name"] - for uuid in relationships["artist"]] - data["author"] = [ - self.api.author(uuid)["attributes"]["name"] - for uuid in relationships["author"]] - data["group"] = [ - self.api.group(uuid)["attributes"]["name"] - for uuid in relationships["scanlation_group"]] + data["artist"] = [artist["attributes"]["name"] + for artist in relationships["artist"]] + data["author"] = [author["attributes"]["name"] + for author in relationships["author"]] + data["group"] = [group["attributes"]["name"] + for group in relationships["scanlation_group"]] return data @@ -95,12 +91,11 @@ class MangadexChapterExtractor(MangadexExtractor): pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)" test = ( ("https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa", { - "keyword": "f6c2b908df06eb834d56193dfe1fa1f7c2c4dccd", + "keyword": "86fb262cf767dac6d965cd904ad499adba466404", # "content": "50383a4c15124682057b197d40261641a98db514", }), # oneshot ("https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831", { - "options": (("metadata", True),), "count": 64, "keyword": "6abcbe1e24eeb1049dc931958853cd767ee483fb", }), @@ -147,6 +142,8 @@ class MangadexMangaExtractor(MangadexExtractor): "date" : "type:datetime", "lang" : str, "language": str, + "artist" : ["Arakawa Hiromu"], + "author" : ["Arakawa Hiromu"], }, }), ("https://mangadex.cc/manga/d0c88e3b-ea64-4e07-9841-c1d2ac982f4a/", { @@ -193,20 +190,14 @@ class MangadexAPI(): def athome_server(self, uuid): return self._call("/at-home/server/" + uuid) - @memcache(keyarg=1) - def author(self, uuid): - return self._call("/author/" + uuid)["data"] - def chapter(self, uuid): - return self._call("/chapter/" + uuid)["data"] - - @memcache(keyarg=1) - def group(self, uuid): - return self._call("/group/" + uuid)["data"] + params = {"includes[]": ("scanlation_group",)} + return self._call("/chapter/" + uuid, params)["data"] @memcache(keyarg=1) def manga(self, uuid): - return self._call("/manga/" + uuid)["data"] + params = {"includes[]": ("artist", "author")} + return self._call("/manga/" + uuid, params)["data"] def manga_feed(self, uuid): order = "desc" if self.extractor.config("chapter-reverse") else "asc" @@ -275,6 +266,7 @@ class MangadexAPI(): ratings = ("safe", "suggestive", "erotica", "pornographic") params["contentRating[]"] = ratings + params["includes[]"] = 
("scanlation_group",) params["translatedLanguage[]"] = config("lang") params["offset"] = 0 diff --git a/gallery_dl/extractor/mangoxo.py b/gallery_dl/extractor/mangoxo.py index d45fbc9..1486057 100644 --- a/gallery_dl/extractor/mangoxo.py +++ b/gallery_dl/extractor/mangoxo.py @@ -122,18 +122,18 @@ class MangoxoAlbumExtractor(MangoxoExtractor): def metadata(self, page): """Return general metadata""" extr = text.extract_from(page) - title = extr('', '') - count = extr('id="pic-count">', '<') - cid = extr('', '', '<') - date = extr('', '<') + count = extr('id="pic-count">', '<') + date = extr('class="fa fa-calendar">', '<') descr = extr('
<pre>', '</pre>
') return { "channel": { "id": cid, - "name": text.unescape(cname.strip()), + "name": text.unescape(cname), "cover": cover, }, "album": { diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py index d3b3bb1..51a0d38 100644 --- a/gallery_dl/extractor/philomena.py +++ b/gallery_dl/extractor/philomena.py @@ -62,6 +62,8 @@ INSTANCES = { "filter_id": "56027"}, "ponybooru" : {"root": "https://ponybooru.org", "filter_id": "2"}, + "furbooru" : {"root": "https://furbooru.org", + "filter_id": "2"}, } BASE_PATTERN = PhilomenaExtractor.update(INSTANCES) @@ -124,6 +126,9 @@ class PhilomenaPostExtractor(PhilomenaExtractor): ("https://ponybooru.org/images/1", { "content": "bca26f58fafd791fe07adcd2a28efd7751824605", }), + ("https://furbooru.org/images/1", { + "content": "9eaa1e1b32fa0f16520912257dbefaff238d5fd2", + }), ) def __init__(self, match): @@ -157,6 +162,10 @@ class PhilomenaSearchExtractor(PhilomenaExtractor): "range": "40-60", "count": 21, }), + ("https://furbooru.org/search?q=cute", { + "range": "40-60", + "count": 21, + }), ) def __init__(self, match): @@ -210,6 +219,9 @@ class PhilomenaGalleryExtractor(PhilomenaExtractor): ("https://ponybooru.org/galleries/27", { "count": ">= 24", }), + ("https://furbooru.org/galleries/27", { + "count": ">= 13", + }), ) def __init__(self, match): diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py index 04fe581..b3a620a 100644 --- a/gallery_dl/extractor/reactor.py +++ b/gallery_dl/extractor/reactor.py @@ -8,29 +8,29 @@ """Generic extractors for *reactor sites""" -from .common import Extractor, Message +from .common import BaseExtractor, Message from .. import text import urllib.parse import json -BASE_PATTERN = r"(?:https?://)?((?:[^/.]+\.)?reactor\.cc)" - -class ReactorExtractor(Extractor): +class ReactorExtractor(BaseExtractor): """Base class for *reactor.cc extractors""" basecategory = "reactor" filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}" archive_fmt = "{post_id}_{num}" - instances = () request_interval = 5.0 def __init__(self, match): - Extractor.__init__(self, match) - self.root = "http://" + match.group(1) + BaseExtractor.__init__(self, match) + url = text.ensure_http_scheme(match.group(0), "http://") + pos = url.index("/", 10) + + self.root, self.path = url[:pos], url[pos:] self.session.headers["Referer"] = self.root self.gif = self.config("gif", False) - if not self.category: + if self.category == "reactor": # set category based on domain name netloc = urllib.parse.urlsplit(self.root).netloc self.category = netloc.rpartition(".")[0] @@ -50,7 +50,7 @@ class ReactorExtractor(Extractor): def posts(self): """Return all relevant post-objects""" - return self._pagination(self.url) + return self._pagination(self.root + self.path) def _pagination(self, url): while True: @@ -145,91 +145,63 @@ class ReactorExtractor(Extractor): } +BASE_PATTERN = ReactorExtractor.update({ + "reactor" : { + "root": "http://reactor.cc", + "pattern": r"(?:[^/.]+\.)?reactor\.cc", + }, + "joyreactor" : { + "root": "http://joyreactor.cc", + "pattern": r"(?:www\.)?joyreactor\.c(?:c|om)", + }, + "pornreactor": { + "root": "http://pornreactor.cc", + "pattern": r"(?:www\.)?(?:pornreactor\.cc|fapreactor.com)", + }, + "thatpervert": { + "root": "http://thatpervert.com", + }, +}) + + class ReactorTagExtractor(ReactorExtractor): """Extractor for tag searches on *reactor.cc sites""" subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "{search_tags}_{post_id}_{num}" pattern = BASE_PATTERN 
+ r"/tag/([^/?#]+)" - test = ("http://anime.reactor.cc/tag/Anime+Art",) + test = ( + ("http://reactor.cc/tag/gif"), + ("http://anime.reactor.cc/tag/Anime+Art"), + ("http://joyreactor.cc/tag/Advent+Cirno", { + "count": ">= 15", + }), + ("http://joyreactor.com/tag/Cirno", { + "url": "aa59090590b26f4654881301fe8fe748a51625a8", + }), + ("http://pornreactor.cc/tag/RiceGnat", { + "range": "1-25", + "count": ">= 25", + }), + ("http://fapreactor.com/tag/RiceGnat"), + ) def __init__(self, match): ReactorExtractor.__init__(self, match) - self.tag = match.group(2) + self.tag = match.group(match.lastindex) def metadata(self): return {"search_tags": text.unescape(self.tag).replace("+", " ")} -class ReactorSearchExtractor(ReactorTagExtractor): +class ReactorSearchExtractor(ReactorExtractor): """Extractor for search results on *reactor.cc sites""" subcategory = "search" directory_fmt = ("{category}", "search", "{search_tags}") archive_fmt = "s_{search_tags}_{post_id}_{num}" pattern = BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)" - test = ("http://anime.reactor.cc/search?q=Art",) - - -class ReactorUserExtractor(ReactorExtractor): - """Extractor for all posts of a user on *reactor.cc sites""" - subcategory = "user" - directory_fmt = ("{category}", "user", "{user}") - pattern = BASE_PATTERN + r"/user/([^/?#]+)" - test = ("http://anime.reactor.cc/user/Shuster",) - - def __init__(self, match): - ReactorExtractor.__init__(self, match) - self.user = match.group(2) - - def metadata(self): - return {"user": text.unescape(self.user).replace("+", " ")} - - -class ReactorPostExtractor(ReactorExtractor): - """Extractor for single posts on *reactor.cc sites""" - subcategory = "post" - pattern = BASE_PATTERN + r"/post/(\d+)" - test = ("http://anime.reactor.cc/post/3576250",) - - def __init__(self, match): - ReactorExtractor.__init__(self, match) - self.post_id = match.group(2) - - def items(self): - post = self.request(self.url).text - pos = post.find('class="uhead">') - for image in self._parse_post(post[pos:]): - if image["num"] == 1: - yield Message.Directory, image - url = image["url"] - yield Message.Url, url, text.nameext_from_url(url, image) - - -# -------------------------------------------------------------------- -# JoyReactor - -JR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(joyreactor\.c(?:c|om))" - - -class JoyreactorTagExtractor(ReactorTagExtractor): - """Extractor for tag searches on joyreactor.cc""" - category = "joyreactor" - pattern = JR_BASE_PATTERN + r"/tag/([^/?#]+)" - test = ( - ("http://joyreactor.cc/tag/Advent+Cirno", { - "count": ">= 15", - }), - ("http://joyreactor.com/tag/Cirno", { - "url": "aa59090590b26f4654881301fe8fe748a51625a8", - }), - ) - - -class JoyreactorSearchExtractor(ReactorSearchExtractor): - """Extractor for search results on joyreactor.cc""" - category = "joyreactor" - pattern = JR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)" test = ( + ("http://reactor.cc/search?q=Art"), ("http://joyreactor.cc/search/Nature", { "range": "1-25", "count": ">= 20", @@ -238,26 +210,54 @@ class JoyreactorSearchExtractor(ReactorSearchExtractor): "range": "1-25", "count": ">= 20", }), + ("http://pornreactor.cc/search?q=ecchi+hentai"), + ("http://fapreactor.com/search/ecchi+hentai"), ) + def __init__(self, match): + ReactorExtractor.__init__(self, match) + self.tag = match.group(match.lastindex) + + def metadata(self): + return {"search_tags": text.unescape(self.tag).replace("+", " ")} + -class JoyreactorUserExtractor(ReactorUserExtractor): - """Extractor for all posts of a user on joyreactor.cc""" - 
category = "joyreactor" - pattern = JR_BASE_PATTERN + r"/user/([^/?#]+)" +class ReactorUserExtractor(ReactorExtractor): + """Extractor for all posts of a user on *reactor.cc sites""" + subcategory = "user" + directory_fmt = ("{category}", "user", "{user}") + pattern = BASE_PATTERN + r"/user/([^/?#]+)" test = ( + ("http://reactor.cc/user/Dioklet"), + ("http://anime.reactor.cc/user/Shuster"), ("http://joyreactor.cc/user/hemantic"), ("http://joyreactor.com/user/Tacoman123", { "url": "60ce9a3e3db791a0899f7fb7643b5b87d09ae3b5", }), + ("http://pornreactor.cc/user/Disillusion", { + "range": "1-25", + "count": ">= 20", + }), + ("http://fapreactor.com/user/Disillusion"), ) + def __init__(self, match): + ReactorExtractor.__init__(self, match) + self.user = match.group(match.lastindex) + + def metadata(self): + return {"user": text.unescape(self.user).replace("+", " ")} + -class JoyreactorPostExtractor(ReactorPostExtractor): - """Extractor for single posts on joyreactor.cc""" - category = "joyreactor" - pattern = JR_BASE_PATTERN + r"/post/(\d+)" +class ReactorPostExtractor(ReactorExtractor): + """Extractor for single posts on *reactor.cc sites""" + subcategory = "post" + pattern = BASE_PATTERN + r"/post/(\d+)" test = ( + ("http://reactor.cc/post/4999736", { + "url": "dfc74d150d7267384d8c229c4b82aa210755daa0", + }), + ("http://anime.reactor.cc/post/3576250"), ("http://joyreactor.com/post/3721876", { # single image "pattern": r"http://img\d\.joyreactor\.com/pics/post/full" r"/cartoon-painting-monster-lake-4841316.jpeg", @@ -281,57 +281,6 @@ class JoyreactorPostExtractor(ReactorPostExtractor): ("http://joyreactor.cc/post/1299", { # "malformed" JSON "url": "ab02c6eb7b4035ad961b29ee0770ee41be2fcc39", }), - ) - - -# -------------------------------------------------------------------- -# PornReactor - -PR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(pornreactor\.cc|fapreactor.com)" - - -class PornreactorTagExtractor(ReactorTagExtractor): - """Extractor for tag searches on pornreactor.cc""" - category = "pornreactor" - pattern = PR_BASE_PATTERN + r"/tag/([^/?#]+)" - test = ( - ("http://pornreactor.cc/tag/RiceGnat", { - "range": "1-25", - "count": ">= 25", - }), - ("http://fapreactor.com/tag/RiceGnat"), - ) - - -class PornreactorSearchExtractor(ReactorSearchExtractor): - """Extractor for search results on pornreactor.cc""" - category = "pornreactor" - pattern = PR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)" - test = ( - ("http://pornreactor.cc/search?q=ecchi+hentai"), - ("http://fapreactor.com/search/ecchi+hentai"), - ) - - -class PornreactorUserExtractor(ReactorUserExtractor): - """Extractor for all posts of a user on pornreactor.cc""" - category = "pornreactor" - pattern = PR_BASE_PATTERN + r"/user/([^/?#]+)" - test = ( - ("http://pornreactor.cc/user/Disillusion", { - "range": "1-25", - "count": ">= 20", - }), - ("http://fapreactor.com/user/Disillusion"), - ) - - -class PornreactorPostExtractor(ReactorPostExtractor): - """Extractor for single posts on pornreactor.cc""" - category = "pornreactor" - subcategory = "post" - pattern = PR_BASE_PATTERN + r"/post/(\d+)" - test = ( ("http://pornreactor.cc/post/863166", { "url": "a09fb0577489e1f9564c25d0ad576f81b19c2ef3", "content": "ec6b0568bfb1803648744077da082d14de844340", @@ -340,3 +289,16 @@ class PornreactorPostExtractor(ReactorPostExtractor): "url": "2a956ce0c90e8bc47b4392db4fa25ad1342f3e54", }), ) + + def __init__(self, match): + ReactorExtractor.__init__(self, match) + self.post_id = match.group(match.lastindex) + + def items(self): + post = 
self.request(self.root + self.path).text + pos = post.find('class="uhead">') + for image in self._parse_post(post[pos:]): + if image["num"] == 1: + yield Message.Directory, image + url = image["url"] + yield Message.Url, url, text.nameext_from_url(url, image) diff --git a/gallery_dl/extractor/seisoparty.py b/gallery_dl/extractor/seisoparty.py deleted file mode 100644 index a2a24e0..0000000 --- a/gallery_dl/extractor/seisoparty.py +++ /dev/null @@ -1,201 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2021 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://seiso.party/""" - -from .common import Extractor, Message -from .. import text, exception -from ..cache import cache -import re - - -class SeisopartyExtractor(Extractor): - """Base class for seisoparty extractors""" - category = "seisoparty" - root = "https://seiso.party" - directory_fmt = ("{category}", "{service}", "{username}") - filename_fmt = "{id}_{title}_{num:>02}_{filename}.{extension}" - archive_fmt = "{service}_{user}_{id}_{num}" - cookiedomain = ".seiso.party" - - def __init__(self, match): - Extractor.__init__(self, match) - self.user_name = None - self._find_files = re.compile( - r'href="(https://cdn(?:-\d)?\.seiso\.party/files/[^"]+)').findall - - def items(self): - self._prepare_ddosguard_cookies() - - for post in self.posts(): - files = post.pop("files") - yield Message.Directory, post - for post["num"], url in enumerate(files, 1): - yield Message.Url, url, text.nameext_from_url(url, post) - - def _parse_post(self, page, post_id): - extr = text.extract_from(page) - return { - "service" : self.service, - "user" : self.user_id, - "username": self.user_name, - "id" : post_id, - "date" : text.parse_datetime(extr( - '
', '<'), - "%Y-%m-%d %H:%M:%S %Z"), - "title" : text.unescape(extr('class="post-title">', '<')), - "content" : text.unescape(extr("\n

\n", "\n

\n").strip()), - "files" : self._find_files(page), - } - - def login(self): - username, password = self._get_auth_info() - if username: - self._update_cookies(self._login_impl(username, password)) - - @cache(maxage=28*24*3600, keyarg=1) - def _login_impl(self, username, password): - self.log.info("Logging in as %s", username) - - url = self.root + "/account/login" - data = {"username": username, "password": password} - - response = self.request(url, method="POST", data=data) - if response.url.endswith("/account/login") and \ - "Username or password is incorrect" in response.text: - raise exception.AuthenticationError() - - return {c.name: c.value for c in response.history[0].cookies} - - -class SeisopartyUserExtractor(SeisopartyExtractor): - """Extractor for all posts from a seiso.party user listing""" - subcategory = "user" - pattern = r"(?:https?://)?seiso\.party/artists/([^/?#]+)/([^/?#]+)" - test = ( - ("https://seiso.party/artists/fanbox/21", { - "pattern": r"https://cdn\.seiso\.party/files/fanbox/\d+/", - "count": ">=15", - "keyword": { - "content": str, - "date": "type:datetime", - "id": r"re:\d+", - "num": int, - "service": "fanbox", - "title": str, - "user": "21", - "username": "雨", - }, - }), - ) - - def __init__(self, match): - SeisopartyExtractor.__init__(self, match) - self.service, self.user_id = match.groups() - - def posts(self): - url = "{}/artists/{}/{}".format(self.root, self.service, self.user_id) - page = self.request(url).text - self.user_name, pos = text.extract(page, '', '<') - - url = self.root + text.extract( - page, 'href="', '"', page.index('id="content"', pos))[0] - response = self.request(url) - headers = {"Referer": url} - - while True: - yield self._parse_post(response.text, url.rpartition("/")[2]) - response = self.request(url + "/next", headers=headers) - if url == response.url: - return - url = headers["Referer"] = response.url - - -class SeisopartyPostExtractor(SeisopartyExtractor): - """Extractor for a single seiso.party post""" - subcategory = "post" - pattern = r"(?:https?://)?seiso\.party/post/([^/?#]+)/([^/?#]+)/([^/?#]+)" - test = ( - ("https://seiso.party/post/fanbox/21/371", { - "url": "75f13b92de0ce399b6163c3de18f1f36011c2366", - "count": 2, - "keyword": { - "content": "この前描いためぐるちゃんのPSDファイルです。
" - "どうぞよろしくお願いします。", - "date": "dt:2021-05-06 12:38:31", - "extension": "re:psd|jpg", - "filename": "re:backcourt|ffb2ccb7a3586d05f9a4620329dd131e", - "id": "371", - "num": int, - "service": "fanbox", - "title": "MEGURU.PSD", - "user": "21", - "username": "雨", - }, - }), - ("https://seiso.party/post/patreon/429/95949", { - "pattern": r"https://cdn-2\.seiso\.party/files/patreon/95949/", - "count": 2, - }), - ) - - def __init__(self, match): - SeisopartyExtractor.__init__(self, match) - self.service, self.user_id, self.post_id = match.groups() - - def posts(self): - url = "{}/artists/{}/{}".format(self.root, self.service, self.user_id) - page = self.request(url).text - self.user_name, pos = text.extract(page, '', '<') - - url = "{}/post/{}/{}/{}".format( - self.root, self.service, self.user_id, self.post_id) - return (self._parse_post(self.request(url).text, self.post_id),) - - -class SeisopartyFavoriteExtractor(SeisopartyExtractor): - """Extractor for seiso.party favorites""" - subcategory = "favorite" - pattern = r"(?:https?://)?seiso\.party/favorites/artists/?(?:\?([^#]+))?" - test = ( - ("https://seiso.party/favorites/artists", { - "pattern": SeisopartyUserExtractor.pattern, - "url": "0c862434bc3bbbe84cbf41c3a6152473a8cde683", - "count": 3, - }), - ("https://seiso.party/favorites/artists?sort=id&sort_direction=asc", { - "url": "629a8b9c6d3a8a64f521908bdb3d7426ac03f8d3", - }), - ) - - def __init__(self, match): - SeisopartyExtractor.__init__(self, match) - self.query = match.group(1) - - def items(self): - self._prepare_ddosguard_cookies() - self.login() - - url = self.root + "/favorites/artists" - data = {"_extractor": SeisopartyUserExtractor} - params = text.parse_query(self.query) - params["page"] = text.parse_int(params.get("page"), 1) - - while True: - page = self.request(url, params=params).text - - cnt = 0 - for card in text.extract_iter( - page, '
= 20", - "pattern": r"https://(star-uploads|ss-uploads-prod)\.s\d+-us-west-" - r"\d+\.amazonaws\.com/uploads(_v2)?/users/11/", + "pattern": r"https://\w+\.cloudfront\.net/uploads(_v2)?/users/11/", "keyword": { "author_id": 11, "author_name": "subscribestar", @@ -149,6 +147,7 @@ class SubscribestarUserExtractor(SubscribestarExtractor): "content": str, "date" : "type:datetime", "id" : int, + "num" : int, "post_id": int, "type" : "re:image|video|attachment", "url" : str, @@ -190,7 +189,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor): pattern = BASE_PATTERN + r"/posts/(\d+)" test = ( ("https://www.subscribestar.com/posts/102468", { - "url": "612da5a98af056dd78dc846fbcfa705e721f6675", + "count": 1, "keyword": { "author_id": 11, "author_name": "subscribestar", @@ -202,6 +201,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor): "group": "imgs_and_videos", "height": 291, "id": 203885, + "num": 1, "pinned": False, "post_id": 102468, "type": "image", @@ -209,7 +209,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor): }, }), ("https://subscribestar.adult/posts/22950", { - "url": "440d745a368e6b3e218415f593a5045f384afa0d", + "count": 1, "keyword": {"date": "dt:2019-04-28 07:32:00"}, }), ) diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 00f3b04..f1c392d 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -41,7 +41,9 @@ class TwitterExtractor(Extractor): self.videos = self.config("videos", True) self.cards = self.config("cards", False) self._user_cache = {} + self._init_sizes() + def _init_sizes(self): size = self.config("size") if size is None: self._size_image = "orig" @@ -580,13 +582,17 @@ class TwitterImageExtractor(Extractor): subcategory = "image" pattern = r"https?://pbs\.twimg\.com/media/([\w-]+)(?:\?format=|\.)(\w+)" test = ( - ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG?format=jpg%name=orig"), + ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG?format=jpg&name=orig", { + "options": (("size", "4096x4096,orig"),), + "url": "cb3042a6f6826923da98f0d2b66c427e9385114c", + }), ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG.jpg:orig"), ) def __init__(self, match): Extractor.__init__(self, match) self.id, self.fmt = match.groups() + TwitterExtractor._init_sizes(self) def items(self): base = "https://pbs.twimg.com/media/{}?format={}&name=".format( @@ -595,11 +601,11 @@ class TwitterImageExtractor(Extractor): data = { "filename": self.id, "extension": self.fmt, - "_fallback": TwitterExtractor._image_fallback(base), + "_fallback": TwitterExtractor._image_fallback(self, base), } yield Message.Directory, data - yield Message.Url, base + "orig", data + yield Message.Url, base + self._size_image, data class TwitterAPI(): @@ -793,16 +799,21 @@ class TwitterAPI(): data = response.json() if "errors" in data: try: - msg = ", ".join( - '"' + error["message"] + '"' - for error in data["errors"] - ) + errors, warnings = [], [] + for error in data["errors"]: + if error.get("kind") == "NonFatal": + warnings.append(error["message"]) + else: + errors.append(error["message"]) + errors = ", ".join(errors) except Exception: - msg = data["errors"] - if msg and response.status_code < 400: - raise exception.StopExtraction(msg) + errors = data["errors"] + if warnings: + self.extractor.log.warning(", ".join(warnings)) + if errors and response.status_code < 400: + raise exception.StopExtraction(errors) else: - msg = "" + errors = "" if response.status_code < 400: # success @@ -816,7 +827,7 @@ class TwitterAPI(): continue if 
response.status_code == 401 and \ - "have been blocked from viewing" in msg: + "have been blocked from viewing" in errors: # account blocked extr = self.extractor if self.headers["x-twitter-auth-type"] and \ @@ -833,7 +844,7 @@ class TwitterAPI(): # error raise exception.StopExtraction( - "%s %s (%s)", response.status_code, response.reason, msg) + "%s %s (%s)", response.status_code, response.reason, errors) def _pagination(self, endpoint, params=None): if params is None: diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py index e2474c9..cf5b192 100644 --- a/gallery_dl/extractor/webtoons.py +++ b/gallery_dl/extractor/webtoons.py @@ -48,7 +48,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): test = ( (("https://www.webtoons.com/en/comedy/safely-endangered" "/ep-572-earth/viewer?title_no=352&episode_no=572"), { - "url": "11041d71a3f92728305c11a228e77cf0f7aa02ef", + "url": "55bec5d7c42aba19e3d0d56db25fdf0b0b13be38", "content": ("1748c7e82b6db910fa179f6dc7c4281b0f680fa7", "42055e44659f6ffc410b3fb6557346dfbb993df3", "49e1f2def04c6f7a6a3dacf245a1cd9abe77a6a9"), @@ -62,7 +62,6 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): url = "{}/{}/viewer?{}".format(self.root, self.path, query) GalleryExtractor.__init__(self, match, url) self.setup_agegate_cookies() - self.session.headers["Referer"] = url query = text.parse_query(query) self.title_no = query.get("title_no") @@ -88,7 +87,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): @staticmethod def images(page): return [ - (url, None) + (url.replace("://webtoon-phinf.", "://swebtoon-phinf."), None) for url in text.extract_iter( page, 'class="_images" data-url="', '"') ] diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py index 0922c7c..0a55532 100644 --- a/gallery_dl/extractor/xvideos.py +++ b/gallery_dl/extractor/xvideos.py @@ -32,8 +32,8 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor): test = ( ("https://www.xvideos.com/profiles/pervertedcouple/photos/751031", { "count": 8, - "pattern": r"https://profile-pics-l3\.xvideos-cdn\.com" - r"/[0-9a-f]{40}-\d+/videos/profiles/galleries/84/ca/37" + "pattern": r"https://profile-pics-cdn\d+\.xvideos-cdn\.com" + r"/[^/]+\,\d+/videos/profiles/galleries/84/ca/37" r"/pervertedcouple/gal751031/pic_\d+_big\.jpg", "keyword": { "gallery": { diff --git a/gallery_dl/extractor/ytdl.py b/gallery_dl/extractor/ytdl.py index d380dab..8eb0c83 100644 --- a/gallery_dl/extractor/ytdl.py +++ b/gallery_dl/extractor/ytdl.py @@ -9,7 +9,7 @@ """Extractors for sites supported by youtube-dl""" from .common import Extractor, Message -from .. import text, config, exception +from .. 
import ytdl, config, exception class YoutubeDLExtractor(Extractor): @@ -54,52 +54,45 @@ class YoutubeDLExtractor(Extractor): self.log.debug("Using %s", ytdl_module) # construct YoutubeDL object - options = { - "format" : self.config("format"), + extr_opts = { + "extract_flat" : "in_playlist", + "force_generic_extractor": self.force_generic_extractor, + } + user_opts = { "retries" : self._retries, "socket_timeout" : self._timeout, "nocheckcertificate" : not self._verify, - "proxy" : self.session.proxies.get("http"), - "force_generic_extractor": self.force_generic_extractor, - "nopart" : not self.config("part", True), - "updatetime" : self.config("mtime", True), - "ratelimit" : text.parse_bytes( - self.config("rate"), None), - "min_filesize" : text.parse_bytes( - self.config("filesize-min"), None), - "max_filesize" : text.parse_bytes( - self.config("filesize-max"), None), } - raw_options = self.config("raw-options") - if raw_options: - options.update(raw_options) - if self.config("logging", True): - options["logger"] = self.log - options["extract_flat"] = "in_playlist" - username, password = self._get_auth_info() if username: - options["username"], options["password"] = username, password + user_opts["username"], user_opts["password"] = username, password del username, password - ytdl = ytdl_module.YoutubeDL(options) + ytdl_instance = ytdl.construct_YoutubeDL( + ytdl_module, self, user_opts, extr_opts) # transfer cookies to ytdl cookies = self.session.cookies if cookies: - set_cookie = self.ytdl.cookiejar.set_cookie - for cookie in self.session.cookies: + set_cookie = ytdl_instance.cookiejar.set_cookie + for cookie in cookies: set_cookie(cookie) # extract youtube_dl info_dict - info_dict = ytdl._YoutubeDL__extract_info( - self.ytdl_url, - ytdl.get_info_extractor(self.ytdl_ie_key), - False, {}, True) - - if "entries" in info_dict: - results = self._process_entries(ytdl, info_dict["entries"]) + try: + info_dict = ytdl_instance._YoutubeDL__extract_info( + self.ytdl_url, + ytdl_instance.get_info_extractor(self.ytdl_ie_key), + False, {}, True) + except ytdl_module.utils.YoutubeDLError: + raise exception.StopExtraction("Failed to extract video data") + + if not info_dict: + return + elif "entries" in info_dict: + results = self._process_entries( + ytdl_module, ytdl_instance, info_dict["entries"]) else: results = (info_dict,) @@ -107,7 +100,7 @@ class YoutubeDLExtractor(Extractor): for info_dict in results: info_dict["extension"] = None info_dict["_ytdl_info_dict"] = info_dict - info_dict["_ytdl_instance"] = ytdl + info_dict["_ytdl_instance"] = ytdl_instance url = "ytdl:" + (info_dict.get("url") or info_dict.get("webpage_url") or @@ -116,15 +109,23 @@ class YoutubeDLExtractor(Extractor): yield Message.Directory, info_dict yield Message.Url, url, info_dict - def _process_entries(self, ytdl, entries): + def _process_entries(self, ytdl_module, ytdl_instance, entries): for entry in entries: - if entry.get("_type") in ("url", "url_transparent"): - info_dict = ytdl.extract_info( - entry["url"], False, - ie_key=entry.get("ie_key")) - if "entries" in info_dict: + if not entry: + continue + elif entry.get("_type") in ("url", "url_transparent"): + try: + info_dict = ytdl_instance.extract_info( + entry["url"], False, + ie_key=entry.get("ie_key")) + except ytdl_module.utils.YoutubeDLError: + continue + + if not info_dict: + continue + elif "entries" in info_dict: yield from self._process_entries( - ytdl, info_dict["entries"]) + ytdl_module, ytdl_instance, info_dict["entries"]) else: yield info_dict else: diff 
--git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index f5d961a..c2b4d99 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -274,6 +274,8 @@ def build_format_func(format_spec): return _parse_join(format_spec) if fmt == "R": return _parse_replace(format_spec) + if fmt == "D": + return _parse_datetime(format_spec) return _default_format(format_spec) return format @@ -319,6 +321,16 @@ def _parse_replace(format_spec): return replace +def _parse_datetime(format_spec): + dt_format, _, format_spec = format_spec.partition("/") + dt_format = dt_format[1:] + fmt = build_format_func(format_spec) + + def dt(obj): + return fmt(text.parse_datetime(obj, dt_format)) + return dt + + def _default_format(format_spec): def wrap(obj): return format(obj, format_spec) diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 4e185d0..97a8d3f 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -11,7 +11,6 @@ import json import time import errno import logging -import operator import functools import collections from . import extractor, downloader, postprocessor @@ -201,7 +200,6 @@ class DownloadJob(Job): def __init__(self, url, parent=None): Job.__init__(self, url, parent) self.log = self.get_logger("download") - self.blacklist = None self.fallback = None self.archive = None self.sleep = None @@ -209,6 +207,7 @@ class DownloadJob(Job): self.downloaders = {} self.out = output.select() self.visited = parent.visited if parent else set() + self._extractor_filter = None self._skipcnt = 0 def handle_url(self, url, kwdict): @@ -297,9 +296,9 @@ class DownloadJob(Job): else: extr = extractor.find(url) if extr: - if self.blacklist is None: - self.blacklist = self._build_blacklist() - if extr.category in self.blacklist: + if self._extractor_filter is None: + self._extractor_filter = self._build_extractor_filter() + if not self._extractor_filter(extr): extr = None if extr: @@ -444,22 +443,20 @@ class DownloadJob(Job): self.hooks = collections.defaultdict(list) pp_log = self.get_logger("postprocessor") pp_list = [] - category = self.extractor.category - basecategory = self.extractor.basecategory pp_conf = config.get((), "postprocessor") or {} for pp_dict in postprocessors: if isinstance(pp_dict, str): pp_dict = pp_conf.get(pp_dict) or {"name": pp_dict} - whitelist = pp_dict.get("whitelist") - if whitelist and category not in whitelist and \ - basecategory not in whitelist: - continue - - blacklist = pp_dict.get("blacklist") - if blacklist and ( - category in blacklist or basecategory in blacklist): + clist = pp_dict.get("whitelist") + if clist is not None: + negate = False + else: + clist = pp_dict.get("blacklist") + negate = True + if clist and not util.build_extractor_filter( + clist, negate)(self.extractor): continue name = pp_dict.get("name") @@ -500,38 +497,18 @@ class DownloadJob(Job): if condition(pathfmt.kwdict): callback(pathfmt) - def _build_blacklist(self): - wlist = self.extractor.config("whitelist") - if wlist is not None: - if isinstance(wlist, str): - wlist = wlist.split(",") - - # build a set of all categories - blist = set() - add = blist.add - update = blist.update - get = operator.itemgetter(0) - - for extr in extractor._list_classes(): - category = extr.category - if category: - add(category) - else: - update(map(get, extr.instances)) - - # remove whitelisted categories - blist.difference_update(wlist) - return blist - - blist = self.extractor.config("blacklist") - if blist is not None: - if isinstance(blist, str): - blist = blist.split(",") - blist = set(blist) + def 
_build_extractor_filter(self): + clist = self.extractor.config("whitelist") + if clist is not None: + negate = False else: - blist = {self.extractor.category} - blist |= util.SPECIAL_EXTRACTORS - return blist + clist = self.extractor.config("blacklist") + negate = True + if clist is None: + clist = (self.extractor.category,) + + return util.build_extractor_filter( + clist, negate, util.SPECIAL_EXTRACTORS) class SimulationJob(DownloadJob): diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 4a7fdbf..d25194e 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -81,6 +81,16 @@ def identity(x): return x +def true(_): + """Always returns True""" + return True + + +def false(_): + """Always returns False""" + return False + + def noop(): """Does nothing""" @@ -432,6 +442,66 @@ def build_duration_func(duration, min=0.0): return functools.partial(identity, duration if duration > min else min) +def build_extractor_filter(categories, negate=True, special=None): + """Build a function that takes an Extractor class as argument + and returns True if that class is allowed by 'categories' + """ + if isinstance(categories, str): + categories = categories.split(",") + + catset = set() # set of categories / basecategories + subset = set() # set of subcategories + catsub = [] # list of category-subcategory pairs + + for item in categories: + category, _, subcategory = item.partition(":") + if category and category != "*": + if subcategory and subcategory != "*": + catsub.append((category, subcategory)) + else: + catset.add(category) + elif subcategory and subcategory != "*": + subset.add(subcategory) + + if special: + catset |= special + elif not catset and not subset and not catsub: + return true if negate else false + + tests = [] + + if negate: + if catset: + tests.append(lambda extr: + extr.category not in catset and + extr.basecategory not in catset) + if subset: + tests.append(lambda extr: extr.subcategory not in subset) + else: + if catset: + tests.append(lambda extr: + extr.category in catset or + extr.basecategory in catset) + if subset: + tests.append(lambda extr: extr.subcategory in subset) + + if catsub: + def test(extr): + for category, subcategory in catsub: + if category in (extr.category, extr.basecategory) and \ + subcategory == extr.subcategory: + return not negate + return negate + tests.append(test) + + if len(tests) == 1: + return tests[0] + if negate: + return lambda extr: all(t(extr) for t in tests) + else: + return lambda extr: any(t(extr) for t in tests) + + def build_predicate(predicates): if not predicates: return lambda url, kwdict: True diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 48817be..a363a97 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.19.2" +__version__ = "1.19.3" diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py new file mode 100644 index 0000000..4266f48 --- /dev/null +++ b/gallery_dl/ytdl.py @@ -0,0 +1,513 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Helpers for interacting with youtube-dl""" + +import re +import shlex +import itertools +from . 
import text, util, exception + + +def construct_YoutubeDL(module, obj, user_opts, system_opts=None): + opts = argv = None + config = obj.config + + cfg = config("config-file") + if cfg: + with open(util.expand_path(cfg)) as fp: + contents = fp.read() + argv = shlex.split(contents, comments=True) + + cmd = config("cmdline-args") + if cmd: + if isinstance(cmd, str): + cmd = shlex.split(cmd) + argv = (argv + cmd) if argv else cmd + + try: + opts = parse_command_line(module, argv) if argv else user_opts + except SystemExit: + raise exception.StopExtraction("Invalid command-line option") + + if opts.get("format") is None: + opts["format"] = config("format") + if opts.get("proxy") is None: + opts["proxy"] = obj.session.proxies.get("http") + if opts.get("nopart") is None: + opts["nopart"] = not config("part", True) + if opts.get("updatetime") is None: + opts["updatetime"] = config("mtime", True) + if opts.get("ratelimit") is None: + opts["ratelimit"] = text.parse_bytes(config("rate"), None) + if opts.get("min_filesize") is None: + opts["min_filesize"] = text.parse_bytes(config("filesize-min"), None) + if opts.get("max_filesize") is None: + opts["max_filesize"] = text.parse_bytes(config("filesize-max"), None) + + raw_opts = config("raw-options") + if raw_opts: + opts.update(raw_opts) + if config("logging", True): + opts["logger"] = obj.log + if system_opts: + opts.update(system_opts) + + return module.YoutubeDL(opts) + + +def parse_command_line(module, argv): + parser, opts, args = module.parseOpts(argv) + + ytdlp = (module.__name__ == "yt_dlp") + std_headers = module.std_headers + parse_bytes = module.FileDownloader.parse_bytes + + # HTTP headers + if opts.user_agent is not None: + std_headers["User-Agent"] = opts.user_agent + if opts.referer is not None: + std_headers["Referer"] = opts.referer + if opts.headers: + if isinstance(opts.headers, dict): + std_headers.update(opts.headers) + else: + for h in opts.headers: + key, _, value = h.partition(":") + std_headers[key] = value + + if opts.ratelimit is not None: + opts.ratelimit = parse_bytes(opts.ratelimit) + if getattr(opts, "throttledratelimit", None) is not None: + opts.throttledratelimit = parse_bytes(opts.throttledratelimit) + if opts.min_filesize is not None: + opts.min_filesize = parse_bytes(opts.min_filesize) + if opts.max_filesize is not None: + opts.max_filesize = parse_bytes(opts.max_filesize) + if opts.max_sleep_interval is None: + opts.max_sleep_interval = opts.sleep_interval + if getattr(opts, "overwrites", None): + opts.continue_dl = False + if opts.retries is not None: + opts.retries = parse_retries(opts.retries) + if opts.fragment_retries is not None: + opts.fragment_retries = parse_retries(opts.fragment_retries) + if getattr(opts, "extractor_retries", None) is not None: + opts.extractor_retries = parse_retries(opts.extractor_retries) + if opts.buffersize is not None: + opts.buffersize = parse_bytes(opts.buffersize) + if opts.http_chunk_size is not None: + opts.http_chunk_size = parse_bytes(opts.http_chunk_size) + if opts.extractaudio: + opts.audioformat = opts.audioformat.lower() + if opts.audioquality: + opts.audioquality = opts.audioquality.strip("kK") + if opts.recodevideo is not None: + opts.recodevideo = opts.recodevideo.replace(" ", "") + if getattr(opts, "remuxvideo", None) is not None: + opts.remuxvideo = opts.remuxvideo.replace(" ", "") + + if opts.date is not None: + date = module.DateRange.day(opts.date) + else: + date = module.DateRange(opts.dateafter, opts.datebefore) + + compat_opts = getattr(opts, "compat_opts", 
()) + + def _unused_compat_opt(name): + if name not in compat_opts: + return False + compat_opts.discard(name) + compat_opts.update(["*%s" % name]) + return True + + def set_default_compat( + compat_name, opt_name, default=True, remove_compat=True): + attr = getattr(opts, opt_name, None) + if compat_name in compat_opts: + if attr is None: + setattr(opts, opt_name, not default) + return True + else: + if remove_compat: + _unused_compat_opt(compat_name) + return False + elif attr is None: + setattr(opts, opt_name, default) + return None + + set_default_compat("abort-on-error", "ignoreerrors", "only_download") + set_default_compat("no-playlist-metafiles", "allow_playlist_files") + set_default_compat("no-clean-infojson", "clean_infojson") + if "format-sort" in compat_opts: + opts.format_sort.extend(module.InfoExtractor.FormatSort.ytdl_default) + _video_multistreams_set = set_default_compat( + "multistreams", "allow_multiple_video_streams", + False, remove_compat=False) + _audio_multistreams_set = set_default_compat( + "multistreams", "allow_multiple_audio_streams", + False, remove_compat=False) + if _video_multistreams_set is False and _audio_multistreams_set is False: + _unused_compat_opt("multistreams") + + if isinstance(opts.outtmpl, dict): + outtmpl = opts.outtmpl + outtmpl_default = outtmpl.get("default") + else: + opts.outtmpl = outtmpl = outtmpl_default = "" + + if "filename" in compat_opts: + if outtmpl_default is None: + outtmpl_default = outtmpl["default"] = "%(title)s-%(id)s.%(ext)s" + else: + _unused_compat_opt("filename") + + if opts.extractaudio and not opts.keepvideo and opts.format is None: + opts.format = "bestaudio/best" + + if ytdlp: + def metadataparser_actions(f): + if isinstance(f, str): + yield module.MetadataFromFieldPP.to_action(f) + else: + REPLACE = module.MetadataParserPP.Actions.REPLACE + args = f[1:] + for x in f[0].split(","): + action = [REPLACE, x] + action += args + yield action + + if getattr(opts, "parse_metadata", None) is None: + opts.parse_metadata = [] + if opts.metafromtitle is not None: + opts.parse_metadata.append("title:%s" % opts.metafromtitle) + opts.metafromtitle = None + opts.parse_metadata = list(itertools.chain.from_iterable(map( + metadataparser_actions, opts.parse_metadata))) + else: + opts.parse_metadata = () + + download_archive_fn = module.expand_path(opts.download_archive) \ + if opts.download_archive is not None else opts.download_archive + + if getattr(opts, "getcomments", None): + opts.writeinfojson = True + + if getattr(opts, "no_sponsorblock", None): + opts.sponsorblock_mark = set() + opts.sponsorblock_remove = set() + else: + opts.sponsorblock_mark = \ + getattr(opts, "sponsorblock_mark", None) or set() + opts.sponsorblock_remove = \ + getattr(opts, "sponsorblock_remove", None) or set() + sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove + + addchapters = getattr(opts, "addchapters", None) + if (opts.addmetadata or opts.sponsorblock_mark) and addchapters is None: + addchapters = True + opts.remove_chapters = getattr(opts, "remove_chapters", None) or () + + # PostProcessors + postprocessors = [] + if opts.metafromtitle: + postprocessors.append({ + "key": "MetadataFromTitle", + "titleformat": opts.metafromtitle, + }) + if getattr(opts, "add_postprocessors", None): + postprocessors += list(opts.add_postprocessors) + if sponsorblock_query: + postprocessors.append({ + "key": "SponsorBlock", + "categories": sponsorblock_query, + "api": opts.sponsorblock_api, + "when": "pre_process", + }) + if opts.parse_metadata: + 
postprocessors.append({ + "key": "MetadataParser", + "actions": opts.parse_metadata, + "when": "pre_process", + }) + if opts.convertsubtitles: + pp = {"key": "FFmpegSubtitlesConvertor", + "format": opts.convertsubtitles} + if ytdlp: + pp["when"] = "before_dl" + postprocessors.append(pp) + if getattr(opts, "convertthumbnails", None): + postprocessors.append({ + "key": "FFmpegThumbnailsConvertor", + "format": opts.convertthumbnails, + "when": "before_dl", + }) + if getattr(opts, "exec_before_dl_cmd", None): + postprocessors.append({ + "key": "Exec", + "exec_cmd": opts.exec_before_dl_cmd, + "when": "before_dl", + }) + if opts.extractaudio: + postprocessors.append({ + "key": "FFmpegExtractAudio", + "preferredcodec": opts.audioformat, + "preferredquality": opts.audioquality, + "nopostoverwrites": opts.nopostoverwrites, + }) + if getattr(opts, "remuxvideo", None): + postprocessors.append({ + "key": "FFmpegVideoRemuxer", + "preferedformat": opts.remuxvideo, + }) + if opts.recodevideo: + postprocessors.append({ + "key": "FFmpegVideoConvertor", + "preferedformat": opts.recodevideo, + }) + if opts.embedsubtitles: + pp = {"key": "FFmpegEmbedSubtitle"} + if ytdlp: + pp["already_have_subtitle"] = ( + opts.writesubtitles and "no-keep-subs" not in compat_opts) + postprocessors.append(pp) + if not opts.writeautomaticsub and "no-keep-subs" not in compat_opts: + opts.writesubtitles = True + if opts.allsubtitles and not opts.writeautomaticsub: + opts.writesubtitles = True + remove_chapters_patterns, remove_ranges = [], [] + for regex in opts.remove_chapters: + if regex.startswith("*"): + dur = list(map(module.parse_duration, regex[1:].split("-"))) + if len(dur) == 2 and all(t is not None for t in dur): + remove_ranges.append(tuple(dur)) + continue + remove_chapters_patterns.append(re.compile(regex)) + if opts.remove_chapters or sponsorblock_query: + postprocessors.append({ + "key": "ModifyChapters", + "remove_chapters_patterns": remove_chapters_patterns, + "remove_sponsor_segments": opts.sponsorblock_remove, + "remove_ranges": remove_ranges, + "sponsorblock_chapter_title": opts.sponsorblock_chapter_title, + "force_keyframes": opts.force_keyframes_at_cuts, + }) + if opts.addmetadata or addchapters: + pp = {"key": "FFmpegMetadata"} + if ytdlp: + pp["add_chapters"] = addchapters + pp["add_metadata"] = opts.addmetadata + postprocessors.append(pp) + if getattr(opts, "sponskrub", False) is not False: + postprocessors.append({ + "key": "SponSkrub", + "path": opts.sponskrub_path, + "args": opts.sponskrub_args, + "cut": opts.sponskrub_cut, + "force": opts.sponskrub_force, + "ignoreerror": opts.sponskrub is None, + }) + if opts.embedthumbnail: + already_have_thumbnail = (opts.writethumbnail or + opts.write_all_thumbnails) + postprocessors.append({ + "key": "EmbedThumbnail", + "already_have_thumbnail": already_have_thumbnail, + }) + if not already_have_thumbnail: + opts.writethumbnail = True + if isinstance(opts.outtmpl, dict): + opts.outtmpl["pl_thumbnail"] = "" + if getattr(opts, "split_chapters", None): + postprocessors.append({ + "key": "FFmpegSplitChapters", + "force_keyframes": opts.force_keyframes_at_cuts, + }) + if opts.xattrs: + postprocessors.append({"key": "XAttrMetadata"}) + if opts.exec_cmd: + postprocessors.append({ + "key": "Exec", + "exec_cmd": opts.exec_cmd, + "when": "after_move", + }) + + match_filter = ( + None if opts.match_filter is None + else module.match_filter_func(opts.match_filter)) + + return { + "usenetrc": opts.usenetrc, + "netrc_location": getattr(opts, "netrc_location", None), + 
"username": opts.username, + "password": opts.password, + "twofactor": opts.twofactor, + "videopassword": opts.videopassword, + "ap_mso": opts.ap_mso, + "ap_username": opts.ap_username, + "ap_password": opts.ap_password, + "quiet": opts.quiet, + "no_warnings": opts.no_warnings, + "forceurl": opts.geturl, + "forcetitle": opts.gettitle, + "forceid": opts.getid, + "forcethumbnail": opts.getthumbnail, + "forcedescription": opts.getdescription, + "forceduration": opts.getduration, + "forcefilename": opts.getfilename, + "forceformat": opts.getformat, + "forceprint": getattr(opts, "forceprint", None) or (), + "force_write_download_archive": getattr( + opts, "force_write_download_archive", None), + "simulate": opts.simulate, + "skip_download": opts.skip_download, + "format": opts.format, + "allow_unplayable_formats": getattr( + opts, "allow_unplayable_formats", None), + "ignore_no_formats_error": getattr( + opts, "ignore_no_formats_error", None), + "format_sort": getattr( + opts, "format_sort", None), + "format_sort_force": getattr( + opts, "format_sort_force", None), + "allow_multiple_video_streams": opts.allow_multiple_video_streams, + "allow_multiple_audio_streams": opts.allow_multiple_audio_streams, + "check_formats": getattr( + opts, "check_formats", None), + "listformats": opts.listformats, + "listformats_table": getattr( + opts, "listformats_table", None), + "outtmpl": opts.outtmpl, + "outtmpl_na_placeholder": opts.outtmpl_na_placeholder, + "paths": getattr(opts, "paths", None), + "autonumber_size": opts.autonumber_size, + "autonumber_start": opts.autonumber_start, + "restrictfilenames": opts.restrictfilenames, + "windowsfilenames": getattr(opts, "windowsfilenames", None), + "ignoreerrors": opts.ignoreerrors, + "force_generic_extractor": opts.force_generic_extractor, + "ratelimit": opts.ratelimit, + "throttledratelimit": getattr(opts, "throttledratelimit", None), + "overwrites": getattr(opts, "overwrites", None), + "retries": opts.retries, + "fragment_retries": opts.fragment_retries, + "extractor_retries": getattr(opts, "extractor_retries", None), + "skip_unavailable_fragments": opts.skip_unavailable_fragments, + "keep_fragments": opts.keep_fragments, + "concurrent_fragment_downloads": getattr( + opts, "concurrent_fragment_downloads", None), + "buffersize": opts.buffersize, + "noresizebuffer": opts.noresizebuffer, + "http_chunk_size": opts.http_chunk_size, + "continuedl": opts.continue_dl, + "noprogress": True if opts.noprogress is None else opts.noprogress, + "playliststart": opts.playliststart, + "playlistend": opts.playlistend, + "playlistreverse": opts.playlist_reverse, + "playlistrandom": opts.playlist_random, + "noplaylist": opts.noplaylist, + "logtostderr": outtmpl_default == "-", + "consoletitle": opts.consoletitle, + "nopart": opts.nopart, + "updatetime": opts.updatetime, + "writedescription": opts.writedescription, + "writeannotations": opts.writeannotations, + "writeinfojson": opts.writeinfojson, + "allow_playlist_files": opts.allow_playlist_files, + "clean_infojson": opts.clean_infojson, + "getcomments": getattr(opts, "getcomments", None), + "writethumbnail": opts.writethumbnail, + "write_all_thumbnails": opts.write_all_thumbnails, + "writelink": getattr(opts, "writelink", None), + "writeurllink": getattr(opts, "writeurllink", None), + "writewebloclink": getattr(opts, "writewebloclink", None), + "writedesktoplink": getattr(opts, "writedesktoplink", None), + "writesubtitles": opts.writesubtitles, + "writeautomaticsub": opts.writeautomaticsub, + "allsubtitles": opts.allsubtitles, 
+ "listsubtitles": opts.listsubtitles, + "subtitlesformat": opts.subtitlesformat, + "subtitleslangs": opts.subtitleslangs, + "matchtitle": module.decodeOption(opts.matchtitle), + "rejecttitle": module.decodeOption(opts.rejecttitle), + "max_downloads": opts.max_downloads, + "prefer_free_formats": opts.prefer_free_formats, + "trim_file_name": getattr(opts, "trim_file_name", None), + "verbose": opts.verbose, + "dump_intermediate_pages": opts.dump_intermediate_pages, + "write_pages": opts.write_pages, + "test": opts.test, + "keepvideo": opts.keepvideo, + "min_filesize": opts.min_filesize, + "max_filesize": opts.max_filesize, + "min_views": opts.min_views, + "max_views": opts.max_views, + "daterange": date, + "cachedir": opts.cachedir, + "youtube_print_sig_code": opts.youtube_print_sig_code, + "age_limit": opts.age_limit, + "download_archive": download_archive_fn, + "break_on_existing": getattr(opts, "break_on_existing", None), + "break_on_reject": getattr(opts, "break_on_reject", None), + "skip_playlist_after_errors": getattr( + opts, "skip_playlist_after_errors", None), + "cookiefile": opts.cookiefile, + "cookiesfrombrowser": getattr(opts, "cookiesfrombrowser", None), + "nocheckcertificate": opts.no_check_certificate, + "prefer_insecure": opts.prefer_insecure, + "proxy": opts.proxy, + "socket_timeout": opts.socket_timeout, + "bidi_workaround": opts.bidi_workaround, + "debug_printtraffic": opts.debug_printtraffic, + "prefer_ffmpeg": opts.prefer_ffmpeg, + "include_ads": opts.include_ads, + "default_search": opts.default_search, + "dynamic_mpd": getattr(opts, "dynamic_mpd", None), + "extractor_args": getattr(opts, "extractor_args", None), + "youtube_include_dash_manifest": getattr( + opts, "youtube_include_dash_manifest", None), + "youtube_include_hls_manifest": getattr( + opts, "youtube_include_hls_manifest", None), + "encoding": opts.encoding, + "extract_flat": opts.extract_flat, + "mark_watched": opts.mark_watched, + "merge_output_format": opts.merge_output_format, + "postprocessors": postprocessors, + "fixup": opts.fixup, + "source_address": opts.source_address, + "sleep_interval_requests": getattr( + opts, "sleep_interval_requests", None), + "sleep_interval": opts.sleep_interval, + "max_sleep_interval": opts.max_sleep_interval, + "sleep_interval_subtitles": getattr( + opts, "sleep_interval_subtitles", None), + "external_downloader": opts.external_downloader, + "playlist_items": opts.playlist_items, + "xattr_set_filesize": opts.xattr_set_filesize, + "match_filter": match_filter, + "no_color": opts.no_color, + "ffmpeg_location": opts.ffmpeg_location, + "hls_prefer_native": opts.hls_prefer_native, + "hls_use_mpegts": opts.hls_use_mpegts, + "hls_split_discontinuity": getattr( + opts, "hls_split_discontinuity", None), + "external_downloader_args": opts.external_downloader_args, + "postprocessor_args": opts.postprocessor_args, + "cn_verification_proxy": opts.cn_verification_proxy, + "geo_verification_proxy": opts.geo_verification_proxy, + "geo_bypass": opts.geo_bypass, + "geo_bypass_country": opts.geo_bypass_country, + "geo_bypass_ip_block": opts.geo_bypass_ip_block, + "compat_opts": compat_opts, + } + + +def parse_retries(retries, name=""): + if retries in ("inf", "infinite"): + return float("inf") + return int(retries) -- cgit v1.2.3
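For context on the job.py change above: the old inline blacklist set is replaced by
util.build_extractor_filter(), which compiles a whitelist/blacklist specification such
as "twitter,danbooru:tag" into a predicate over extractor classes, supporting
"category:subcategory" pairs and "*" wildcards. A minimal, self-contained sketch of
the matching semantics (the Extr class and all category names below are hypothetical
stand-ins, not gallery-dl's API):

    class Extr:
        """Stand-in for an extractor class with its three category attributes"""
        def __init__(self, category, subcategory="", basecategory=""):
            self.category = category
            self.subcategory = subcategory
            self.basecategory = basecategory

    def build_filter(categories, negate=True):
        # same three-way split as in util.build_extractor_filter()
        catset, subset, catsub = set(), set(), []
        for item in categories:
            category, _, subcategory = item.partition(":")
            if category and category != "*":
                if subcategory and subcategory != "*":
                    catsub.append((category, subcategory))
                else:
                    catset.add(category)
            elif subcategory and subcategory != "*":
                subset.add(subcategory)

        def test(extr):
            hit = (
                extr.category in catset or
                extr.basecategory in catset or
                extr.subcategory in subset or
                any(c in (extr.category, extr.basecategory) and
                    s == extr.subcategory for c, s in catsub)
            )
            return not hit if negate else hit

        return test

    # blacklist "twitter,danbooru:tag": reject those two, allow the rest
    deny = build_filter(["twitter", "danbooru:tag"], negate=True)
    assert deny(Extr("mastodon"))             # unrelated category passes
    assert not deny(Extr("twitter"))          # blacklisted category
    assert not deny(Extr("danbooru", "tag"))  # blacklisted pair
    assert deny(Extr("danbooru", "pool"))     # same category, different pair

The real helper additionally merges util.SPECIAL_EXTRACTORS into the category set for
blacklists, and short-circuits to the new util.true/util.false constants when the
specification is empty.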
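The new ytdl.construct_YoutubeDL() lets a "config-file" and/or "cmdline-args" option
take precedence over gallery-dl's own keys ("format", "rate", "part", ...), with
"raw-options" merged last so it always wins. The tokenizing step uses shlex with
comment support; a small sketch of that behavior (the file contents are made up):

    import shlex

    contents = """
    # comments are stripped, as in a youtube-dl configuration file
    --no-part
    --retries 5
    --user-agent "Mozilla/5.0 (X11; Linux x86_64)"
    """
    argv = shlex.split(contents, comments=True)
    print(argv)
    # ['--no-part', '--retries', '5',
    #  '--user-agent', 'Mozilla/5.0 (X11; Linux x86_64)']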
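parse_command_line() serves both youtube_dl and yt_dlp from a single code path:
options that exist in only one of the two parsers are probed with
getattr(opts, name, None), so they fall through as None on the other. Reduced to a
sketch, with SimpleNamespace standing in for the parsed-options object:

    from types import SimpleNamespace

    # youtube-dl's parser has no --throttled-rate; yt-dlp's does
    ytdl_opts = SimpleNamespace(ratelimit="1M")
    ytdlp_opts = SimpleNamespace(ratelimit="1M", throttledratelimit="100K")

    for opts in (ytdl_opts, ytdlp_opts):
        print(getattr(opts, "throttledratelimit", None))
    # None
    # 100K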
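The dict built at the end of parse_command_line() is shaped so it can be handed
straight to module.YoutubeDL(), which is what construct_YoutubeDL() ultimately does.
A hedged usage sketch, assuming yt_dlp (or youtube_dl) is installed and using example
option values:

    import importlib

    module = importlib.import_module("yt_dlp")  # or "youtube_dl"
    ydl = module.YoutubeDL({
        "retries": 5,
        "socket_timeout": 30.0,
        "nocheckcertificate": False,
        "ratelimit": 1048576,  # bytes per second
    })
    # info = ydl.extract_info(url, download=False)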