author    Unit 193 <unit193@unit193.net>    2021-11-01 05:03:49 -0400
committer Unit 193 <unit193@unit193.net>    2021-11-01 05:03:49 -0400
commit    4a965d875415907cc1a016b428ae305a964f9228 (patch)
tree      7cece9948a7ba390348e00c669f9cb1f7a9ba39a /gallery_dl
parent    34ba2951b8c523713425c98addb9256ea05c946f (diff)

New upstream version 1.19.1 (tag: upstream/1.19.1)
Diffstat (limited to 'gallery_dl')
 -rw-r--r--  gallery_dl/cache.py                  |  21
 -rw-r--r--  gallery_dl/downloader/ytdl.py        |   3
 -rw-r--r--  gallery_dl/extractor/__init__.py     |   1
 -rw-r--r--  gallery_dl/extractor/cyberdrop.py    |  22
 -rw-r--r--  gallery_dl/extractor/deviantart.py   |  84
 -rw-r--r--  gallery_dl/extractor/furaffinity.py  |   2
 -rw-r--r--  gallery_dl/extractor/gfycat.py       |  31
 -rw-r--r--  gallery_dl/extractor/inkbunny.py     | 100
 -rw-r--r--  gallery_dl/extractor/kemonoparty.py  | 135
 -rw-r--r--  gallery_dl/extractor/mangadex.py     |  27
 -rw-r--r--  gallery_dl/extractor/nhentai.py      |  92
 -rw-r--r--  gallery_dl/extractor/patreon.py      |  39
 -rw-r--r--  gallery_dl/extractor/picarto.py      |  74
 -rw-r--r--  gallery_dl/extractor/pixiv.py        |  62
 -rw-r--r--  gallery_dl/extractor/seisoparty.py   |  65
 -rw-r--r--  gallery_dl/extractor/twitter.py      |  20
 -rw-r--r--  gallery_dl/extractor/vk.py           | 132
 -rw-r--r--  gallery_dl/postprocessor/compare.py  |  58
 -rw-r--r--  gallery_dl/version.py                |   2
 19 files changed, 743 insertions(+), 227 deletions(-)
diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py
index 7a49b61..923ed32 100644
--- a/gallery_dl/cache.py
+++ b/gallery_dl/cache.py
@@ -211,13 +211,18 @@ def _path():
return os.path.join(cachedir, "cache.sqlite3")
-try:
- dbfile = _path()
+def _init():
+ try:
+ dbfile = _path()
+
+ # restrict access permissions for new db files
+ os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600))
+
+ DatabaseCacheDecorator.db = sqlite3.connect(
+ dbfile, timeout=60, check_same_thread=False)
+ except (OSError, TypeError, sqlite3.OperationalError):
+ global cache
+ cache = memcache
- # restrict access permissions for new db files
- os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600))
- DatabaseCacheDecorator.db = sqlite3.connect(
- dbfile, timeout=60, check_same_thread=False)
-except (OSError, TypeError, sqlite3.OperationalError):
- cache = memcache # noqa: F811
+_init()
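
The refactor wraps database setup in _init() and pre-creates the file via os.open() with O_CREAT and mode 0o600, so a brand-new cache is only readable by its owner; the mode argument has no effect on an already-existing file. A minimal standalone sketch of the same pattern (the path is a placeholder):

    import os
    import sqlite3

    def open_private_db(dbfile):
        """Connect to an SQLite database, creating it with mode 0o600 if new.

        The mode passed to os.open() only applies when O_CREAT actually
        creates the file; permissions of an existing file stay untouched.
        """
        fd = os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600)
        os.close(fd)  # only the create side effect is needed, not the fd
        return sqlite3.connect(dbfile, timeout=60, check_same_thread=False)

    db = open_private_db("/tmp/example-cache.sqlite3")  # placeholder path
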
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 86e247b..f4d3e05 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -136,8 +136,9 @@ class YoutubeDLDownloader(DownloaderBase):
def _progress_hook(self, info):
if info["status"] == "downloading" and \
info["elapsed"] >= self.progress:
+ total = info.get("total_bytes") or info.get("total_bytes_estimate")
self.out.progress(
- info["total_bytes"],
+ None if total is None else int(total),
info["downloaded_bytes"],
int(info["speed"]),
)
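
Some streams never report an exact total_bytes, only total_bytes_estimate, or neither; the hook now degrades gracefully instead of raising a KeyError. The fallback logic in isolation:

    def total_from_info(info):
        # prefer the exact size; fall back to the estimate; else unknown
        total = info.get("total_bytes") or info.get("total_bytes_estimate")
        return None if total is None else int(total)

    assert total_from_info({"total_bytes": 1000}) == 1000
    assert total_from_info({"total_bytes_estimate": 999.5}) == 999
    assert total_from_info({}) is None
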
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index c512548..93702ab 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -95,6 +95,7 @@ modules = [
"philomena",
"photobucket",
"photovogue",
+ "picarto",
"piczel",
"pillowfort",
"pinterest",
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index 2004921..d1b1b25 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -8,7 +8,6 @@
from .common import Extractor, Message
from .. import text
-import binascii
class CyberdropAlbumExtractor(Extractor):
@@ -19,7 +18,7 @@ class CyberdropAlbumExtractor(Extractor):
archive_fmt = "{album_id}_{id}"
pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)"
test = ("https://cyberdrop.me/a/keKRjm4t", {
- "pattern": r"https://f\.cyberdrop\.cc/.*\.[a-z]+$",
+ "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.[a-z]+$",
"keyword": {
"album_id": "keKRjm4t",
"album_name": "Fate (SFW)",
@@ -38,7 +37,14 @@ class CyberdropAlbumExtractor(Extractor):
def items(self):
url = self.root + "/a/" + self.album_id
extr = text.extract_from(self.request(url).text)
- extr("const albumData = {", "")
+
+ files = []
+ append = files.append
+ while True:
+ url = extr('downloadUrl: "', '"')
+ if not url:
+ break
+ append(text.unescape(url))
data = {
"album_id" : self.album_id,
@@ -46,13 +52,11 @@ class CyberdropAlbumExtractor(Extractor):
"date" : text.parse_timestamp(extr("timestamp: ", ",")),
"album_size" : text.parse_int(extr("totalSize: ", ",")),
"description": extr("description: `", "`"),
+ "count" : len(files),
}
- files = extr("fl: '", "'").split(",")
- data["count"] = len(files)
yield Message.Directory, data
- for file_b64 in files:
- file = binascii.a2b_base64(file_b64).decode()
- text.nameext_from_url(file, data)
+ for url in files:
+ text.nameext_from_url(url, data)
data["filename"], _, data["id"] = data["filename"].rpartition("-")
- yield Message.Url, "https://f.cyberdrop.cc/" + file, data
+ yield Message.Url, url, data
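
Rather than base64-decoding the removed fl: blob, the extractor now scans the page once and collects every downloadUrl: "..." value. text.extract_from is gallery_dl's stateful scanner; a rough stdlib-only stand-in (the helper name is hypothetical):

    def extract_from(page):
        """Return a scanner yielding text between two markers,
        continuing from where the previous call left off."""
        pos = 0
        def extr(begin, end):
            nonlocal pos
            start = page.find(begin, pos)
            if start < 0:
                return ""
            start += len(begin)
            stop = page.find(end, start)
            pos = stop + len(end)
            return page[start:stop]
        return extr

    page = 'downloadUrl: "https://a/1.jpg", downloadUrl: "https://a/2.jpg",'
    extr = extract_from(page)
    files = []
    while True:
        url = extr('downloadUrl: "', '"')
        if not url:
            break
        files.append(url)
    assert files == ["https://a/1.jpg", "https://a/2.jpg"]
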
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 7dac770..4604d39 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -14,6 +14,7 @@ from ..cache import cache, memcache
import collections
import itertools
import mimetypes
+import binascii
import time
import re
@@ -39,7 +40,6 @@ class DeviantartExtractor(Extractor):
self.offset = 0
self.flat = self.config("flat", True)
self.extra = self.config("extra", False)
- self.quality = self.config("quality", "100")
self.original = self.config("original", True)
self.comments = self.config("comments", False)
self.user = match.group(1) or match.group(2)
@@ -53,9 +53,6 @@ class DeviantartExtractor(Extractor):
else:
self.unwatch = None
- if self.quality:
- self.quality = ",q_{}".format(self.quality)
-
if self.original != "image":
self._update_content = self._update_content_default
else:
@@ -104,19 +101,8 @@ class DeviantartExtractor(Extractor):
if self.original and deviation["is_downloadable"]:
self._update_content(deviation, content)
-
- if content["src"].startswith("https://images-wixmp-"):
- if deviation["index"] <= 790677560:
- # https://github.com/r888888888/danbooru/issues/4069
- intermediary, count = re.subn(
- r"(/f/[^/]+/[^/]+)/v\d+/.*",
- r"/intermediary\1", content["src"], 1)
- if count:
- deviation["_fallback"] = (content["src"],)
- content["src"] = intermediary
- if self.quality:
- content["src"] = re.sub(
- r",q_\d+", self.quality, content["src"], 1)
+ else:
+ self._update_token(deviation, content)
yield self.commit(deviation, content)
@@ -302,6 +288,32 @@ class DeviantartExtractor(Extractor):
if mtype and mtype.startswith("image/"):
content.update(data)
+ def _update_token(self, deviation, content):
+ """Replace JWT to be able to remove width/height limits
+
+ All credit goes to @Ironchest337
+ for discovering and implementing this method
+ """
+ url, sep, _ = content["src"].partition("/v1/")
+ if not sep:
+ return
+
+ # header = b'{"typ":"JWT","alg":"none"}'
+ payload = (
+ b'{"sub":"urn:app:","iss":"urn:app:","obj":[[{"path":"/f/' +
+ url.partition("/f/")[2].encode() +
+ b'"}]],"aud":["urn:service:file.download"]}'
+ )
+
+ deviation["_fallback"] = (content["src"],)
+ content["src"] = (
+ "{}?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.{}.".format(
+ url,
+ # base64 of 'header' is precomputed as 'eyJ0eX...'
+ # binascii.a2b_base64(header).rstrip(b"=\n").decode(),
+ binascii.b2a_base64(payload).rstrip(b"=\n").decode())
+ )
+
def _limited_request(self, url, **kwargs):
"""Limits HTTP requests to one every 2 seconds"""
kwargs["fatal"] = None
@@ -746,29 +758,27 @@ class DeviantartPopularExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
- self.search_term = self.time_range = self.category_path = None
self.user = ""
trange1, path, trange2, query = match.groups()
- trange = trange1 or trange2
query = text.parse_query(query)
-
- if not trange:
- trange = query.get("order")
-
- if path:
- self.category_path = path.strip("/")
- if trange:
- if trange.startswith("popular-"):
- trange = trange[8:]
- self.time_range = trange.replace("-", "").replace("hours", "hr")
- if query:
- self.search_term = query.get("q")
+ self.search_term = query.get("q")
+
+ trange = trange1 or trange2 or query.get("order", "")
+ if trange.startswith("popular-"):
+ trange = trange[8:]
+ self.time_range = {
+ "most-recent" : "now",
+ "this-week" : "1week",
+ "this-month" : "1month",
+ "this-century": "alltime",
+ "all-time" : "alltime",
+ }.get(trange, "alltime")
self.popular = {
"search": self.search_term or "",
- "range" : trange or "",
- "path" : self.category_path,
+ "range" : trange or "all-time",
+ "path" : path.strip("/") if path else "",
}
def deviations(self):
@@ -851,12 +861,8 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
}),
# wixmp URL rewrite
(("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
- "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
- r"/intermediary/f/[^/]+/[^.]+\.jpg")
- }),
- # wixmp URL rewrite v2 (#369)
- (("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), {
- "pattern": r"https://images-wixmp-\w+\.wixmp\.com/.*,q_100,"
+ "pattern": (r"https://images-wixmp-\w+\.wixmp\.com/f"
+ r"/[^/]+/[^.]+\.jpg\?token="),
}),
# GIF (#242)
(("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), {
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 9516dfa..b5ecbd6 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -250,7 +250,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
FuraffinityExtractor.__init__(self, match)
self.query = text.parse_query(match.group(2))
if self.user and "q" not in self.query:
- self.query["q"] = text.unescape(self.user)
+ self.query["q"] = text.unquote(self.user)
def metadata(self):
return {"search": self.query.get("q")}
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index 2757852..9b4d5ee 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -22,7 +22,13 @@ class GfycatExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.key = match.group(1).lower()
- self.formats = (self.config("format", "mp4"), "mp4", "webm", "gif")
+
+ formats = self.config("format")
+ if formats is None:
+ formats = ("mp4", "webm", "mobile", "gif")
+ elif isinstance(formats, str):
+ formats = (formats, "mp4", "webm", "mobile", "gif")
+ self.formats = formats
def items(self):
metadata = self.metadata()
@@ -30,23 +36,25 @@ class GfycatExtractor(Extractor):
if "gfyName" not in gfycat:
self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"])
continue
- url = self._select_format(gfycat)
+ url = self._process(gfycat)
gfycat.update(metadata)
- gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
yield Message.Directory, gfycat
yield Message.Url, url, gfycat
- def _select_format(self, gfyitem):
+ def _process(self, gfycat):
+ gfycat["_fallback"] = formats = self._formats(gfycat)
+ gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
+ return next(formats, "")
+
+ def _formats(self, gfycat):
for fmt in self.formats:
key = fmt + "Url"
- if key in gfyitem:
- url = gfyitem[key]
+ if key in gfycat:
+ url = gfycat[key]
if url.startswith("http:"):
url = "https" + url[4:]
- gfyitem["extension"] = url.rpartition(".")[2]
- return url
- gfyitem["extension"] = ""
- return ""
+ gfycat["extension"] = url.rpartition(".")[2]
+ yield url
def metadata(self):
return {}
@@ -146,8 +154,7 @@ class GfycatImageExtractor(GfycatExtractor):
if "gfyName" not in gfycat:
self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"])
return
- url = self._select_format(gfycat)
- gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
+ url = self._process(gfycat)
yield Message.Directory, gfycat
yield Message.Url, url, gfycat
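
In _process, a single generator serves two consumers: next(formats, "") pulls the preferred URL, and the partially consumed generator is stored as _fallback, so the remaining formats are only evaluated if the first download fails. The pattern reduced to its core:

    def candidate_urls(item, preferences):
        # yield the URL for each available format, best first
        for fmt in preferences:
            key = fmt + "Url"
            if key in item:
                yield item[key]

    item = {"webmUrl": "https://e/x.webm", "gifUrl": "https://e/x.gif"}
    urls = candidate_urls(item, ("mp4", "webm", "gif"))
    primary = next(urls, "")   # "https://e/x.webm" (no mp4 available)
    fallbacks = urls           # lazily yields the gif URL only on demand
    assert primary == "https://e/x.webm"
    assert list(fallbacks) == ["https://e/x.gif"]
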
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index cbe0f43..3d09d79 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -135,33 +135,123 @@ class InkbunnyUserExtractor(InkbunnyExtractor):
return self.api.search(params)
+class InkbunnyPoolExtractor(InkbunnyExtractor):
+ """Extractor for inkbunny pools"""
+ subcategory = "pool"
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"poolview_process\.php\?pool_id=(\d+)|"
+ r"submissionsviewall\.php\?([^#]+&mode=pool&[^#]+))")
+ test = (
+ ("https://inkbunny.net/poolview_process.php?pool_id=28985", {
+ "count": 9,
+ }),
+ ("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
+ "&mode=pool&pool_id=28985&page=1&orderby=pool_order&random=no"),
+ )
+
+ def __init__(self, match):
+ InkbunnyExtractor.__init__(self, match)
+ pid = match.group(1)
+ if pid:
+ self.pool_id = pid
+ self.orderby = "pool_order"
+ else:
+ params = text.parse_query(match.group(2))
+ self.pool_id = params.get("pool_id")
+ self.orderby = params.get("orderby", "pool_order")
+
+ def posts(self):
+ params = {
+ "pool_id": self.pool_id,
+ "orderby": self.orderby,
+ }
+ return self.api.search(params)
+
+
class InkbunnyFavoriteExtractor(InkbunnyExtractor):
"""Extractor for inkbunny user favorites"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/userfavorites_process\.php\?favs_user_id=(\d+)"
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"userfavorites_process\.php\?favs_user_id=(\d+)|"
+ r"submissionsviewall\.php\?([^#]+&mode=userfavs&[^#]+))")
test = (
("https://inkbunny.net/userfavorites_process.php?favs_user_id=20969", {
"pattern": r"https://[\w.]+\.metapix\.net/files/full"
r"/\d+/\d+_\w+_.+",
"range": "20-50",
}),
+ ("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
+ "&mode=userfavs&random=no&orderby=fav_datetime&page=1&user_id=20969"),
)
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
- self.user_id = match.group(1)
+ uid = match.group(1)
+ if uid:
+ self.user_id = uid
+ self.orderby = self.config("orderby", "fav_datetime")
+ else:
+ params = text.parse_query(match.group(2))
+ self.user_id = params.get("user_id")
+ self.orderby = params.get("orderby", "fav_datetime")
def posts(self):
- orderby = self.config("orderby", "fav_datetime")
params = {
"favs_user_id": self.user_id,
- "orderby" : orderby,
+ "orderby" : self.orderby,
}
- if orderby and orderby.startswith("unread_"):
+ if self.orderby and self.orderby.startswith("unread_"):
params["unread_submissions"] = "yes"
return self.api.search(params)
+class InkbunnyFollowingExtractor(InkbunnyExtractor):
+ """Extractor for inkbunny user watches"""
+ subcategory = "following"
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"watchlist_process\.php\?mode=watching&user_id=(\d+)|"
+ r"usersviewall\.php\?([^#]+&mode=watching&[^#]+))")
+ test = (
+ (("https://inkbunny.net/watchlist_process.php"
+ "?mode=watching&user_id=20969"), {
+ "pattern": InkbunnyUserExtractor.pattern,
+ "count": ">= 90",
+ }),
+ ("https://inkbunny.net/usersviewall.php?rid=ffffffffff"
+ "&mode=watching&page=1&user_id=20969&orderby=added&namesonly="),
+ )
+
+ def __init__(self, match):
+ InkbunnyExtractor.__init__(self, match)
+ self.user_id = match.group(1) or \
+ text.parse_query(match.group(2)).get("user_id")
+
+ def items(self):
+ url = self.root + "/watchlist_process.php"
+ params = {"mode": "watching", "user_id": self.user_id}
+
+ with self.request(url, params=params) as response:
+ url, _, params = response.url.partition("?")
+ page = response.text
+
+ params = text.parse_query(params)
+ params["page"] = text.parse_int(params.get("page"), 1)
+ data = {"_extractor": InkbunnyUserExtractor}
+
+ while True:
+ cnt = 0
+ for user in text.extract_iter(
+ page, '<a class="widget_userNameSmall" href="', '"',
+ page.index('id="changethumboriginal_form"')):
+ cnt += 1
+ yield Message.Queue, self.root + user, data
+
+ if cnt < 20:
+ return
+ params["page"] += 1
+ page = self.request(url, params=params).text
+
+
class InkbunnyPostExtractor(InkbunnyExtractor):
"""Extractor for individual Inkbunny posts"""
subcategory = "post"
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index c5f5ae7..d5aad67 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -14,7 +14,8 @@ from ..cache import cache
import itertools
import re
-BASE_PATTERN = r"(?:https?://)?kemono\.party/([^/?#]+)/user/([^/?#]+)"
+BASE_PATTERN = r"(?:https?://)?kemono\.party"
+USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
class KemonopartyExtractor(Extractor):
@@ -29,7 +30,9 @@ class KemonopartyExtractor(Extractor):
def items(self):
self._prepare_ddosguard_cookies()
- find_inline = re.compile(r'src="(/inline/[^"]+)').findall
+ find_inline = re.compile(
+ r'src="(?:https?://kemono\.party)?(/inline/[^"]+'
+ r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
skip_service = \
"patreon" if self.config("patreon-skip-file", True) else None
@@ -101,7 +104,7 @@ class KemonopartyExtractor(Extractor):
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
+ pattern = USER_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
test = (
("https://kemono.party/fanbox/user/6993449", {
"range": "1-25",
@@ -138,11 +141,11 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
class KemonopartyPostExtractor(KemonopartyExtractor):
"""Extractor for a single kemono.party post"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/post/([^/?#]+)"
+ pattern = USER_PATTERN + r"/post/([^/?#]+)"
test = (
("https://kemono.party/fanbox/user/6993449/post/506575", {
- "pattern": r"https://kemono\.party/data/files/fanbox"
- r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
+ "pattern": r"https://kemono.party/data/21/0f"
+ r"/210f35388e28bbcf756db18dd516e2d82ce75[0-9a-f]+\.jpg",
"keyword": {
"added": "Wed, 06 May 2020 20:28:02 GMT",
"content": str,
@@ -197,10 +200,128 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
return (posts[0],) if len(posts) > 1 else posts
+class KemonopartyDiscordExtractor(KemonopartyExtractor):
+ """Extractor for kemono.party discord servers"""
+ subcategory = "discord"
+ directory_fmt = ("{category}", "discord", "{server}",
+ "{channel_name|channel}")
+ filename_fmt = "{id}_{num:>02}_{filename}.{extension}"
+ archive_fmt = "discord_{server}_{id}_{num}"
+ pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)"
+ test = (
+ (("https://kemono.party/discord"
+ "/server/488668827274444803#finish-work"), {
+ "count": 4,
+ "keyword": {"channel_name": "finish-work"},
+ }),
+ (("https://kemono.party/discord"
+ "/server/256559665620451329/channel/462437519519383555#"), {
+ "pattern": r"https://kemono\.party/data/attachments/discord"
+ r"/256559665620451329/\d+/\d+/.+",
+ "count": ">= 2",
+ }),
+ # 'inline' files
+ (("https://kemono.party/discord"
+ "/server/315262215055736843/channel/315262215055736843#general"), {
+ "pattern": r"https://cdn\.discordapp\.com/attachments/\d+/\d+/.+$",
+ "range": "1-5",
+ "options": (("image-filter", "type == 'inline'"),),
+ }),
+ )
+
+ def __init__(self, match):
+ KemonopartyExtractor.__init__(self, match)
+ self.server, self.channel, self.channel_name = match.groups()
+
+ def items(self):
+ self._prepare_ddosguard_cookies()
+
+ find_inline = re.compile(
+ r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
+ r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
+
+ posts = self.posts()
+ max_posts = self.config("max-posts")
+ if max_posts:
+ posts = itertools.islice(posts, max_posts)
+
+ for post in posts:
+ files = []
+ append = files.append
+ for attachment in post["attachments"]:
+ attachment["type"] = "attachment"
+ append(attachment)
+ for path in find_inline(post["content"] or ""):
+ append({"path": "https://cdn.discordapp.com" + path,
+ "name": path, "type": "inline"})
+
+ post["channel_name"] = self.channel_name
+ post["date"] = text.parse_datetime(
+ post["published"], "%a, %d %b %Y %H:%M:%S %Z")
+ yield Message.Directory, post
+
+ for post["num"], file in enumerate(files, 1):
+ post["type"] = file["type"]
+ url = file["path"]
+ if url[0] == "/":
+ url = self.root + "/data" + url
+ elif url.startswith("https://kemono.party"):
+ url = self.root + "/data" + url[20:]
+
+ text.nameext_from_url(file["name"], post)
+ yield Message.Url, url, post
+
+ def posts(self):
+ if self.channel is None:
+ url = "{}/api/discord/channels/lookup?q={}".format(
+ self.root, self.server)
+ for channel in self.request(url).json():
+ if channel["name"] == self.channel_name:
+ self.channel = channel["id"]
+ break
+ else:
+ raise exception.NotFoundError("channel")
+
+ url = "{}/api/discord/channel/{}".format(self.root, self.channel)
+ params = {"skip": 0}
+
+ while True:
+ posts = self.request(url, params=params).json()
+ yield from posts
+
+ if len(posts) < 25:
+ break
+ params["skip"] += 25
+
+
+class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
+ subcategory = "discord-server"
+ pattern = BASE_PATTERN + r"/discord/server/(\d+)$"
+ test = ("https://kemono.party/discord/server/488668827274444803", {
+ "pattern": KemonopartyDiscordExtractor.pattern,
+ "count": 13,
+ })
+
+ def __init__(self, match):
+ KemonopartyExtractor.__init__(self, match)
+ self.server = match.group(1)
+
+ def items(self):
+ url = "{}/api/discord/channels/lookup?q={}".format(
+ self.root, self.server)
+ channels = self.request(url).json()
+
+ for channel in channels:
+ url = "{}/discord/server/{}/channel/{}#{}".format(
+ self.root, self.server, channel["id"], channel["name"])
+ channel["_extractor"] = KemonopartyDiscordExtractor
+ yield Message.Queue, url, channel
+
+
class KemonopartyFavoriteExtractor(KemonopartyExtractor):
"""Extractor for kemono.party favorites"""
subcategory = "favorite"
- pattern = r"(?:https?://)?kemono\.party/favorites"
+ pattern = BASE_PATTERN + r"/favorites"
test = ("https://kemono.party/favorites", {
"pattern": KemonopartyUserExtractor.pattern,
"url": "f4b5b796979bcba824af84206578c79101c7f0e1",
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 634a92d..ff1d7c3 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -209,22 +209,15 @@ class MangadexAPI():
return self._call("/manga/" + uuid)["data"]
def manga_feed(self, uuid):
- config = self.extractor.config
- order = "desc" if config("chapter-reverse") else "asc"
+ order = "desc" if self.extractor.config("chapter-reverse") else "asc"
params = {
- "order[volume]" : order,
- "order[chapter]" : order,
- "translatedLanguage[]": config("lang"),
- "contentRating[]" : [
- "safe", "suggestive", "erotica", "pornographic"],
+ "order[volume]" : order,
+ "order[chapter]": order,
}
return self._pagination("/manga/" + uuid + "/feed", params)
def user_follows_manga_feed(self):
- params = {
- "order[publishAt]" : "desc",
- "translatedLanguage[]": self.extractor.config("lang"),
- }
+ params = {"order[publishAt]": "desc"}
return self._pagination("/user/follows/manga/feed", params)
def authenticate(self):
@@ -275,8 +268,20 @@ class MangadexAPI():
def _pagination(self, endpoint, params=None):
if params is None:
params = {}
+
+ config = self.extractor.config
+ ratings = config("ratings")
+ if ratings is None:
+ ratings = ("safe", "suggestive", "erotica", "pornographic")
+
+ params["contentRating[]"] = ratings
+ params["translatedLanguage[]"] = config("lang")
params["offset"] = 0
+ api_params = config("api-parameters")
+ if api_params:
+ params.update(api_params)
+
while True:
data = self._call(endpoint, params)
yield from data["data"]
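
_pagination now centralizes the parameters shared by all listing endpoints: contentRating[] from the new ratings option, translatedLanguage[] from lang, and a final api-parameters merge that lets users override anything. The merge order, reduced (config access simplified to a dict):

    def build_params(config, params=None):
        """Merge per-call params with config defaults; 'api-parameters'
        wins because it is applied last."""
        if params is None:
            params = {}
        ratings = config.get("ratings")
        if ratings is None:
            ratings = ("safe", "suggestive", "erotica", "pornographic")
        params["contentRating[]"] = ratings
        params["translatedLanguage[]"] = config.get("lang")
        params["offset"] = 0
        api_params = config.get("api-parameters")
        if api_params:
            params.update(api_params)
        return params

    p = build_params({"api-parameters": {"offset": 40}})
    assert p["offset"] == 40  # the user override survives
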
diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py
index 20b716b..9df43e5 100644
--- a/gallery_dl/extractor/nhentai.py
+++ b/gallery_dl/extractor/nhentai.py
@@ -14,15 +14,10 @@ import collections
import json
-class NhentaiBase():
- """Base class for nhentai extractors"""
+class NhentaiGalleryExtractor(GalleryExtractor):
+ """Extractor for image galleries from nhentai.net"""
category = "nhentai"
root = "https://nhentai.net"
- media_url = "https://i.nhentai.net"
-
-
-class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
- """Extractor for image galleries from nhentai.net"""
pattern = r"(?:https?://)?nhentai\.net/g/(\d+)"
test = ("https://nhentai.net/g/147850/", {
"url": "5179dbf0f96af44005a0ff705a0ad64ac26547d0",
@@ -87,8 +82,8 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
}
def images(self, _):
- ufmt = "{}/galleries/{}/{{}}.{{}}".format(
- self.media_url, self.data["media_id"])
+ ufmt = ("https://i.nhentai.net/galleries/" +
+ self.data["media_id"] + "/{}.{}")
extdict = {"j": "jpg", "p": "png", "g": "gif"}
return [
@@ -99,28 +94,24 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
]
-class NhentaiSearchExtractor(NhentaiBase, Extractor):
- """Extractor for nhentai search results"""
- subcategory = "search"
- pattern = r"(?:https?://)?nhentai\.net/search/?\?([^#]+)"
- test = ("https://nhentai.net/search/?q=touhou", {
- "pattern": NhentaiGalleryExtractor.pattern,
- "count": 30,
- "range": "1-30",
- })
+class NhentaiExtractor(Extractor):
+ """Base class for nhentai extractors"""
+ category = "nhentai"
+ root = "https://nhentai.net"
def __init__(self, match):
Extractor.__init__(self, match)
- self.params = text.parse_query(match.group(1))
+ self.path, self.query = match.groups()
def items(self):
data = {"_extractor": NhentaiGalleryExtractor}
- for gallery_id in self._pagination(self.params):
+ for gallery_id in self._pagination():
url = "{}/g/{}/".format(self.root, gallery_id)
yield Message.Queue, url, data
- def _pagination(self, params):
- url = "{}/search/".format(self.root)
+ def _pagination(self):
+ url = self.root + self.path
+ params = text.parse_query(self.query)
params["page"] = text.parse_int(params.get("page"), 1)
while True:
@@ -131,29 +122,40 @@ class NhentaiSearchExtractor(NhentaiBase, Extractor):
params["page"] += 1
-class NhentaiFavoriteExtractor(NhentaiBase, Extractor):
+class NhentaiTagExtractor(NhentaiExtractor):
+ """Extractor for nhentai tag searches"""
+ subcategory = "tag"
+ pattern = (r"(?:https?://)?nhentai\.net("
+ r"/(?:artist|category|character|group|language|parody|tag)"
+ r"/[^/?#]+(?:/popular[^/?#]*)?/?)(?:\?([^#]+))?")
+ test = (
+ ("https://nhentai.net/tag/sole-female/", {
+ "pattern": NhentaiGalleryExtractor.pattern,
+ "count": 30,
+ "range": "1-30",
+ }),
+ ("https://nhentai.net/artist/itou-life/"),
+ ("https://nhentai.net/group/itou-life/"),
+ ("https://nhentai.net/parody/touhou-project/"),
+ ("https://nhentai.net/character/patchouli-knowledge/popular"),
+ ("https://nhentai.net/category/doujinshi/popular-today"),
+ ("https://nhentai.net/language/english/popular-week"),
+ )
+
+
+class NhentaiSearchExtractor(NhentaiExtractor):
+ """Extractor for nhentai search results"""
+ subcategory = "search"
+ pattern = r"(?:https?://)?nhentai\.net(/search/?)\?([^#]+)"
+ test = ("https://nhentai.net/search/?q=touhou", {
+ "pattern": NhentaiGalleryExtractor.pattern,
+ "count": 30,
+ "range": "1-30",
+ })
+
+
+class NhentaiFavoriteExtractor(NhentaiExtractor):
"""Extractor for nhentai favorites"""
subcategory = "favorite"
- pattern = r"(?:https?://)?nhentai\.net/favorites/?(?:\?([^#]+))?"
+ pattern = r"(?:https?://)?nhentai\.net(/favorites/?)(?:\?([^#]+))?"
test = ("https://nhentai.net/favorites/",)
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.params = text.parse_query(match.group(1))
-
- def items(self):
- data = {"_extractor": NhentaiGalleryExtractor}
- for gallery_id in self._pagination(self.params):
- url = "{}/g/{}/".format(self.root, gallery_id)
- yield Message.Queue, url, data
-
- def _pagination(self, params):
- url = "{}/favorites/".format(self.root)
- params["page"] = text.parse_int(params.get("page"), 1)
-
- while True:
- page = self.request(url, params=params).text
- yield from text.extract_iter(page, 'href="/g/', '/')
- if 'class="next"' not in page:
- return
- params["page"] += 1
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 547465b..c7df089 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -32,22 +32,19 @@ class PatreonExtractor(Extractor):
if "session_id" not in self.session.cookies:
self.log.warning("no 'session_id' cookie set")
PatreonExtractor._warning = False
+ generators = self._build_file_generators(self.config("files"))
for post in self.posts():
if not post.get("current_user_can_view", True):
self.log.warning("Not allowed to view post %s", post["id"])
continue
+ yield Message.Directory, post
+
post["num"] = 0
hashes = set()
-
- yield Message.Directory, post
- for kind, url, name in itertools.chain(
- self._images(post),
- self._attachments(post),
- self._postfile(post),
- self._content(post),
- ):
+ for kind, url, name in itertools.chain.from_iterable(
+ g(post) for g in generators):
fhash = self._filehash(url)
if fhash not in hashes or not fhash:
hashes.add(fhash)
@@ -82,15 +79,14 @@ class PatreonExtractor(Extractor):
if url:
yield "attachment", url, attachment["name"]
- @staticmethod
- def _content(post):
+ def _content(self, post):
content = post.get("content")
if content:
for img in text.extract_iter(
content, '<img data-media-id="', '>'):
url = text.extract(img, 'src="', '"')[0]
if url:
- yield "content", url, url
+ yield "content", url, self._filename(url) or url
def posts(self):
"""Return all relevant post objects"""
@@ -155,7 +151,7 @@ class PatreonExtractor(Extractor):
included[file["type"]][file["id"]]
for file in files["data"]
]
- return []
+ return ()
@memcache(keyarg=1)
def _user(self, url):
@@ -212,6 +208,20 @@ class PatreonExtractor(Extractor):
"&json-api-version=1.0"
)
+ def _build_file_generators(self, filetypes):
+ if filetypes is None:
+ return (self._images, self._attachments,
+ self._postfile, self._content)
+ genmap = {
+ "images" : self._images,
+ "attachments": self._attachments,
+ "postfile" : self._postfile,
+ "content" : self._content,
+ }
+ if isinstance(filetypes, str):
+ filetypes = filetypes.split(",")
+ return [genmap[ft] for ft in filetypes]
+
class PatreonCreatorExtractor(PatreonExtractor):
"""Extractor for a creator's works"""
@@ -305,8 +315,9 @@ class PatreonPostExtractor(PatreonExtractor):
"count": 4,
}),
# postfile + content
- ("https://www.patreon.com/posts/19987002", {
- "count": 4,
+ ("https://www.patreon.com/posts/56127163", {
+ "count": 3,
+ "keyword": {"filename": r"re:^(?!1).+$"},
}),
# tags (#1539)
("https://www.patreon.com/posts/free-post-12497641", {
diff --git a/gallery_dl/extractor/picarto.py b/gallery_dl/extractor/picarto.py
new file mode 100644
index 0000000..77a07b4
--- /dev/null
+++ b/gallery_dl/extractor/picarto.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://picarto.tv/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class PicartoGalleryExtractor(Extractor):
+ """Extractor for picarto galleries"""
+ category = "picarto"
+ subcategory = "gallery"
+ root = "https://picarto.tv"
+ directory_fmt = ("{category}", "{channel[name]}")
+ filename_fmt = "{id} {title}.{extension}"
+ archive_fmt = "{id}"
+ pattern = r"(?:https?://)?picarto\.tv/([^/?#]+)/gallery"
+ test = ("https://picarto.tv/fnook/gallery/default/", {
+ "pattern": r"https://images\.picarto\.tv/gallery/\d/\d\d/\d+/artwork"
+ r"/[0-9a-f-]+/large-[0-9a-f]+\.(jpg|png|gif)",
+ "count": ">= 7",
+ "keyword": {"date": "type:datetime"},
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.username = match.group(1)
+
+ def items(self):
+ for post in self.posts():
+ post["date"] = text.parse_datetime(
+ post["created_at"], "%Y-%m-%d %H:%M:%S")
+ variations = post.pop("variations", ())
+ yield Message.Directory, post
+
+ image = post["default_image"]
+ if not image:
+ continue
+ url = "https://images.picarto.tv/gallery/" + image["name"]
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
+
+ for variation in variations:
+ post.update(variation)
+ image = post["default_image"]
+ url = "https://images.picarto.tv/gallery/" + image["name"]
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
+
+ def posts(self):
+ url = "https://ptvintern.picarto.tv/api/channel-gallery"
+ params = {
+ "first": "30",
+ "page": 1,
+ "filter_params[album_id]": "",
+ "filter_params[channel_name]": self.username,
+ "filter_params[q]": "",
+ "filter_params[visibility]": "",
+ "order_by[field]": "published_at",
+ "order_by[order]": "DESC",
+ }
+
+ while True:
+ posts = self.request(url, params=params).json()
+ if not posts:
+ return
+ yield from posts
+ params["page"] += 1
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index e21a82c..8e47e2e 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -558,6 +558,68 @@ class PixivPixivisionExtractor(PixivExtractor):
}
+class PixivSketchExtractor(Extractor):
+ """Extractor for user pages on sketch.pixiv.net"""
+ category = "pixiv"
+ subcategory = "sketch"
+ directory_fmt = ("{category}", "sketch", "{user[unique_name]}")
+ filename_fmt = "{post_id} {id}.{extension}"
+ archive_fmt = "S{user[id]}_{id}"
+ root = "https://sketch.pixiv.net"
+ cookiedomain = ".pixiv.net"
+ pattern = r"(?:https?://)?sketch\.pixiv\.net/@([^/?#]+)"
+ test = ("https://sketch.pixiv.net/@nicoby", {
+ "pattern": r"https://img\-sketch\.pixiv\.net/uploads/medium"
+ r"/file/\d+/\d+\.(jpg|png)",
+ "count": ">= 35",
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.username = match.group(1)
+
+ def items(self):
+ headers = {"Referer": "{}/@{}".format(self.root, self.username)}
+
+ for post in self.posts():
+ media = post["media"]
+ post["post_id"] = post["id"]
+ post["date"] = text.parse_datetime(
+ post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ util.delete_items(post, ("id", "media", "_links"))
+
+ yield Message.Directory, post
+ post["_http_headers"] = headers
+
+ for photo in media:
+ original = photo["photo"]["original"]
+ post["id"] = photo["id"]
+ post["width"] = original["width"]
+ post["height"] = original["height"]
+
+ url = original["url"]
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
+
+ def posts(self):
+ url = "{}/api/walls/@{}/posts/public.json".format(
+ self.root, self.username)
+ headers = {
+ "Accept": "application/vnd.sketch-v4+json",
+ "X-Requested-With": "{}/@{}".format(self.root, self.username),
+ "Referer": self.root + "/",
+ }
+
+ while True:
+ data = self.request(url, headers=headers).json()
+ yield from data["data"]["items"]
+
+ next_url = data["_links"].get("next")
+ if not next_url:
+ return
+ url = self.root + next_url["href"]
+
+
class PixivAppAPI():
"""Minimal interface for the Pixiv App API for mobile devices
diff --git a/gallery_dl/extractor/seisoparty.py b/gallery_dl/extractor/seisoparty.py
index 28e049b..a2a24e0 100644
--- a/gallery_dl/extractor/seisoparty.py
+++ b/gallery_dl/extractor/seisoparty.py
@@ -9,7 +9,8 @@
"""Extractors for https://seiso.party/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
+from ..cache import cache
import re
@@ -52,6 +53,25 @@ class SeisopartyExtractor(Extractor):
"files" : self._find_files(page),
}
+ def login(self):
+ username, password = self._get_auth_info()
+ if username:
+ self._update_cookies(self._login_impl(username, password))
+
+ @cache(maxage=28*24*3600, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = self.root + "/account/login"
+ data = {"username": username, "password": password}
+
+ response = self.request(url, method="POST", data=data)
+ if response.url.endswith("/account/login") and \
+ "Username or password is incorrect" in response.text:
+ raise exception.AuthenticationError()
+
+ return {c.name: c.value for c in response.history[0].cookies}
+
class SeisopartyUserExtractor(SeisopartyExtractor):
"""Extractor for all posts from a seiso.party user listing"""
@@ -136,3 +156,46 @@ class SeisopartyPostExtractor(SeisopartyExtractor):
url = "{}/post/{}/{}/{}".format(
self.root, self.service, self.user_id, self.post_id)
return (self._parse_post(self.request(url).text, self.post_id),)
+
+
+class SeisopartyFavoriteExtractor(SeisopartyExtractor):
+ """Extractor for seiso.party favorites"""
+ subcategory = "favorite"
+ pattern = r"(?:https?://)?seiso\.party/favorites/artists/?(?:\?([^#]+))?"
+ test = (
+ ("https://seiso.party/favorites/artists", {
+ "pattern": SeisopartyUserExtractor.pattern,
+ "url": "0c862434bc3bbbe84cbf41c3a6152473a8cde683",
+ "count": 3,
+ }),
+ ("https://seiso.party/favorites/artists?sort=id&sort_direction=asc", {
+ "url": "629a8b9c6d3a8a64f521908bdb3d7426ac03f8d3",
+ }),
+ )
+
+ def __init__(self, match):
+ SeisopartyExtractor.__init__(self, match)
+ self.query = match.group(1)
+
+ def items(self):
+ self._prepare_ddosguard_cookies()
+ self.login()
+
+ url = self.root + "/favorites/artists"
+ data = {"_extractor": SeisopartyUserExtractor}
+ params = text.parse_query(self.query)
+ params["page"] = text.parse_int(params.get("page"), 1)
+
+ while True:
+ page = self.request(url, params=params).text
+
+ cnt = 0
+ for card in text.extract_iter(
+ page, '<div class="artist-card', '</a>'):
+ path = text.extract(card, '<a href="', '"')[0]
+ yield Message.Queue, self.root + path, data
+ cnt += 1
+
+ if cnt < 25:
+ return
+ params["page"] += 1
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 4a3f6cd..568ee2e 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -41,6 +41,16 @@ class TwitterExtractor(Extractor):
self.cards = self.config("cards", False)
self._user_cache = {}
+ size = self.config("size")
+ if size is None:
+ self._size_image = "orig"
+ self._size_fallback = ("large", "medium", "small")
+ else:
+ if isinstance(size, str):
+ size = size.split(",")
+ self._size_image = size[0]
+ self._size_fallback = size[1:]
+
def items(self):
self.login()
metadata = self.metadata()
@@ -115,7 +125,7 @@ class TwitterExtractor(Extractor):
base, _, fmt = url.rpartition(".")
base += "?format=" + fmt + "&name="
files.append(text.nameext_from_url(url, {
- "url" : base + "orig",
+ "url" : base + self._size_image,
"width" : width,
"height" : height,
"_fallback": self._image_fallback(base),
@@ -123,11 +133,9 @@ class TwitterExtractor(Extractor):
else:
files.append({"url": media["media_url"]})
- @staticmethod
- def _image_fallback(base):
- yield base + "large"
- yield base + "medium"
- yield base + "small"
+ def _image_fallback(self, base):
+ for fmt in self._size_fallback:
+ yield base + fmt
def _extract_card(self, tweet, files):
card = tweet["card"]
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 9dd2d47..9724c4b 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -12,18 +12,67 @@ from .common import Extractor, Message
from .. import text
import re
+BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
-class VkPhotosExtractor(Extractor):
- """Extractor for photos from a vk user"""
+
+class VkExtractor(Extractor):
+ """Base class for vk extractors"""
category = "vk"
- subcategory = "photos"
directory_fmt = ("{category}", "{user[name]|user[id]}")
filename_fmt = "{id}.{extension}"
archive_fmt = "{id}"
root = "https://vk.com"
request_interval = 1.0
- pattern = (r"(?:https://)?(?:www\.|m\.)?vk\.com/(?:"
- r"(?:albums|photos|id)(-?\d+)|([^/?#]+))")
+
+ def items(self):
+ data = self.metadata()
+ yield Message.Directory, data
+ for photo in self.photos():
+ photo.update(data)
+ yield Message.Url, photo["url"], photo
+
+ def _pagination(self, photos_url, user_id):
+ sub = re.compile(r"/imp[fg]/").sub
+ needle = 'data-id="{}_'.format(user_id)
+ cnt = 0
+
+ headers = {
+ "X-Requested-With": "XMLHttpRequest",
+ "Origin" : self.root,
+ "Referer" : photos_url,
+ }
+ params = {
+ "al" : "1",
+ "al_ad" : "0",
+ "offset": 0,
+ "part" : "1",
+ }
+
+ while True:
+ payload = self.request(
+ photos_url, method="POST", headers=headers, data=params
+ ).json()["payload"][1]
+
+ offset = payload[0]
+ html = payload[1]
+
+ for cnt, photo in enumerate(text.extract_iter(html, needle, ')')):
+ pid = photo[:photo.find('"')]
+ url = photo[photo.rindex("(")+1:]
+ url = sub("/", url.partition("?")[0])
+ yield text.nameext_from_url(url, {"url": url, "id": pid})
+
+ if cnt <= 20 or offset == params["offset"]:
+ return
+ params["offset"] = offset
+
+
+class VkPhotosExtractor(VkExtractor):
+ """Extractor for photos from a vk user"""
+ subcategory = "photos"
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"(?:albums|photos|id)(-?\d+)"
+ r"|(?!album-?\d+_)([^/?#]+))")
test = (
("https://vk.com/id398982326", {
"pattern": r"https://sun\d+-\d+\.userapi\.com/c\d+/v\d+"
@@ -58,10 +107,14 @@ class VkPhotosExtractor(Extractor):
)
def __init__(self, match):
- Extractor.__init__(self, match)
+ VkExtractor.__init__(self, match)
self.user_id, self.user_name = match.groups()
- def items(self):
+ def photos(self):
+ url = "{}/photos{}".format(self.root, self.user_id)
+ return self._pagination(url, self.user_id)
+
+ def metadata(self):
if self.user_id:
user_id = self.user_id
prefix = "public" if user_id[0] == "-" else "id"
@@ -70,40 +123,8 @@ class VkPhotosExtractor(Extractor):
else:
url = "{}/{}".format(self.root, self.user_name)
data = self._extract_profile(url)
- user_id = data["user"]["id"]
-
- photos_url = "{}/photos{}".format(self.root, user_id)
- headers = {
- "X-Requested-With": "XMLHttpRequest",
- "Origin" : self.root,
- "Referer" : photos_url,
- }
- params = {
- "al" : "1",
- "al_ad" : "0",
- "offset": 0,
- "part" : "1",
- }
-
- yield Message.Directory, data
- sub = re.compile(r"/imp[fg]/").sub
- needle = 'data-id="{}_'.format(user_id)
- cnt = 0
-
- while True:
- offset, html = self.request(
- photos_url, method="POST", headers=headers, data=params
- ).json()["payload"][1]
-
- for cnt, photo in enumerate(text.extract_iter(html, needle, ')')):
- data["id"] = photo[:photo.find('"')]
- url = photo[photo.rindex("(")+1:]
- url = sub("/", url.partition("?")[0])
- yield Message.Url, url, text.nameext_from_url(url, data)
-
- if cnt <= 40 or offset == params["offset"]:
- return
- params["offset"] = offset
+ self.user_id = data["user"]["id"]
+ return data
def _extract_profile(self, url):
extr = text.extract_from(self.request(url).text)
@@ -116,3 +137,32 @@ class VkPhotosExtractor(Extractor):
'<span class="current_text">', '</span'))),
"id" : extr('<a href="/albums', '"'),
}}
+
+
+class VkAlbumExtractor(VkExtractor):
+ """Extractor for a vk album"""
+ subcategory = "album"
+ directory_fmt = ("{category}", "{user[id]}", "{album[id]}")
+ pattern = BASE_PATTERN + r"/album(-?\d+)_(\d+)$"
+ test = (
+ ("https://vk.com/album221469416_0", {
+ "count": 3,
+ }),
+ ("https://vk.com/album-165740836_281339889", {
+ "count": 12,
+ }),
+ )
+
+ def __init__(self, match):
+ VkExtractor.__init__(self, match)
+ self.user_id, self.album_id = match.groups()
+
+ def photos(self):
+ url = "{}/album{}_{}".format(self.root, self.user_id, self.album_id)
+ return self._pagination(url, self.user_id)
+
+ def metadata(self):
+ return {
+ "user": {"id": self.user_id},
+ "album": {"id": self.album_id},
+ }
diff --git a/gallery_dl/postprocessor/compare.py b/gallery_dl/postprocessor/compare.py
index a08cdc4..b3b94f7 100644
--- a/gallery_dl/postprocessor/compare.py
+++ b/gallery_dl/postprocessor/compare.py
@@ -20,36 +20,32 @@ class ComparePP(PostProcessor):
PostProcessor.__init__(self, job)
if options.get("shallow"):
self._compare = self._compare_size
+ self._equal_exc = self._equal_cnt = 0
- action = options.get("action")
- if action == "enumerate":
- job.register_hooks({"file": self.enumerate}, options)
- else:
- job.register_hooks({"file": self.compare}, options)
- action, _, smax = action.partition(":")
- self._skipmax = text.parse_int(smax)
- self._skipexc = self._skipcnt = 0
- if action == "abort":
- self._skipexc = exception.StopExtraction
- elif action == "terminate":
- self._skipexc = exception.TerminateExtraction
- elif action == "exit":
- self._skipexc = sys.exit
-
- def compare(self, pathfmt):
+ equal = options.get("equal")
+ if equal:
+ equal, _, emax = equal.partition(":")
+ self._equal_max = text.parse_int(emax)
+ if equal == "abort":
+ self._equal_exc = exception.StopExtraction
+ elif equal == "terminate":
+ self._equal_exc = exception.TerminateExtraction
+ elif equal == "exit":
+ self._equal_exc = sys.exit
+
+ job.register_hooks({"file": (
+ self.enumerate
+ if options.get("action") == "enumerate" else
+ self.replace
+ )}, options)
+
+ def replace(self, pathfmt):
try:
if self._compare(pathfmt.realpath, pathfmt.temppath):
- if self._skipexc:
- self._skipcnt += 1
- if self._skipcnt >= self._skipmax:
- util.remove_file(pathfmt.temppath)
- print()
- raise self._skipexc()
- pathfmt.delete = True
- else:
- self._skipcnt = 0
+ return self._equal(pathfmt)
except OSError:
pass
+ self._equal_cnt = 0
def enumerate(self, pathfmt):
num = 1
@@ -58,9 +54,10 @@ class ComparePP(PostProcessor):
pathfmt.prefix = str(num) + "."
pathfmt.set_extension(pathfmt.extension, False)
num += 1
- pathfmt.delete = True
+ return self._equal(pathfmt)
except OSError:
pass
+ self._equal_cnt = 0
def _compare(self, f1, f2):
return self._compare_size(f1, f2) and self._compare_content(f1, f2)
@@ -81,5 +78,14 @@ class ComparePP(PostProcessor):
if not buf1:
return True
+ def _equal(self, pathfmt):
+ if self._equal_exc:
+ self._equal_cnt += 1
+ if self._equal_cnt >= self._equal_max:
+ util.remove_file(pathfmt.temppath)
+ print()
+ raise self._equal_exc()
+ pathfmt.delete = True
+
__postprocessor__ = ComparePP
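
The rewrite splits the old combined action option in two: action still selects replace versus enumerate, while the new equal option ("abort:N", "terminate:N", "exit:N") decides what happens after N consecutive files compare equal. Mapping that option string to its exception, in brief (the exception classes are stand-ins for gallery_dl's exception module):

    import sys

    class StopExtraction(Exception):
        pass

    class TerminateExtraction(Exception):
        pass

    def parse_equal(option):
        """Return (callable, threshold) for an option like 'abort:3'."""
        action, _, emax = option.partition(":")
        exc = {"abort": StopExtraction,
               "terminate": TerminateExtraction,
               "exit": sys.exit}.get(action, 0)
        return exc, int(emax or 0)

    assert parse_equal("terminate:5") == (TerminateExtraction, 5)
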
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index acc3b8d..ee01549 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.19.0"
+__version__ = "1.19.1"