| author    | 2024-01-23 23:35:00 -0500 |
|-----------|---------------------------|
| committer | 2024-01-23 23:35:00 -0500 |
| commit    | 12e23f1195164dcb740d6d4a4287e762c9e5e534 (patch) |
| tree      | e6b13483475c510ea2f685c21363271f23745c56 /gallery_dl/extractor |
| parent    | e949aaf6f6ac93896947d5b736e48e7911926efb (diff) |
New upstream version 1.26.7 (tag: upstream/1.26.7)
Diffstat (limited to 'gallery_dl/extractor')
33 files changed, 903 insertions, 273 deletions
diff --git a/gallery_dl/extractor/2ch.py b/gallery_dl/extractor/2ch.py
new file mode 100644
index 0000000..dbbf21b
--- /dev/null
+++ b/gallery_dl/extractor/2ch.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://2ch.hk/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+
+class _2chThreadExtractor(Extractor):
+    """Extractor for 2ch threads"""
+    category = "2ch"
+    subcategory = "thread"
+    root = "https://2ch.hk"
+    directory_fmt = ("{category}", "{board}", "{thread} {title}")
+    filename_fmt = "{tim}{filename:? //}.{extension}"
+    archive_fmt = "{board}_{thread}_{tim}"
+    pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/res/(\d+)"
+    example = "https://2ch.hk/a/res/12345.html"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.board, self.thread = match.groups()
+
+    def items(self):
+        url = "{}/{}/res/{}.json".format(self.root, self.board, self.thread)
+        posts = self.request(url).json()["threads"][0]["posts"]
+
+        op = posts[0]
+        title = op.get("subject") or text.remove_html(op["comment"])
+
+        thread = {
+            "board" : self.board,
+            "thread": self.thread,
+            "title" : text.unescape(title)[:50],
+        }
+
+        yield Message.Directory, thread
+        for post in posts:
+            files = post.get("files")
+            if files:
+                post["post_name"] = post["name"]
+                post["date"] = text.parse_timestamp(post["timestamp"])
+                del post["files"]
+                del post["name"]
+
+                for file in files:
+                    file.update(thread)
+                    file.update(post)
+
+                    file["filename"] = file["fullname"].rpartition(".")[0]
+                    file["tim"], _, file["extension"] = \
+                        file["name"].rpartition(".")
+
+                    yield Message.Url, self.root + file["path"], file
+
+
+class _2chBoardExtractor(Extractor):
+    """Extractor for 2ch boards"""
+    category = "2ch"
+    subcategory = "board"
+    root = "https://2ch.hk"
+    pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/?$"
+    example = "https://2ch.hk/a/"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.board = match.group(1)
+
+    def items(self):
+        # index page
+        url = "{}/{}/index.json".format(self.root, self.board)
+        index = self.request(url).json()
+        index["_extractor"] = _2chThreadExtractor
+        for thread in index["threads"]:
+            url = "{}/{}/res/{}.html".format(
+                self.root, self.board, thread["thread_num"])
+            yield Message.Queue, url, index
+
+        # pages 1..n
+        for n in util.advance(index["pages"], 1):
+            url = "{}/{}/{}.json".format(self.root, self.board, n)
+            page = self.request(url).json()
+            page["_extractor"] = _2chThreadExtractor
+            for thread in page["threads"]:
+                url = "{}/{}/res/{}.html".format(
+                    self.root, self.board, thread["thread_num"])
+                yield Message.Queue, url, page
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 9e33f2c..d624736 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -10,6 +10,7 @@ import sys
 import re
 
 modules = [
+    "2ch",
     "2chan",
     "2chen",
     "35photo",
@@ -53,7 +54,7 @@ modules = [
     "gelbooru_v01",
     "gelbooru_v02",
     "gofile",
-    "hbrowse",
+    "hatenablog",
     "hentai2read",
     "hentaicosplays",
     "hentaifoundry",
@@ -145,6 +146,7 @@ modules = [
     "smugmug",
     "soundgasm",
     "speakerdeck",
+    "steamgriddb",
     "subscribestar",
     "szurubooru",
     "tapas",
@@ -175,6 +177,7 @@ modules = [
     "weibo",
     "wikiart",
     "wikifeet",
+    "wikimedia",
     "xhamster",
     "xvideos",
     "zerochan",
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index cd6302e..e82cd09 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -10,8 +10,11 @@ from .common import Extractor, ChapterExtractor, MangaExtractor
 from .. import text, exception
 import re
 
-BASE_PATTERN = (r"(?:https?://)?"
-                r"(?:(?:ba|d|w)to\.to|\.to|(?:batotoo|mangatoto)\.com)")
+BASE_PATTERN = (r"(?:https?://)?(?:"
+                r"(?:ba|d|h|m|w)to\.to|"
+                r"(?:(?:manga|read)toto|batocomic|[xz]bato)\.(?:com|net|org)|"
+                r"comiko\.(?:net|org)|"
+                r"bat(?:otoo|o?two)\.com)")
 
 
 class BatotoBase():
@@ -38,7 +41,8 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
     def metadata(self, page):
         extr = text.extract_from(page)
         manga, info, _ = extr("<title>", "<").rsplit(" - ", 3)
-        manga_id = extr("/title/", "/")
+        manga_id = text.extr(
+            extr('rel="canonical" href="', '"'), "/title/", "/")
 
         match = re.match(
             r"(?:Volume\s+(\d+) )?"
@@ -76,12 +80,13 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
     """Extractor for bato.to manga"""
     reverse = False
     chapterclass = BatotoChapterExtractor
-    pattern = BASE_PATTERN + r"/(?:title|series)/(\d+)[^/?#]*/?$"
+    pattern = (BASE_PATTERN +
+               r"/(?:title/(\d+)[^/?#]*|series/(\d+)(?:/[^/?#]*)?)/?$")
     example = "https://bato.to/title/12345-MANGA/"
 
     def __init__(self, match):
         self.root = text.root_from_url(match.group(0))
-        self.manga_id = match.group(1)
+        self.manga_id = match.group(1) or match.group(2)
         url = "{}/title/{}".format(self.root, self.manga_id)
         MangaExtractor.__init__(self, match, url)
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 58ae59d..402408e 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -37,7 +37,7 @@ class BloggerExtractor(BaseExtractor):
     findall_image = re.compile(
         r'src="(https?://(?:'
         r'blogger\.googleusercontent\.com/img|'
-        r'lh\d+\.googleusercontent\.com/|'
+        r'lh\d+(?:-\w+)?\.googleusercontent\.com|'
        r'\d+\.bp\.blogspot\.com)/[^"]+)').findall
     findall_video = re.compile(
         r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 26123b8..e7fc14b 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -6,13 +6,13 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extractors for https://bunkrr.su/"""
+"""Extractors for https://bunkrr.ru/"""
 
 from .lolisafe import LolisafeAlbumExtractor
 from .. import text
 from urllib.parse import urlsplit, urlunsplit
 
-BASE_PATTERN = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)"
+BASE_PATTERN = r"(?:https?://)?(?:app\.)?bunkr+\.(?:[rs]u|la|is|to)"
 
 MEDIA_DOMAIN_OVERRIDES = {
     "cdn9.bunkr.ru" : "c9.bunkr.ru",
@@ -27,11 +27,11 @@ CDN_HOSTED_EXTENSIONS = (
 
 
 class BunkrAlbumExtractor(LolisafeAlbumExtractor):
-    """Extractor for bunkrr.su albums"""
+    """Extractor for bunkrr.ru albums"""
     category = "bunkr"
-    root = "https://bunkrr.su"
+    root = "https://bunkrr.ru"
     pattern = BASE_PATTERN + r"/a/([^/?#]+)"
-    example = "https://bunkrr.su/a/ID"
+    example = "https://bunkrr.ru/a/ID"
 
     def fetch_album(self, album_id):
         # album metadata
@@ -84,11 +84,11 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
 
 
 class BunkrMediaExtractor(BunkrAlbumExtractor):
-    """Extractor for bunkrr.su media links"""
+    """Extractor for bunkrr.ru media links"""
     subcategory = "media"
     directory_fmt = ("{category}",)
     pattern = BASE_PATTERN + r"/[vid]/([^/?#]+)"
-    example = "https://bunkrr.su/v/FILENAME"
+    example = "https://bunkrr.ru/v/FILENAME"
 
     def fetch_album(self, album_id):
         try:
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index 2bf200b..ef5a44c 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -38,10 +38,6 @@ BASE_PATTERN = CheveretoExtractor.update({
         "root": "https://jpg4.su",
         "pattern": r"jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)",
     },
-    "pixl": {
-        "root": "https://pixl.li",
-        "pattern": r"pixl\.(?:li|is)",
-    },
     "imgkiwi": {
         "root": "https://img.kiwi",
         "pattern": r"img\.kiwi",
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 0dd05ef..cf0f8c9 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -102,6 +102,9 @@ class Extractor():
     def config_accumulate(self, key):
         return config.accumulate(self._cfgpath, key)
 
+    def config_instance(self, key, default=None):
+        return default
+
     def _config_shared(self, key, default=None):
         return config.interpolate_common(
             ("extractor",), self._cfgpath, key, default)
@@ -735,9 +738,10 @@ class BaseExtractor(Extractor):
         for index, group in enumerate(match.groups()):
             if group is not None:
                 if index:
-                    self.category, self.root = self.instances[index-1]
+                    self.category, self.root, info = self.instances[index-1]
                     if not self.root:
                         self.root = text.root_from_url(match.group(0))
+                    self.config_instance = info.get
                 else:
                     self.root = group
                     self.category = group.partition("://")[2]
@@ -757,7 +761,7 @@ class BaseExtractor(Extractor):
             root = info["root"]
             if root:
                 root = root.rstrip("/")
-            instance_list.append((category, root))
+            instance_list.append((category, root, info))
 
             pattern = info.get("pattern")
             if not pattern:
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 4b5f1d7..bcfbe73 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -38,7 +38,7 @@ class DeviantartExtractor(Extractor):
 
     def __init__(self, match):
         Extractor.__init__(self, match)
-        self.user = (match.group(1) or match.group(2)).lower()
+        self.user = (match.group(1) or match.group(2) or "").lower()
         self.offset = 0
 
     def _init(self):
@@ -452,9 +452,11 @@ class DeviantartExtractor(Extractor):
             return None
 
         dev = self.api.deviation(deviation["deviationid"], False)
-        folder = dev["premium_folder_data"]
+        folder = deviation["premium_folder_data"]
         username = dev["author"]["username"]
-        has_access = folder["has_access"]
+
+        # premium_folder_data is no longer present when user has access (#5063)
+        has_access = ("premium_folder_data" not in dev) or folder["has_access"]
 
         if not has_access and folder["type"] == "watchers" and \
                 self.config("auto-watch"):
@@ -547,22 +549,45 @@ class DeviantartAvatarExtractor(DeviantartExtractor):
     example = "https://www.deviantart.com/USER/avatar/"
 
     def deviations(self):
-        profile = self.api.user_profile(self.user.lower())
-        if profile:
-            url = profile["user"]["usericon"]
-            return ({
-                "author"         : profile["user"],
-                "category"       : "avatar",
-                "index"          : text.parse_int(url.rpartition("?")[2]),
-                "is_deleted"     : False,
-                "is_downloadable": False,
-                "published_time" : 0,
-                "title"          : "avatar",
-                "content"        : {
-                    "src": url.replace("/avatars/", "/avatars-big/", 1),
-                },
-            },)
-        return ()
+        name = self.user.lower()
+        profile = self.api.user_profile(name)
+        if not profile:
+            return ()
+
+        user = profile["user"]
+        icon = user["usericon"]
+        index = icon.rpartition("?")[2]
+
+        formats = self.config("formats")
+        if not formats:
+            url = icon.replace("/avatars/", "/avatars-big/", 1)
+            return (self._make_deviation(url, user, index, ""),)
+
+        if isinstance(formats, str):
+            formats = formats.replace(" ", "").split(",")
+
+        results = []
+        for fmt in formats:
+            fmt, _, ext = fmt.rpartition(".")
+            if fmt:
+                fmt = "-" + fmt
+            url = "https://a.deviantart.net/avatars{}/{}/{}/{}.{}?{}".format(
+                fmt, name[0], name[1], name, ext, index)
+            results.append(self._make_deviation(url, user, index, fmt))
+        return results
+
+    def _make_deviation(self, url, user, index, fmt):
+        return {
+            "author"         : user,
+            "category"       : "avatar",
+            "index"          : text.parse_int(index),
+            "is_deleted"     : False,
+            "is_downloadable": False,
+            "published_time" : 0,
+            "title"          : "avatar" + fmt,
+            "stats"          : {"comments": 0},
+            "content"        : {"src": url},
+        }
 
 
 class DeviantartBackgroundExtractor(DeviantartExtractor):
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index 6a0e069..8c9da2f 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -44,24 +44,26 @@ class EromeExtractor(Extractor):
             pos = page.index('<div class="user-profile', pos)
             user, pos = text.extract(
                 page, 'href="https://www.erome.com/', '"', pos)
-            count, pos = text.extract(
-                page, 'fa-camera"></i>', '</span>', pos)
+
+            urls = []
+            groups = page.split('<div class="media-group"')
+            for group in util.advance(groups, 1):
+                url = (text.extr(group, '<source src="', '"') or
+                       text.extr(group, 'data-src="', '"'))
+                if url:
+                    urls.append(url)
 
             data = {
                 "album_id"     : album_id,
                 "title"        : text.unescape(title),
                 "user"         : text.unquote(user),
+                "count"        : len(urls),
                 "_http_headers": {"Referer": url},
-                "count"        : text.parse_int(count),
             }
 
             yield Message.Directory, data
-            groups = page.split('<div class="media-group"')
-            for data["num"], group in enumerate(util.advance(groups, 1), 1):
-                url = (text.extr(group, '<source src="', '"') or
-                       text.extr(group, 'data-src="', '"'))
-                if url:
-                    yield Message.Url, url, text.nameext_from_url(url, data)
+            for data["num"], url in enumerate(urls, 1):
+                yield Message.Url, url, text.nameext_from_url(url, data)
 
     def albums(self):
         return ()
diff --git a/gallery_dl/extractor/fuskator.py b/gallery_dl/extractor/fuskator.py
index 20afb5a..beecbff 100644
--- a/gallery_dl/extractor/fuskator.py
+++ b/gallery_dl/extractor/fuskator.py
@@ -22,7 +22,7 @@ class FuskatorGalleryExtractor(GalleryExtractor):
 
     def __init__(self, match):
         self.gallery_hash = match.group(1)
-        url = "{}/thumbs/{}/".format(self.root, self.gallery_hash)
+        url = "{}/thumbs/{}/index.html".format(self.root, self.gallery_hash)
         GalleryExtractor.__init__(self, match, url)
 
     def metadata(self, page):
@@ -50,15 +50,16 @@ class FuskatorGalleryExtractor(GalleryExtractor):
             "gallery_id"  : text.parse_int(gallery_id),
             "gallery_hash": self.gallery_hash,
             "title"       : text.unescape(title[:-15]),
-            "views"       : data["hits"],
-            "score"       : data["rating"],
-            "tags"        : data["tags"].split(","),
-            "count"       : len(data["images"]),
+            "views"       : data.get("hits"),
+            "score"       : data.get("rating"),
+            "tags"        : (data.get("tags") or "").split(","),
         }
 
     def images(self, page):
-        for image in self.data["images"]:
-            yield "https:" + image["imageUrl"], image
+        return [
+            ("https:" + image["imageUrl"], image)
+            for image in self.data["images"]
+        ]
 
 
 class FuskatorSearchExtractor(Extractor):
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index eba1539..83f1392 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -23,7 +23,7 @@ class GelbooruBase():
     root = "https://gelbooru.com"
     offset = 0
 
-    def _api_request(self, params, key="post"):
+    def _api_request(self, params, key="post", log=False):
         if "s" not in params:
             params["s"] = "post"
         params["api_key"] = self.api_key
@@ -35,8 +35,9 @@ class GelbooruBase():
         try:
             posts = data[key]
         except KeyError:
-            self.log.error("Incomplete API response (missing '%s')", key)
-            self.log.debug("%s", data)
+            if log:
+                self.log.error("Incomplete API response (missing '%s')", key)
+                self.log.debug("%s", data)
             return []
 
         if not isinstance(posts, list):
@@ -117,7 +118,7 @@ class GelbooruBase():
 
 class GelbooruTagExtractor(GelbooruBase,
                            gelbooru_v02.GelbooruV02TagExtractor):
     """Extractor for images from gelbooru.com based on search-tags"""
-    pattern = BASE_PATTERN + r"page=post&s=list&tags=([^&#]+)"
+    pattern = BASE_PATTERN + r"page=post&s=list&tags=([^&#]*)"
     example = "https://gelbooru.com/index.php?page=post&s=list&tags=TAG"
 
@@ -169,7 +170,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
             "limit": "1",
         }
 
-        count = self._api_request(params, "@attributes")[0]["count"]
+        count = self._api_request(params, "@attributes", True)[0]["count"]
         if count <= self.offset:
             return
 
@@ -186,7 +187,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
         params["limit"] = self.per_page
 
         while True:
-            favs = self._api_request(params, "favorite")
+            favs = self._api_request(params, "favorite", True)
 
             favs.reverse()
             if skip:
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 0c8af3d..7ab6d02 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -22,14 +22,10 @@ class GelbooruV02Extractor(booru.BooruExtractor):
     def _init(self):
         self.api_key = self.config("api-key")
         self.user_id = self.config("user-id")
-
-        try:
-            self.api_root = INSTANCES[self.category]["api_root"]
-        except KeyError:
-            self.api_root = self.root
+        self.api_root = self.config_instance("api_root") or self.root
 
         if self.category == "realbooru":
-            self.items = self._items_realbooru
+            self._file_url = self._file_url_realbooru
             self._tags = self._tags_realbooru
 
     def _api_request(self, params):
@@ -128,28 +124,6 @@ class GelbooruV02Extractor(booru.BooruExtractor):
                 self.root, md5[0:2], md5[2:4], md5, url.rpartition(".")[2])
         return url
 
-    def _items_realbooru(self):
-        from .common import Message
-        data = self.metadata()
-
-        for post in self.posts():
-            try:
-                html = self._html(post)
-                url = post["file_url"] = text.rextract(
-                    html, 'href="', '"', html.index(">Original<"))[0]
-            except Exception:
-                self.log.debug("Unable to fetch download URL for post %s "
-                               "(md5: %s)", post.get("id"), post.get("md5"))
-                continue
-
-            text.nameext_from_url(url, post)
-            post.update(data)
-            self._prepare(post)
-            self._tags(post, html)
-
-            yield Message.Directory, post
-            yield Message.Url, url, post
-
     def _tags_realbooru(self, post, page):
         tag_container = text.extr(page, 'id="tagLink"', '</div>')
         tags = collections.defaultdict(list)
@@ -161,7 +135,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
             post["tags_" + key] = " ".join(value)
 
 
-INSTANCES = {
+BASE_PATTERN = GelbooruV02Extractor.update({
     "realbooru": {
         "root": "https://realbooru.com",
         "pattern": r"realbooru\.com",
@@ -187,16 +161,14 @@ INSTANCES = {
         "root": "https://xbooru.com",
         "pattern": r"xbooru\.com",
     },
-}
-
-BASE_PATTERN = GelbooruV02Extractor.update(INSTANCES)
+})
 
 
 class GelbooruV02TagExtractor(GelbooruV02Extractor):
     subcategory = "tag"
     directory_fmt = ("{category}", "{search_tags}")
     archive_fmt = "t_{search_tags}_{id}"
-    pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
+    pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]*)"
     example = "https://safebooru.org/index.php?page=post&s=list&tags=TAG"
 
     def __init__(self, match):
@@ -208,6 +180,8 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
         return {"search_tags": self.tags}
 
     def posts(self):
+        if self.tags == "all":
+            self.tags = ""
         return self._pagination({"tags": self.tags})
diff --git a/gallery_dl/extractor/hatenablog.py b/gallery_dl/extractor/hatenablog.py
new file mode 100644
index 0000000..792f666
--- /dev/null
+++ b/gallery_dl/extractor/hatenablog.py
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://hatenablog.com"""
+
+import re
+from .common import Extractor, Message
+from .. import text
+
+
+BASE_PATTERN = (
+    r"(?:hatenablog:https?://([^/?#]+)|(?:https?://)?"
+    r"([\w-]+\.(?:hatenablog\.(?:com|jp)"
+    r"|hatenadiary\.com|hateblo\.jp)))"
+)
+QUERY_RE = r"(?:\?([^#]*))?(?:#.*)?$"
+
+
+class HatenablogExtractor(Extractor):
+    """Base class for HatenaBlog extractors"""
+    category = "hatenablog"
+    directory_fmt = ("{category}", "{domain}")
+    filename_fmt = "{category}_{domain}_{entry}_{num:>02}.{extension}"
+    archive_fmt = "{filename}"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.domain = match.group(1) or match.group(2)
+
+    def _init(self):
+        self._find_img = re.compile(r'<img +([^>]+)').finditer
+
+    def _handle_article(self, article: str):
+        extr = text.extract_from(article)
+        date = text.parse_datetime(extr('<time datetime="', '"'))
+        entry_link = text.unescape(extr('<a href="', '"'))
+        entry = entry_link.partition("/entry/")[2]
+        title = text.unescape(extr('>', '<'))
+        content = extr(
+            '<div class="entry-content hatenablog-entry">', '</div>')
+
+        images = []
+        for i in self._find_img(content):
+            attributes = i.group(1)
+            if 'class="hatena-fotolife"' not in attributes:
+                continue
+            image = text.unescape(text.extr(attributes, 'src="', '"'))
+            images.append(image)
+
+        data = {
+            "domain": self.domain,
+            "date": date,
+            "entry": entry,
+            "title": title,
+            "count": len(images),
+        }
+        yield Message.Directory, data
+        for data["num"], url in enumerate(images, 1):
+            yield Message.Url, url, text.nameext_from_url(url, data)
+
+
+class HatenablogEntriesExtractor(HatenablogExtractor):
+    """Base class for a list of entries"""
+    allowed_parameters = ()
+
+    def __init__(self, match):
+        HatenablogExtractor.__init__(self, match)
+        self.path = match.group(3)
+        self.query = {key: value for key, value in text.parse_query(
+            match.group(4)).items() if self._acceptable_query(key)}
+
+    def _init(self):
+        HatenablogExtractor._init(self)
+        self._find_pager_url = re.compile(
+            r' class="pager-next">\s*<a href="([^"]+)').search
+
+    def items(self):
+        url = "https://" + self.domain + self.path
+        query = self.query
+
+        while url:
+            page = self.request(url, params=query).text
+
+            extr = text.extract_from(page)
+            attributes = extr('<body ', '>')
+            if "page-archive" in attributes:
+                yield from self._handle_partial_articles(extr)
+            else:
+                yield from self._handle_full_articles(extr)
+
+            match = self._find_pager_url(page)
+            url = text.unescape(match.group(1)) if match else None
+            query = None
+
+    def _handle_partial_articles(self, extr):
+        while True:
+            section = extr('<section class="archive-entry', '</section>')
+            if not section:
+                break
+
+            url = "hatenablog:" + text.unescape(text.extr(
+                section, '<a class="entry-title-link" href="', '"'))
+            data = {"_extractor": HatenablogEntryExtractor}
+            yield Message.Queue, url, data
+
+    def _handle_full_articles(self, extr):
+        while True:
+            attributes = extr('<article ', '>')
+            if not attributes:
+                break
+            if "no-entry" in attributes:
+                continue
+
+            article = extr('', '</article>')
+            yield from self._handle_article(article)
+
+    def _acceptable_query(self, key):
+        return key == "page" or key in self.allowed_parameters
+
+
+class HatenablogEntryExtractor(HatenablogExtractor):
+    """Extractor for a single entry URL"""
+    subcategory = "entry"
+    pattern = BASE_PATTERN + r"/entry/([^?#]+)" + QUERY_RE
+    example = "https://BLOG.hatenablog.com/entry/PATH"
+
+    def __init__(self, match):
+        HatenablogExtractor.__init__(self, match)
+        self.path = match.group(3)
+
+    def items(self):
+        url = "https://" + self.domain + "/entry/" + self.path
+        page = self.request(url).text
+
+        extr = text.extract_from(page)
+        while True:
+            attributes = extr('<article ', '>')
+            if "no-entry" in attributes:
+                continue
+            article = extr('', '</article>')
+            return self._handle_article(article)
+
+
+class HatenablogHomeExtractor(HatenablogEntriesExtractor):
+    """Extractor for a blog's home page"""
+    subcategory = "home"
+    pattern = BASE_PATTERN + r"(/?)" + QUERY_RE
+    example = "https://BLOG.hatenablog.com"
+
+
+class HatenablogArchiveExtractor(HatenablogEntriesExtractor):
+    """Extractor for a blog's archive page"""
+    subcategory = "archive"
+    pattern = (BASE_PATTERN + r"(/archive(?:/\d+(?:/\d+(?:/\d+)?)?"
+               r"|/category/[^?#]+)?)" + QUERY_RE)
+    example = "https://BLOG.hatenablog.com/archive/2024"
+
+
+class HatenablogSearchExtractor(HatenablogEntriesExtractor):
+    """Extractor for a blog's search results"""
+    subcategory = "search"
+    pattern = BASE_PATTERN + r"(/search)" + QUERY_RE
+    example = "https://BLOG.hatenablog.com/search?q=QUERY"
+    allowed_parameters = ("q",)
diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py
deleted file mode 100644
index a522140..0000000
--- a/gallery_dl/extractor/hbrowse.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2015-2023 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://www.hbrowse.com/"""
-
-from .common import ChapterExtractor, MangaExtractor
-from .. import text, util, exception
-
-
-class HbrowseBase():
-    """Base class for hbrowse extractors"""
-    category = "hbrowse"
-    root = "https://www.hbrowse.com"
-
-    def parse_page(self, page, data):
-        """Parse metadata on 'page' and add it to 'data'"""
-        data, pos = text.extract_all(page, (
-            ('manga' , '<td class="listLong">', '</td>'),
-            ('artist', '<td class="listLong">', '</td>'),
-            ('total' , '<td class="listLong">', ' '),
-            ('origin', '<td class="listLong">', '</td>'),
-        ), values=data)
-
-        if not data["manga"] and "<b>Warning</b>" in page:
-            msg = page.rpartition(">")[2].strip()
-            raise exception.StopExtraction("Site is not accessible: '%s'", msg)
-
-        tags = text.extract(page, 'class="listTable"', '</table>', pos)[0]
-
-        data["manga"] = text.unescape(data["manga"])
-        data["total"] = text.parse_int(data["total"])
-        data["artist"] = text.remove_html(data["artist"])
-        data["origin"] = text.remove_html(data["origin"])
-        data["tags"] = list(text.extract_iter(tags, 'href="/browse/', '"'))
-        return data
-
-
-class HbrowseChapterExtractor(HbrowseBase, ChapterExtractor):
-    """Extractor for manga-chapters from hbrowse.com"""
-    directory_fmt = ("{category}", "{manga_id} {manga}", "c{chapter:>05}")
-    filename_fmt = ("{category}_{manga_id}_{chapter:>05}_"
-                    "{page:>03}.{extension}")
-    archive_fmt = "{manga_id}_{chapter}_{page}"
-    pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/(\d+)/c(\d+))"
-    example = "https://www.hbrowse.com/12345/c00000"
-
-    def __init__(self, match):
-        self.path, self.gid, self.chapter = match.groups()
-        self.path += "/"
-        ChapterExtractor.__init__(self, match)
-
-    def metadata(self, page):
-        return self.parse_page(page, {
-            "manga_id": text.parse_int(self.gid),
-            "chapter": text.parse_int(self.chapter)
-        })
-
-    def images(self, page):
-        base = self.root + "/data" + self.path
-        json_data = text.extract(page, ';list = ', ',"zzz"')[0] + "]"
-        return [(base + name, None) for name in util.json_loads(json_data)]
-
-
-class HbrowseMangaExtractor(HbrowseBase, MangaExtractor):
-    """Extractor for manga from hbrowse.com"""
-    chapterclass = HbrowseChapterExtractor
-    reverse = False
-    pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/\d+)/?$"
-    example = "https://www.hbrowse.com/12345"
-
-    def chapters(self, page):
-        results = []
-        data = self.parse_page(page, {
-            "manga_id": text.parse_int(
-                self.manga_url.rstrip("/").rpartition("/")[2])
-        })
-
-        pos = 0
-        needle = '<td class="listMiddle">\n<a class="listLink" href="'
-        while True:
-            url, pos = text.extract(page, needle, '"', pos)
-            if not url:
-                return results
-            title, pos = text.extract(page, '>View ', '<', pos)
-            data["chapter"] = text.parse_int(url.rpartition("/")[2][1:])
-            data["title"] = title
-            results.append((text.urljoin(self.root, url), data.copy()))
diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py
index f6170c2..54c6539 100644
--- a/gallery_dl/extractor/issuu.py
+++ b/gallery_dl/extractor/issuu.py
@@ -29,8 +29,9 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
     example = "https://issuu.com/issuu/docs/TITLE/"
 
     def metadata(self, page):
+        pos = page.rindex('id="initial-data"')
         data = util.json_loads(text.rextract(
-            page, '<script data-json="', '"')[0].replace("&quot;", '"'))
+            page, '<script data-json="', '"', pos)[0].replace("&quot;", '"'))
 
         doc = data["initialDocumentData"]["document"]
         doc["date"] = text.parse_datetime(
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index c24e57d..10228b5 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -9,9 +9,10 @@
 """Extractors for https://kemono.party/"""
 
 from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
 from ..cache import cache, memcache
 import itertools
+import json
 import re
 
 BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.(party|su)"
@@ -37,10 +38,14 @@ class KemonopartyExtractor(Extractor):
         Extractor.__init__(self, match)
 
     def _init(self):
+        self.revisions = self.config("revisions")
         self._prepare_ddosguard_cookies()
         self._find_inline = re.compile(
             r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'
             r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
+        self._json_dumps = json.JSONEncoder(
+            ensure_ascii=False, check_circular=False,
+            sort_keys=True, separators=(",", ":")).encode
 
     def items(self):
         find_hash = re.compile(HASH_PATTERN).match
@@ -223,11 +228,23 @@ class KemonopartyExtractor(Extractor):
 
         idx = len(revs)
         for rev in revs:
+            rev["revision_hash"] = self._revision_hash(rev)
            rev["revision_index"] = idx
            idx -= 1
 
         return revs
 
+    def _revision_hash(self, revision):
+        rev = revision.copy()
+        rev.pop("revision_id", None)
+        rev.pop("added", None)
+        rev.pop("next", None)
+        rev.pop("prev", None)
+        rev["file"].pop("name", None)
+        for a in rev["attachments"]:
+            a.pop("name", None)
+        return util.sha1(self._json_dumps(rev))
+
 
 def _validate(response):
     return (response.headers["content-length"] != "9" or
@@ -252,13 +269,13 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
         url = self.api_url
         params = text.parse_query(self.query)
         params["o"] = text.parse_int(params.get("o"))
-        revisions = self.config("revisions")
 
         while True:
             posts = self.request(url, params=params).json()
 
-            if revisions:
+            if self.revisions:
                 for post in posts:
+                    post["revision_hash"] = self._revision_hash(post)
                     post["revision_id"] = 0
                     post_url = "{}/post/{}".format(self.api_url, post["id"])
                     try:
@@ -296,7 +313,8 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
     def posts(self):
         if not self.revision:
             post = self.request(self.api_url).json()
-            if self.config("revisions"):
+            if self.revisions:
+                post["revision_hash"] = self._revision_hash(post)
                 post["revision_id"] = 0
                 try:
                     revs = self._post_revisions(self.api_url)
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 94bea57..bca7e4d 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -148,6 +148,32 @@ class MangadexFeedExtractor(MangadexExtractor):
         return self.api.user_follows_manga_feed()
 
 
+class MangadexListExtractor(MangadexExtractor):
+    """Extractor for mangadex lists"""
+    subcategory = "list"
+    pattern = (BASE_PATTERN +
+               r"/list/([0-9a-f-]+)(?:/[^/?#]*)?(?:\?tab=(\w+))?")
+    example = ("https://mangadex.org/list"
+               "/01234567-89ab-cdef-0123-456789abcdef/NAME")
+
+    def __init__(self, match):
+        MangadexExtractor.__init__(self, match)
+        if match.group(2) == "feed":
+            self.subcategory = "list-feed"
+        else:
+            self.items = self._items_titles
+
+    def chapters(self):
+        return self.api.list_feed(self.uuid)
+
+    def _items_titles(self):
+        data = {"_extractor": MangadexMangaExtractor}
+        for item in self.api.list(self.uuid)["relationships"]:
+            if item["type"] == "manga":
+                url = "{}/title/{}".format(self.root, item["id"])
+                yield Message.Queue, url, data
+
+
 class MangadexAPI():
     """Interface for the MangaDex API v5
 
@@ -173,6 +199,12 @@ class MangadexAPI():
         params = {"includes[]": ("scanlation_group",)}
         return self._call("/chapter/" + uuid, params)["data"]
 
+    def list(self, uuid):
+        return self._call("/list/" + uuid)["data"]
+
+    def list_feed(self, uuid):
+        return self._pagination("/list/" + uuid + "/feed")
+
     @memcache(keyarg=1)
     def manga(self, uuid):
         params = {"includes[]": ("artist", "author")}
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 0b63d6c..68b4196 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -75,7 +75,7 @@ class MastodonExtractor(BaseExtractor):
                 account["acct"], account["moved"]["acct"])
 
 
-INSTANCES = {
+BASE_PATTERN = MastodonExtractor.update({
     "mastodon.social": {
         "root"         : "https://mastodon.social",
         "pattern"      : r"mastodon\.social",
@@ -100,9 +100,7 @@ INSTANCES = {
         "client-id"    : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
         "client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",
     }
-}
-
-BASE_PATTERN = MastodonExtractor.update(INSTANCES) + "(?:/web)?"
+}) + "(?:/web)?"
 
 
 class MastodonUserExtractor(MastodonExtractor):
@@ -174,10 +172,8 @@ class MastodonAPI():
         if access_token is None or access_token == "cache":
             access_token = _access_token_cache(extractor.instance)
         if not access_token:
-            try:
-                access_token = INSTANCES[extractor.category]["access-token"]
-            except (KeyError, TypeError):
-                pass
+            access_token = extractor.config_instance("access-token")
+
         if access_token:
             self.headers = {"Authorization": "Bearer " + access_token}
         else:
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index b991705..9614513 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -116,7 +116,8 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
             yield from text.extract_iter(
                 page, 'href="javascript:void(0);"><img src="', '"')
         else:
-            yield text.extr(page, 'itemprop="image" src="', '"')
+            pos = page.find('id="view-center"') + 1
+            yield text.extract(page, 'itemprop="image" src="', '"', pos)[0]
 
     @staticmethod
     def _extract_user_name(page):
diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py
index bc7b308..d36f509 100644
--- a/gallery_dl/extractor/nitter.py
+++ b/gallery_dl/extractor/nitter.py
@@ -235,10 +235,6 @@ BASE_PATTERN = NitterExtractor.update({
         "root": "https://nitter.net",
         "pattern": r"nitter\.net",
     },
-    "nitter.lacontrevoie.fr": {
-        "root": "https://nitter.lacontrevoie.fr",
-        "pattern": r"nitter\.lacontrevoie\.fr",
-    },
     "nitter.1d4.us": {
         "root": "https://nitter.1d4.us",
         "pattern": r"nitter\.1d4\.us",
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 1690160..8c8a5a9 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -358,8 +358,8 @@ class OAuthMastodon(OAuthBase):
         yield Message.Version, 1
 
         from . import mastodon
-        for application in mastodon.INSTANCES.values():
-            if self.instance == application["root"].partition("://")[2]:
+        for _, root, application in mastodon.MastodonExtractor.instances:
+            if self.instance == root.partition("://")[2]:
                 break
         else:
             application = self._register(self.instance)
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 89c0d2f..5226724 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -56,7 +56,7 @@ class PahealExtractor(Extractor):
             "date"   : text.parse_datetime(
                 extr("datetime='", "'"), "%Y-%m-%dT%H:%M:%S%z"),
             "source" : text.unescape(text.extr(
-               extr(">Source Link<", "</td>"), "href='", "'")),
+                extr(">Source Link<", "</td>"), "href='", "'")),
         }
 
         dimensions, size, ext = extr("Info</th><td>", "<").split(" // ")
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 6c2f39d..62d11f2 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -52,19 +52,29 @@ class PatreonExtractor(Extractor):
                 post["hash"] = fhash
                 post["type"] = kind
                 post["num"] += 1
-                yield Message.Url, url, text.nameext_from_url(name, post)
+                text.nameext_from_url(name, post)
+                if text.ext_from_url(url) == "m3u8":
+                    url = "ytdl:" + url
+                    post["extension"] = "mp4"
+                yield Message.Url, url, post
             else:
                 self.log.debug("skipping %s (%s %s)", url, fhash, kind)
 
-    @staticmethod
-    def _postfile(post):
+    def _postfile(self, post):
         postfile = post.get("post_file")
         if postfile:
-            return (("postfile", postfile["url"], postfile["name"]),)
+            url = postfile["url"]
+            name = postfile.get("name")
+            if not name:
+                if url.startswith("https://stream.mux.com/"):
+                    name = url
+                else:
+                    name = self._filename(url) or url
+            return (("postfile", url, name),)
         return ()
 
     def _images(self, post):
-        for image in post["images"]:
+        for image in post.get("images") or ():
             url = image.get("download_url")
             if url:
                 name = image.get("file_name") or self._filename(url) or url
@@ -80,7 +90,7 @@ class PatreonExtractor(Extractor):
         return ()
 
     def _attachments(self, post):
-        for attachment in post["attachments"]:
+        for attachment in post.get("attachments") or ():
             url = self.request(
                 attachment["url"], method="HEAD",
                 allow_redirects=False, fatal=False,
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index ac6a391..339646f 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -32,7 +32,7 @@ class PhilomenaExtractor(BooruExtractor):
         post["date"] = text.parse_datetime(post["created_at"])
 
 
-INSTANCES = {
+BASE_PATTERN = PhilomenaExtractor.update({
     "derpibooru": {
         "root": "https://derpibooru.org",
         "pattern": r"(?:www\.)?derpibooru\.org",
@@ -48,9 +48,7 @@ INSTANCES = {
         "pattern": r"furbooru\.org",
         "filter_id": "2",
     },
-}
-
-BASE_PATTERN = PhilomenaExtractor.update(INSTANCES)
+})
 
 
 class PhilomenaPostExtractor(PhilomenaExtractor):
@@ -176,10 +174,7 @@ class PhilomenaAPI():
             if filter_id:
                 params["filter_id"] = filter_id
             elif not api_key:
-                try:
-                    params["filter_id"] = INSTANCES[extr.category]["filter_id"]
-                except (KeyError, TypeError):
-                    params["filter_id"] = "2"
+                params["filter_id"] = extr.config_instance("filter_id") or "2"
 
             params["page"] = extr.page_start
             params["per_page"] = extr.per_page
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 4414c71..b9821f2 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -826,9 +826,9 @@ class PixivAppAPI():
 
         extractor.session.headers.update({
             "App-OS"        : "ios",
-            "App-OS-Version": "13.1.2",
-            "App-Version"   : "7.7.6",
-            "User-Agent"    : "PixivIOSApp/7.7.6 (iOS 13.1.2; iPhone11,8)",
+            "App-OS-Version": "16.7.2",
+            "App-Version"   : "7.19.1",
+            "User-Agent"    : "PixivIOSApp/7.19.1 (iOS 16.7.2; iPhone12,8)",
             "Referer"       : "https://app-api.pixiv.net/",
         })
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 602895c..b3b7a9c 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -143,7 +143,7 @@ class SankakuPostExtractor(SankakuExtractor):
     """Extractor for single posts from sankaku.app"""
     subcategory = "post"
     archive_fmt = "{id}"
-    pattern = BASE_PATTERN + r"/posts?(?:/show)?/([0-9a-f]+)"
+    pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)"
     example = "https://sankaku.app/post/show/12345"
 
     def __init__(self, match):
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py
index 8a08fab..67f38c4 100644
--- a/gallery_dl/extractor/shimmie2.py
+++ b/gallery_dl/extractor/shimmie2.py
@@ -19,17 +19,12 @@ class Shimmie2Extractor(BaseExtractor):
     archive_fmt = "{id}"
 
     def _init(self):
-        try:
-            instance = INSTANCES[self.category]
-        except KeyError:
-            return
-
-        cookies = instance.get("cookies")
+        cookies = self.config_instance("cookies")
         if cookies:
             domain = self.root.rpartition("/")[2]
             self.cookies_update_dict(cookies, domain=domain)
 
-        file_url = instance.get("file_url")
+        file_url = self.config_instance("file_url")
         if file_url:
             self.file_url_fmt = file_url
 
@@ -73,15 +68,15 @@ class Shimmie2Extractor(BaseExtractor):
         return "'"
 
 
-INSTANCES = {
+BASE_PATTERN = Shimmie2Extractor.update({
     "loudbooru": {
         "root": "https://loudbooru.com",
         "pattern": r"loudbooru\.com",
         "cookies": {"ui-tnc-agreed": "true"},
     },
     "giantessbooru": {
-        "root": "https://giantessbooru.com",
-        "pattern": r"giantessbooru\.com",
+        "root": "https://sizechangebooru.com",
+        "pattern": r"(?:sizechange|giantess)booru\.com",
         "cookies": {"agreed": "true"},
     },
     "tentaclerape": {
@@ -97,9 +92,7 @@ INSTANCES = {
         "root": "https://rule34hentai.net",
         "pattern": r"rule34hentai\.net",
     },
-}
-
-BASE_PATTERN = Shimmie2Extractor.update(INSTANCES) + r"/(?:index\.php\?q=/?)?"
+}) + r"/(?:index\.php\?q=/?)?"
 
 
 class Shimmie2TagExtractor(Shimmie2Extractor):
@@ -183,25 +176,25 @@ class Shimmie2TagExtractor(Shimmie2Extractor):
         extr = text.extract_from(self.request(url).text)
 
         while True:
-            pid = extr('href="./index.php?q=/post/view/', '&')
+            pid = extr("href='./index.php?q=/post/view/", "&")
             if not pid:
                 break
 
-            tags, dimensions, size = extr('title="', '"').split(" // ")
+            tags, dimensions, size = extr("title='", "'").split(" // ")
             width, _, height = dimensions.partition("x")
 
             yield {
                 "file_url": file_url_fmt(pid),
-                "id": pid,
-                "md5": "",
-                "tags": tags,
-                "width": width,
-                "height": height,
-                "size": text.parse_bytes(size[:-1]),
+                "id"      : pid,
+                "md5"     : "",
+                "tags"    : tags,
+                "width"   : width,
+                "height"  : height,
+                "size"    : text.parse_bytes(size[:-1]),
             }
 
             pnum += 1
-            if not extr('/{}">{}<'.format(pnum, pnum), ">"):
+            if not extr("/{0}'>{0}<".format(pnum), ">"):
                 return
 
@@ -248,7 +241,7 @@ class Shimmie2PostExtractor(Shimmie2Extractor):
             "id"      : self.post_id,
             "tags"    : extr(": ", "<").partition(" - ")[0].rstrip(")"),
             "md5"     : "",
-            "file_url": self.root + extr('id="main_image" src=".', '"'),
+            "file_url": self.root + extr("id='main_image' src='.", "'"),
             "width"   : extr("orig_width =", ";"),
             "height"  : 0,
             "size"    : 0,
diff --git a/gallery_dl/extractor/steamgriddb.py b/gallery_dl/extractor/steamgriddb.py
new file mode 100644
index 0000000..9d46fd6
--- /dev/null
+++ b/gallery_dl/extractor/steamgriddb.py
@@ -0,0 +1,211 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.steamgriddb.com"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?steamgriddb\.com"
+LANGUAGE_CODES = (
+    "aa", "ab", "ae", "af", "ak", "am", "an", "ar", "as", "av", "ay", "az",
+    "ba", "be", "bg", "bh", "bi", "bm", "bn", "bo", "br", "bs", "ca", "ce",
+    "ch", "co", "cr", "cs", "cu", "cv", "cy", "da", "de", "dv", "dz", "ee",
+    "el", "en", "eo", "es", "et", "eu", "fa", "ff", "fi", "fj", "fo", "fr",
+    "fy", "ga", "gd", "gl", "gn", "gu", "gv", "ha", "he", "hi", "ho", "hr",
+    "ht", "hu", "hy", "hz", "ia", "id", "ie", "ig", "ii", "ik", "io", "is",
+    "it", "iu", "ja", "jv", "ka", "kg", "ki", "kj", "kk", "kl", "km", "kn",
+    "ko", "kr", "ks", "ku", "kv", "kw", "ky", "la", "lb", "lg", "li", "ln",
+    "lo", "lt", "lu", "lv", "mg", "mh", "mi", "mk", "ml", "mn", "mr", "ms",
+    "mt", "my", "na", "nb", "nd", "ne", "ng", "nl", "nn", "no", "nr", "nv",
+    "ny", "oc", "oj", "om", "or", "os", "pa", "pi", "pl", "ps", "pt", "qu",
+    "rm", "rn", "ro", "ru", "rw", "sa", "sc", "sd", "se", "sg", "si", "sk",
+    "sl", "sm", "sn", "so", "sq", "sr", "ss", "st", "su", "sv", "sw", "ta",
+    "te", "tg", "th", "ti", "tk", "tl", "tn", "to", "tr", "ts", "tt", "tw",
+    "ty", "ug", "uk", "ur", "uz", "ve", "vi", "vo", "wa", "wo", "xh", "yi",
+    "yo", "za", "zh", "zu",
+)
+FILE_EXT_TO_MIME = {
+    "png": "image/png",
+    "jpeg": "image/jpeg",
+    "jpg": "image/jpeg",
+    "webp": "image/webp",
+    "ico": "image/vnd.microsoft.icon",
+    "all": "all",
+}
+
+
+class SteamgriddbExtractor(Extractor):
+    """Base class for SteamGridDB"""
+    category = "steamgriddb"
+    directory_fmt = ("{category}", "{subcategory}", "{game[id]}")
+    filename_fmt = "{game[id]}_{id}_{num:>02}.{extension}"
+    archive_fmt = "{filename}"
+    root = "https://www.steamgriddb.com"
+
+    def _init(self):
+        self.cookies_update({
+            "userprefs": "%7B%22adult%22%3Afalse%7D",
+        })
+
+    def items(self):
+        download_fake_png = self.config("download-fake-png", True)
+
+        for asset in self.assets():
+            if download_fake_png and asset.get("fake_png"):
+                urls = (asset["url"], asset["fake_png"])
+            else:
+                urls = (asset["url"],)
+
+            asset["count"] = len(urls)
+            yield Message.Directory, asset
+            for asset["num"], url in enumerate(urls, 1):
+                yield Message.Url, url, text.nameext_from_url(url, asset)
+
+    def _call(self, endpoint, **kwargs):
+        data = self.request(self.root + endpoint, **kwargs).json()
+        if not data["success"]:
+            raise exception.StopExtraction(data["error"])
+        return data["data"]
+
+
+class SteamgriddbAssetsExtractor(SteamgriddbExtractor):
+    """Base class for extracting a list of assets"""
+
+    def __init__(self, match):
+        SteamgriddbExtractor.__init__(self, match)
+        list_type = match.group(1)
+        id = int(match.group(2))
+        self.game_id = id if list_type == "game" else None
+        self.collection_id = id if list_type == "collection" else None
+        self.page = int(match.group(3) or 1)
+
+    def assets(self):
+        limit = 48
+        page = min(self.page - 1, 0)
+
+        sort = self.config("sort", "score_desc")
+        if sort not in ("score_desc", "score_asc", "score_old_desc",
+                        "score_old_asc", "age_desc", "age_asc"):
+            raise exception.StopExtractor("Invalid sort '%s'", sort)
+
+        json = {
+            "static"  : self.config("static", True),
+            "animated": self.config("animated", True),
+            "humor"   : self.config("humor", True),
+            "nsfw"    : self.config("nsfw", True),
+            "epilepsy": self.config("epilepsy", True),
+            "untagged": self.config("untagged", True),
+
+            "asset_type": self.asset_type,
+            "limit": limit,
+            "order": sort,
+        }
+        if self.valid_dimensions:
+            json["dimensions"] = self.config_list(
+                "dimensions", "dimension", self.valid_dimensions)
+        json["styles"] = self.config_list("styles", "style", self.valid_styles)
+        json["languages"] = self.config_list(
+            "languages", "language", LANGUAGE_CODES)
+        file_types = self.config_list(
+            "file-types", "file type", self.valid_file_types)
+        json["mime"] = [FILE_EXT_TO_MIME[i] for i in file_types]
+
+        if self.game_id:
+            json["game_id"] = [self.game_id]
+        else:
+            json["collection_id"] = self.collection_id
+
+        while True:
+            json["page"] = page
+
+            data = self._call(
+                "/api/public/search/assets", method="POST", json=json)
+            for asset in data["assets"]:
+                if not asset.get("game"):
+                    asset["game"] = data["game"]
+                yield asset
+
+            if data["total"] <= limit * page:
+                break
+            page += 1
+
+    def config_list(self, key, type_name, valid_values):
+        value = self.config(key)
+        if isinstance(value, str):
+            value = value.split(",")
+
+        if value is None or "all" in value:
+            return ["all"]
+
+        for i in value:
+            if i not in valid_values:
+                raise exception.StopExtraction("Invalid %s '%s'", type_name, i)
+
+        return value
+
+
+class SteamgriddbAssetExtractor(SteamgriddbExtractor):
+    """Extractor for a single asset"""
+    subcategory = "asset"
+    pattern = BASE_PATTERN + r"/(grid|hero|logo|icon)/(\d+)"
+    example = "https://www.steamgriddb.com/grid/1234"
+
+    def __init__(self, match):
+        SteamgriddbExtractor.__init__(self, match)
+        self.asset_type = match.group(1)
+        self.asset_id = match.group(2)
+
+    def assets(self):
+        endpoint = "/api/public/asset/" + self.asset_type + "/" + self.asset_id
+        asset = self._call(endpoint)["asset"]
+        return (asset,)
+
+
+class SteamgriddbGridsExtractor(SteamgriddbAssetsExtractor):
+    subcategory = "grids"
+    asset_type = "grid"
+    pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/grids(?:/(\d+))?"
+    example = "https://www.steamgriddb.com/game/1234/grids"
+    valid_dimensions = ("460x215", "920x430", "600x900", "342x482", "660x930",
+                        "512x512", "1024x1024")
+    valid_styles = ("alternate", "blurred", "no_logo", "material",
+                    "white_logo")
+    valid_file_types = ("png", "jpeg", "jpg", "webp")
+
+
+class SteamgriddbHeroesExtractor(SteamgriddbAssetsExtractor):
+    subcategory = "heroes"
+    asset_type = "hero"
+    pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/heroes(?:/(\d+))?"
+    example = "https://www.steamgriddb.com/game/1234/heroes"
+    valid_dimensions = ("1920x620", "3840x1240", "1600x650")
+    valid_styles = ("alternate", "blurred", "material")
+    valid_file_types = ("png", "jpeg", "jpg", "webp")
+
+
+class SteamgriddbLogosExtractor(SteamgriddbAssetsExtractor):
+    subcategory = "logos"
+    asset_type = "logo"
+    pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/logos(?:/(\d+))?"
+    example = "https://www.steamgriddb.com/game/1234/logos"
+    valid_dimensions = None
+    valid_styles = ("official", "white", "black", "custom")
+    valid_file_types = ("png", "webp")
+
+
+class SteamgriddbIconsExtractor(SteamgriddbAssetsExtractor):
+    subcategory = "icons"
+    asset_type = "icon"
+    pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/icons(?:/(\d+))?"
+    example = "https://www.steamgriddb.com/game/1234/icons"
+    valid_dimensions = ["{0}x{0}".format(i) for i in (8, 10, 14, 16, 20, 24,
+                        28, 32, 35, 40, 48, 54, 56, 57, 60, 64, 72, 76, 80,
+                        90, 96, 100, 114, 120, 128, 144, 150, 152, 160, 180,
+                        192, 194, 256, 310, 512, 768, 1024)]
+    valid_styles = ("official", "custom")
+    valid_file_types = ("png", "ico")
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index aa9ab9f..cf759e0 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -546,7 +546,7 @@ class TwitterTimelineExtractor(TwitterExtractor):
     def _select_tweet_source(self):
         strategy = self.config("strategy")
         if strategy is None or strategy == "auto":
-            if self.retweets or self.replies or self.textonly:
+            if self.retweets or self.textonly:
                 return self.api.user_tweets
             else:
                 return self.api.user_media
diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py
index f2e6521..49a3deb 100644
--- a/gallery_dl/extractor/urlshortener.py
+++ b/gallery_dl/extractor/urlshortener.py
@@ -15,7 +15,7 @@ class UrlshortenerExtractor(BaseExtractor):
     basecategory = "urlshortener"
 
 
-INSTANCES = {
+BASE_PATTERN = UrlshortenerExtractor.update({
     "bitly": {
         "root": "https://bit.ly",
         "pattern": r"bit\.ly",
@@ -26,9 +26,7 @@ INSTANCES = {
         "root": "https://t.co",
         "pattern": r"t\.co",
     },
-}
-
-BASE_PATTERN = UrlshortenerExtractor.update(INSTANCES)
+})
 
 
 class UrlshortenerLinkExtractor(UrlshortenerExtractor):
@@ -42,10 +40,7 @@ class UrlshortenerLinkExtractor(UrlshortenerExtractor):
         self.id = match.group(match.lastindex)
 
     def _init(self):
-        try:
-            self.headers = INSTANCES[self.category]["headers"]
-        except Exception:
-            self.headers = None
+        self.headers = self.config_instance("headers")
 
     def items(self):
         response = self.request(
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index c22e67e..95eeafe 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -10,6 +10,7 @@
 
 from .common import Extractor, Message
 from .. import text, exception
+import re
 
 BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
 
@@ -24,6 +25,7 @@ class VkExtractor(Extractor):
     request_interval = (0.5, 1.5)
 
     def items(self):
+        sub = re.compile(r"/imp[fg]/").sub
         sizes = "wzyxrqpo"
 
         data = self.metadata()
@@ -40,11 +42,15 @@ class VkExtractor(Extractor):
                 continue
 
             try:
-                photo["url"] = photo[size + "src"]
+                url = photo[size + "src"]
             except KeyError:
                 self.log.warning("no photo URL found (%s)", photo.get("id"))
                 continue
 
+            photo["url"] = sub("/", url.partition("?")[0])
+            # photo["url"] = url
+            photo["_fallback"] = (url,)
+
             try:
                 _, photo["width"], photo["height"] = photo[size]
             except ValueError:
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index 3f2f410..949c7cb 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -87,23 +87,41 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
         self.episode_no = params.get("episode_no")
 
     def metadata(self, page):
-        keywords, pos = text.extract(
-            page, '<meta name="keywords" content="', '"')
-        title, pos = text.extract(
-            page, '<meta property="og:title" content="', '"', pos)
-        descr, pos = text.extract(
-            page, '<meta property="og:description" content="', '"', pos)
+        extr = text.extract_from(page)
+        title = extr('<meta property="og:title" content="', '"')
+        descr = extr('<meta property="og:description" content="', '"')
+
+        if extr('<div class="subj_info"', '\n'):
+            comic_name = extr('>', '<')
+            episode_name = extr('<h1 class="subj_episode" title="', '"')
+        else:
+            comic_name = episode_name = ""
+
+        if extr('<span class="tx _btnOpenEpisodeList ', '"'):
+            episode = extr('>#', '<')
+        else:
+            episode = ""
+
+        if extr('<div class="author_area"', '\n'):
+            username = extr('/creator/', '"')
+            author_name = extr('<span>', '</span>')
+        else:
+            username = author_name = ""
 
         return {
-            "genre"      : self.genre,
-            "comic"      : self.comic,
-            "title_no"   : self.title_no,
-            "episode_no" : self.episode_no,
-            "title"      : text.unescape(title),
-            "episode"    : keywords.split(", ")[1],
-            "description": text.unescape(descr),
-            "lang"       : self.lang,
-            "language"   : util.code_to_language(self.lang),
+            "genre"       : self.genre,
+            "comic"       : self.comic,
+            "title_no"    : self.title_no,
+            "episode_no"  : self.episode_no,
+            "title"       : text.unescape(title),
+            "episode"     : episode,
+            "comic_name"  : text.unescape(comic_name),
+            "episode_name": text.unescape(episode_name),
+            "username"    : username,
+            "author_name" : text.unescape(author_name),
+            "description" : text.unescape(descr),
+            "lang"        : self.lang,
+            "language"    : util.code_to_language(self.lang),
         }
 
     @staticmethod
diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py
new file mode 100644
index 0000000..1eafc29
--- /dev/null
+++ b/gallery_dl/extractor/wikimedia.py
@@ -0,0 +1,181 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Ailothaen
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for Wikimedia sites"""
+
+from .common import BaseExtractor, Message
+from .. import text
+
+
+class WikimediaExtractor(BaseExtractor):
+    """Base class for wikimedia extractors"""
+    basecategory = "wikimedia"
+    filename_fmt = "{filename} ({sha1[:8]}).{extension}"
+    directory_fmt = ("{category}", "{page}")
+    archive_fmt = "{sha1}"
+    request_interval = (1.0, 2.0)
+
+    def __init__(self, match):
+        BaseExtractor.__init__(self, match)
+        path = match.group(match.lastindex)
+
+        if self.category == "fandom":
+            self.category = \
+                "fandom-" + self.root.partition(".")[0].rpartition("/")[2]
+
+        if path.startswith("wiki/"):
+            path = path[5:]
+            self.api_path = "/w/api.php"
+        else:
+            self.api_path = "/api.php"
+
+        pre, sep, _ = path.partition(":")
+        prefix = pre.lower() if sep else None
+
+        self.title = path = text.unquote(path)
+        if prefix:
+            self.subcategory = prefix
+
+        if prefix == "category":
+            self.params = {
+                "generator": "categorymembers",
+                "gcmtitle" : path,
+                "gcmtype"  : "file",
+            }
+        elif prefix == "file":
+            self.params = {
+                "titles"   : path,
+            }
+        else:
+            self.params = {
+                "generator": "images",
+                "titles"   : path,
+            }
+
+    def _init(self):
+        api_path = self.config_instance("api-path")
+        if api_path:
+            if api_path[0] == "/":
+                self.api_url = self.root + api_path
+            else:
+                self.api_url = api_path
+        else:
+            self.api_url = self.root + self.api_path
+
+    def items(self):
+        for info in self._pagination(self.params):
+            image = info["imageinfo"][0]
+
+            image["metadata"] = {
+                m["name"]: m["value"]
+                for m in image["metadata"]}
+            image["commonmetadata"] = {
+                m["name"]: m["value"]
+                for m in image["commonmetadata"]}
+
+            filename = image["canonicaltitle"]
+            image["filename"], _, image["extension"] = \
+                filename.partition(":")[2].rpartition(".")
+            image["date"] = text.parse_datetime(
+                image["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
+            image["page"] = self.title
+
+            yield Message.Directory, image
+            yield Message.Url, image["url"], image
+
+    def _pagination(self, params):
+        """
+        https://www.mediawiki.org/wiki/API:Query
+        https://opendata.stackexchange.com/questions/13381
+        """
+
+        url = self.api_url
+        params["action"] = "query"
+        params["format"] = "json"
+        params["prop"] = "imageinfo"
+        params["iiprop"] = (
+            "timestamp|user|userid|comment|canonicaltitle|url|size|"
+            "sha1|mime|metadata|commonmetadata|extmetadata|bitdepth"
+        )
+
+        while True:
+            data = self.request(url, params=params).json()
+
+            try:
+                pages = data["query"]["pages"]
+            except KeyError:
+                pass
+            else:
+                yield from pages.values()
+
+            try:
+                continuation = data["continue"]
+            except KeyError:
+                break
+            params.update(continuation)
+
+
+BASE_PATTERN = WikimediaExtractor.update({
+    "wikipedia": {
+        "root": None,
+        "pattern": r"[a-z]{2,}\.wikipedia\.org",
+    },
+    "wiktionary": {
+        "root": None,
+        "pattern": r"[a-z]{2,}\.wiktionary\.org",
+    },
+    "wikiquote": {
+        "root": None,
+        "pattern": r"[a-z]{2,}\.wikiquote\.org",
+    },
+    "wikibooks": {
+        "root": None,
+        "pattern": r"[a-z]{2,}\.wikibooks\.org",
+    },
+    "wikisource": {
+        "root": None,
+        "pattern": r"[a-z]{2,}\.wikisource\.org",
+    },
+    "wikinews": {
+        "root": None,
+        "pattern": r"[a-z]{2,}\.wikinews\.org",
+    },
+    "wikiversity": {
+        "root": None,
+        "pattern": r"[a-z]{2,}\.wikiversity\.org",
+    },
+    "wikispecies": {
+        "root": "https://species.wikimedia.org",
+        "pattern": r"species\.wikimedia\.org",
+    },
+    "wikimediacommons": {
+        "root": "https://commons.wikimedia.org",
+        "pattern": r"commons\.wikimedia\.org",
+    },
+    "mediawiki": {
+        "root": "https://www.mediawiki.org",
+        "pattern": r"(?:www\.)?mediawiki\.org",
+    },
+    "fandom": {
+        "root": None,
+        "pattern": r"[\w-]+\.fandom\.com",
+        "api-path": "/api.php",
+    },
+    "mariowiki": {
+        "root": "https://www.mariowiki.com",
+        "pattern": r"(?:www\.)?mariowiki\.com",
+    },
+})
+
+
+class WikimediaArticleExtractor(WikimediaExtractor):
+    """Extractor for wikimedia articles"""
+    subcategory = "article"
+    pattern = BASE_PATTERN + r"/(?!static/)([^?#]+)"
+    example = "https://en.wikipedia.org/wiki/TITLE"
