| author | 2023-04-30 16:45:21 -0400 |
|---|---|
| committer | 2023-04-30 16:45:21 -0400 |
| commit | 33d4eae5a6df8aaf6757f52ae25f514ff1211c62 (patch) |
| tree | 7ad425b022dcc1daea1c84c720a266f0134db705 /gallery_dl |
| parent | f98ab7aaca3c4acbd5a793267791749740330e9c (diff) |
New upstream version 1.25.3 (tag: upstream/1.25.3)
Diffstat (limited to 'gallery_dl')
25 files changed, 931 insertions, 140 deletions
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 88e86e9..4ec0398 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -44,6 +44,12 @@ class HttpDownloader(DownloaderBase):
         self.mtime = self.config("mtime", True)
         self.rate = self.config("rate")

+        if not self.config("consume-content", False):
+            # this resets the underlying TCP connection, and therefore
+            # if the program makes another request to the same domain,
+            # a new connection (either TLS or plain TCP) must be made
+            self.release_conn = lambda resp: resp.close()
+
         if self.retries < 0:
             self.retries = float("inf")
         if self.minsize:
@@ -106,7 +112,7 @@ class HttpDownloader(DownloaderBase):
         while True:
             if tries:
                 if response:
-                    response.close()
+                    self.release_conn(response)
                     response = None
                 self.log.warning("%s (%s/%s)", msg, tries, self.retries+1)
                 if tries > self.retries:
@@ -165,18 +171,24 @@ class HttpDownloader(DownloaderBase):
                 retry = kwdict.get("_http_retry")
                 if retry and retry(response):
                     continue
+                self.release_conn(response)
                 self.log.warning(msg)
                 return False

             # check for invalid responses
             validate = kwdict.get("_http_validate")
             if validate and self.validate:
-                result = validate(response)
+                try:
+                    result = validate(response)
+                except Exception:
+                    self.release_conn(response)
+                    raise
                 if isinstance(result, str):
                     url = result
                     tries -= 1
                     continue
                 if not result:
+                    self.release_conn(response)
                     self.log.warning("Invalid response")
                     return False

@@ -184,11 +196,13 @@ class HttpDownloader(DownloaderBase):
                 size = text.parse_int(size, None)
                 if size is not None:
                     if self.minsize and size < self.minsize:
+                        self.release_conn(response)
                         self.log.warning(
                             "File size smaller than allowed minimum (%s < %s)",
                             size, self.minsize)
                         return False
                     if self.maxsize and size > self.maxsize:
+                        self.release_conn(response)
                         self.log.warning(
                             "File size larger than allowed maximum (%s > %s)",
                             size, self.maxsize)
@@ -280,6 +294,18 @@ class HttpDownloader(DownloaderBase):

         return True

+    def release_conn(self, response):
+        """Release connection back to pool by consuming response body"""
+        try:
+            for _ in response.iter_content(self.chunk_size):
+                pass
+        except (RequestException, SSLError, OpenSSLError) as exc:
+            print()
+            self.log.debug(
+                "Unable to consume response body (%s: %s); "
+                "closing the connection anyway", exc.__class__.__name__, exc)
+            response.close()
+
     @staticmethod
     def receive(fp, content, bytes_total, bytes_start):
         write = fp.write
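Note: the `consume-content` option above trades bandwidth for connection reuse. `requests`/urllib3 can only return a keep-alive connection to the pool once the response body has been read, while `close()` discards the socket. A minimal sketch of the difference outside of gallery-dl (URL and chunk size are illustrative):

```python
import requests

session = requests.Session()

# close() frees the socket immediately but tears down the connection;
# the next request to the same host pays for a new TCP/TLS handshake
resp = session.get("https://example.org/big-file", stream=True)
resp.close()

# draining the body instead lets urllib3 put the connection back into
# the pool, so a follow-up request to the same host can reuse it
resp = session.get("https://example.org/big-file", stream=True)
for _ in resp.iter_content(64 * 1024):
    pass
resp = session.get("https://example.org/other-file")  # reused socket
```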
diff --git a/gallery_dl/extractor/2chen.py b/gallery_dl/extractor/2chen.py
index d9674d8..f142690 100644
--- a/gallery_dl/extractor/2chen.py
+++ b/gallery_dl/extractor/2chen.py
@@ -4,35 +4,46 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-"""Extractors for https://2chen.moe/"""
+"""Extractors for https://sturdychan.help/"""

 from .common import Extractor, Message
 from .. import text

+BASE_PATTERN = r"(?:https?://)?(?:sturdychan.help|2chen\.(?:moe|club))"
+

 class _2chenThreadExtractor(Extractor):
     """Extractor for 2chen threads"""
     category = "2chen"
     subcategory = "thread"
+    root = "https://sturdychan.help"
     directory_fmt = ("{category}", "{board}", "{thread} {title}")
     filename_fmt = "{time} {filename}.{extension}"
     archive_fmt = "{board}_{thread}_{hash}_{time}"
-    pattern = r"(?:https?://)?2chen\.(?:moe|club)/([^/?#]+)/(\d+)"
+    pattern = BASE_PATTERN + r"/([^/?#]+)/(\d+)"
     test = (
-        ("https://2chen.moe/tv/496715", {
-            "pattern": r"https://2chen\.su/assets/images/src/\w{40}\.\w+$",
+        ("https://sturdychan.help/tv/268929", {
+            "pattern": r"https://sturdychan\.help/assets/images"
+                       r"/src/\w{40}\.\w+$",
             "count": ">= 179",
+            "keyword": {
+                "board": "tv",
+                "date": "type:datetime",
+                "hash": r"re:[0-9a-f]{40}",
+                "name": "Anonymous",
+                "no": r"re:\d+",
+                "thread": "268929",
+                "time": int,
+                "title": "「/ttg/ #118: 🇧🇷 edition」",
+                "url": str,
+            },
         }),
-        ("https://2chen.club/tv/1", {
-            "count": 5,
-        }),
-        # 404
+        ("https://2chen.club/tv/1"),
         ("https://2chen.moe/jp/303786"),
     )

     def __init__(self, match):
         Extractor.__init__(self, match)
-        self.root = text.root_from_url(match.group(0))
         self.board, self.thread = match.groups()

     def items(self):
@@ -88,9 +99,10 @@ class _2chenBoardExtractor(Extractor):
     """Extractor for 2chen boards"""
     category = "2chen"
     subcategory = "board"
-    pattern = r"(?:https?://)?2chen\.(?:moe|club)/([^/?#]+)(?:/catalog|/?$)"
+    root = "https://sturdychan.help"
+    pattern = BASE_PATTERN + r"/([^/?#]+)(?:/catalog|/?$)"
     test = (
-        ("https://2chen.moe/co/", {
+        ("https://sturdychan.help/co/", {
             "pattern": _2chenThreadExtractor.pattern
         }),
         ("https://2chen.moe/co"),
@@ -100,7 +112,6 @@ class _2chenBoardExtractor(Extractor):

     def __init__(self, match):
         Extractor.__init__(self, match)
-        self.root = text.root_from_url(match.group(0))
         self.board = match.group(1)

     def items(self):
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 553a110..9841ca7 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -74,6 +74,7 @@ modules = [
     "instagram",
     "issuu",
     "itaku",
+    "itchio",
     "kabeuchi",
     "keenspot",
     "kemonoparty",
@@ -93,7 +94,6 @@ modules = [
     "mangapark",
     "mangasee",
     "mangoxo",
-    "mememuseum",
     "misskey",
     "myhentaigallery",
     "myportfolio",
@@ -133,6 +133,7 @@ modules = [
     "seiga",
     "senmanga",
     "sexcom",
+    "shimmie2",
     "simplyhentai",
     "skeb",
     "slickpic",
@@ -156,6 +157,7 @@ modules = [
     "urlshortener",
     "vanillarock",
     "vichan",
+    "vipergirls",
     "vk",
     "vsco",
     "wallhaven",
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index 1469aad..d8cc51d 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -81,10 +81,13 @@ class BehanceGalleryExtractor(BehanceExtractor):
         ("https://www.behance.net/gallery/88276087/Audi-R8-RWD", {
             "count": 20,
             "url": "6bebff0d37f85349f9ad28bd8b76fd66627c1e2f",
+            "pattern": r"https://mir-s3-cdn-cf\.behance\.net/project_modules"
+                       r"/source/[0-9a-f]+.[0-9a-f]+\.jpg"
         }),
         # 'video' modules (#1282)
         ("https://www.behance.net/gallery/101185577/COLCCI", {
-            "pattern": r"ytdl:https://cdn-prod-ccv\.adobe\.com/",
+            "pattern": r"https://cdn-prod-ccv\.adobe\.com/\w+"
+                       r"/rend/\w+_720\.mp4\?",
             "count": 3,
         }),
     )
@@ -129,26 +132,35 @@ class BehanceGalleryExtractor(BehanceExtractor):
         append = result.append

         for module in data["modules"]:
-            mtype = module["type"]
+            mtype = module["__typename"]

-            if mtype == "image":
-                url = module["sizes"]["original"]
+            if mtype == "ImageModule":
+                url = module["imageSizes"]["size_original"]["url"]
                 append((url, module))

-            elif mtype == "video":
-                page = self.request(module["src"]).text
-                url = text.extr(page, '<source src="', '"')
-                if text.ext_from_url(url) == "m3u8":
-                    url = "ytdl:" + url
+            elif mtype == "VideoModule":
+                renditions = module["videoData"]["renditions"]
+                try:
+                    url = [
+                        r["url"] for r in renditions
+                        if text.ext_from_url(r["url"]) != "m3u8"
+                    ][-1]
+                except Exception as exc:
+                    self.log.debug("%s: %s", exc.__class__.__name__, exc)
+                    url = "ytdl:" + renditions[-1]["url"]
                 append((url, module))

-            elif mtype == "media_collection":
+            elif mtype == "MediaCollectionModule":
                 for component in module["components"]:
-                    url = component["sizes"]["source"]
-                    append((url, module))
-
-            elif mtype == "embed":
-                embed = module.get("original_embed") or module.get("embed")
+                    for size in component["imageSizes"].values():
+                        if size:
+                            parts = size["url"].split("/")
+                            parts[4] = "source"
+                            append(("/".join(parts), module))
+                            break
+
+            elif mtype == "EmbedModule":
+                embed = module.get("originalEmbed") or module.get("fluidEmbed")
                 if embed:
                     append(("ytdl:" + text.extr(embed, 'src="', '"'), module))
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index f532a97..18d9867 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1417,7 +1417,14 @@ class DeviantartOAuthAPI():
         """Get the original file download (if allowed)"""
         endpoint = "/deviation/download/" + deviation_id
         params = {"mature_content": self.mature}
-        return self._call(endpoint, params=params, public=public)
+
+        try:
+            return self._call(
+                endpoint, params=params, public=public, log=False)
+        except Exception:
+            if not self.refresh_token_key:
+                raise
+            return self._call(endpoint, params=params, public=False)

     def deviation_metadata(self, deviations):
         """ Fetch deviation metadata for a set of deviations"""
@@ -1518,7 +1525,7 @@ class DeviantartOAuthAPI():
                 refresh_token_key, data["refresh_token"])
         return "Bearer " + data["access_token"]

-    def _call(self, endpoint, fatal=True, public=None, **kwargs):
+    def _call(self, endpoint, fatal=True, log=True, public=None, **kwargs):
         """Call an API endpoint"""
         url = "https://www.deviantart.com/api/v1/oauth2" + endpoint
         kwargs["fatal"] = None
@@ -1563,7 +1570,8 @@ class DeviantartOAuthAPI():
                     "cs/configuration.rst#extractordeviantartclient-id"
                     "--client-secret")
             else:
-                self.log.error(msg)
+                if log:
+                    self.log.error(msg)
             return data

     def _pagination(self, endpoint, params,
@@ -1571,15 +1579,14 @@ class DeviantartOAuthAPI():
         warn = True
         if public is None:
             public = self.public
-        elif not public:
-            self.public = False

         while True:
             data = self._call(endpoint, params=params, public=public)
-            if key not in data:
+            try:
+                results = data[key]
+            except KeyError:
                 self.log.error("Unexpected API response: %s", data)
                 return
-            results = data[key]

             if unpack:
                 results = [item["journal"] for item in results
@@ -1588,7 +1595,7 @@ class DeviantartOAuthAPI():
             if public and len(results) < params["limit"]:
                 if self.refresh_token_key:
                     self.log.debug("Switching to private access token")
-                    self.public = public = False
+                    public = False
                     continue
                 elif data["has_more"] and warn:
                     warn = False
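Note: the new `deviation_download` body is a quiet-first fallback — try with the current (possibly public) token and logging suppressed, and repeat with the private token only when credentials exist. The shape of that pattern in isolation (names are placeholders, not gallery-dl's API):

```python
def download_with_fallback(call, endpoint, params, refresh_token=None):
    """Hypothetical stand-in for DeviantartOAuthAPI.deviation_download()."""
    try:
        # the first attempt may legitimately fail; keep it out of the logs
        return call(endpoint, params=params, public=True, log=False)
    except Exception:
        if not refresh_token:
            raise  # nothing to fall back to -> surface the error
        return call(endpoint, params=params, public=False)
```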
("https://www.imagefap.com/gallery/7102714", { "pattern": r"https://cdnh?\.imagefap\.com" r"/images/full/\d+/\d+/\d+\.jpg", - "keyword": "2ba96e84c2952c4750e9fa94a3f2b1f965cec2f3", + "keyword": "bdcb75b1e4b9dddc718f3d66e1a58afa9d81a38b", "content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab", }), ("https://www.imagefap.com/gallery/7876223", { "pattern": r"https://cdnh?\.imagefap\.com" r"/images/full/\d+/\d+/\d+\.jpg", "keyword": { + "categories": ["Asses", "Softcore", "Pornstars"], "count": 44, + "description": "", "gallery_id": 7876223, "image_id": int, "num": int, @@ -67,6 +69,21 @@ class ImagefapGalleryExtractor(ImagefapExtractor): }, "count": 44, }), + # description (#3905) + ("https://www.imagefap.com/gallery/6180555", { + "range": "1", + "keyword": { + "categories": ["Amateur", "Softcore", "Homemade"], + "count": 36, + "description": "Nude and dressed sluts showing off the goods", + "gallery_id": 6180555, + "image_id": int, + "num": int, + "tags": [] , + "title": "Dressed or Undressed MG*", + "uploader": "splitopen", + }, + }), ("https://www.imagefap.com/pictures/7102714"), ("https://www.imagefap.com/gallery.php?gid=7102714"), ("https://beta.imagefap.com/gallery.php?gid=7102714"), @@ -92,9 +109,14 @@ class ImagefapGalleryExtractor(ImagefapExtractor): data = { "gallery_id": text.parse_int(self.gid), - "tags": extr('name="keywords" content="', '"').split(", "), "uploader": extr("porn picture gallery by ", " to see hottest"), "title": text.unescape(extr("<title>", "<")), + "description": text.unescape(extr( + 'id="gdesc_text"', '<').partition(">")[2]), + "categories": text.split_html(extr( + 'id="cnt_cats"', '</div>'))[1::2], + "tags": text.split_html(extr( + 'id="cnt_tags"', '</div>'))[1::2], "count": text.parse_int(extr(' 1 of ', ' pics"')), } diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py index d57ec89..df4ff26 100644 --- a/gallery_dl/extractor/imagehosts.py +++ b/gallery_dl/extractor/imagehosts.py @@ -135,6 +135,29 @@ class ImxtoImageExtractor(ImagehostImageExtractor): } +class ImxtoGalleryExtractor(ImagehostImageExtractor): + """Extractor for image galleries from imx.to""" + category = "imxto" + subcategory = "gallery" + pattern = r"(?:https?://)?(?:www\.)?(imx\.to/g/([^/?#]+))" + test = ("https://imx.to/g/ozdy", { + "pattern": ImxtoImageExtractor.pattern, + "keyword": {"title": "untitled gallery"}, + "count": 40, + }) + + def items(self): + page = self.request(self.page_url).text + title, pos = text.extract(page, '<div class="title', '<') + data = { + "_extractor": ImxtoImageExtractor, + "title": text.unescape(title.partition(">")[2]).strip(), + } + + for url in text.extract_iter(page, "<a href=", " ", pos): + yield Message.Queue, url.strip("\"'"), data + + class AcidimgImageExtractor(ImagehostImageExtractor): """Extractor for single images from acidimg.cc""" category = "acidimg" diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 42d0a7b..f8f1600 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2022 Mike Fährmann +# Copyright 2015-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -347,8 +347,8 @@ class ImgurAPI(): def __init__(self, extractor): self.extractor = extractor self.headers = { - "Authorization": "Client-ID " + extractor.config( - "client-id", "546c25a59c58ad7"), + "Authorization": "Client-ID " + ( + 
extractor.config("client-id") or "546c25a59c58ad7"), } def account_favorites(self, account): diff --git a/gallery_dl/extractor/itchio.py b/gallery_dl/extractor/itchio.py new file mode 100644 index 0000000..6034d12 --- /dev/null +++ b/gallery_dl/extractor/itchio.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- + +# Copyright 2023 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://itch.io/""" + +from .common import Extractor, Message +from .. import text + + +class ItchioGameExtractor(Extractor): + """Extractor for itch.io games""" + category = "itchio" + subcategory = "game" + root = "https://itch.io" + directory_fmt = ("{category}", "{user[name]}") + filename_fmt = "{game[title]} ({id}).{extension}" + archive_fmt = "{id}" + pattern = r"(?:https?://)?(\w+).itch\.io/([\w-]+)" + test = ( + ("https://sirtartarus.itch.io/a-craft-of-mine", { + "pattern": r"https://\w+\.ssl\.hwcdn\.net/upload2" + r"/game/1983311/7723751\?", + "count": 1, + "keyword": { + "extension": "", + "filename": "7723751", + "game": { + "id": 1983311, + "noun": "game", + "title": "A Craft Of Mine", + "url": "https://sirtartarus.itch.io/a-craft-of-mine", + }, + "user": { + "id": 4060052, + "name": "SirTartarus", + "url": "https://sirtartarus.itch.io", + }, + }, + }), + ) + + def __init__(self, match): + self.user, self.slug = match.groups() + Extractor.__init__(self, match) + + def items(self): + game_url = "https://{}.itch.io/{}".format(self.user, self.slug) + page = self.request(game_url).text + + params = { + "source": "view_game", + "as_props": "1", + "after_download_lightbox": "true", + } + headers = { + "Referer": game_url, + "X-Requested-With": "XMLHttpRequest", + "Origin": "https://{}.itch.io".format(self.user), + } + data = { + "csrf_token": text.unquote(self.session.cookies["itchio_token"]), + } + + for upload_id in text.extract_iter(page, 'data-upload_id="', '"'): + file_url = "{}/file/{}".format(game_url, upload_id) + info = self.request(file_url, method="POST", params=params, + headers=headers, data=data).json() + + game = info["lightbox"]["game"] + user = info["lightbox"]["user"] + game["url"] = game_url + user.pop("follow_button", None) + game = {"game": game, "user": user, "id": upload_id} + + url = info["url"] + yield Message.Directory, game + yield Message.Url, url, text.nameext_from_url(url, game) diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py index 5ba18a3..6fd9f49 100644 --- a/gallery_dl/extractor/manganelo.py +++ b/gallery_dl/extractor/manganelo.py @@ -16,21 +16,26 @@ BASE_PATTERN = r"(?:https?://)?((?:chap|read|www\.|m\.)?mangan(?:at|el)o\.com)" class ManganeloBase(): category = "manganelo" root = "https://chapmanganato.com" + _match_chapter = None def __init__(self, match): domain, path = match.groups() super().__init__(match, "https://" + domain + path) self.session.headers['Referer'] = self.root - self._match_chapter = re.compile( - r"(?:[Vv]ol\.?\s*(\d+)\s?)?" - r"[Cc]hapter\s*([^:]+)" - r"(?::\s*(.+))?").match + if self._match_chapter is None: + ManganeloBase._match_chapter = re.compile( + r"(?:[Vv]ol\.?\s*(\d+)\s?)?" 
+ r"[Cc]hapter\s*(\d+)([^:]*)" + r"(?::\s*(.+))?").match def _parse_chapter(self, info, manga, author, date=None): match = self._match_chapter(info) - volume, chapter, title = match.groups() if match else ("", "", info) - chapter, sep, minor = chapter.partition(".") + if match: + volume, chapter, minor, title = match.groups() + else: + volume = chapter = minor = "" + title = info return { "manga" : manga, @@ -39,7 +44,7 @@ class ManganeloBase(): "title" : text.unescape(title) if title else "", "volume" : text.parse_int(volume), "chapter" : text.parse_int(chapter), - "chapter_minor": sep + minor, + "chapter_minor": minor, "lang" : "en", "language" : "English", } @@ -61,6 +66,10 @@ class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor): "keyword": "06e01fa9b3fc9b5b954c0d4a98f0153b40922ded", "count": 45, }), + ("https://chapmanganato.com/manga-no991297/chapter-8", { + "keyword": {"chapter": 8, "chapter_minor": "-1"}, + "count": 20, + }), ("https://readmanganato.com/manga-gn983696/chapter-23"), ("https://manganelo.com/chapter/gamers/chapter_15"), ("https://manganelo.com/chapter/gq921227/chapter_23"), diff --git a/gallery_dl/extractor/nana.py b/gallery_dl/extractor/nana.py index 0f79d7f..24e676f 100644 --- a/gallery_dl/extractor/nana.py +++ b/gallery_dl/extractor/nana.py @@ -20,19 +20,23 @@ class NanaGalleryExtractor(GalleryExtractor): "059f7de55a4297413bfbd432ce7d6e724dd42bae"), { "pattern": r"https://nana\.my\.id/reader/" r"\w+/image/page\?path=.*\.\w+", - "title" : "Everybody Loves Shion", - "artist" : "fuzui", - "tags" : list, - "count" : 29, + "keyword": { + "title" : "Everybody Loves Shion", + "artist": "fuzui", + "tags" : list, + "count" : 29, + }, }), (("https://nana.my.id/reader/" "77c8712b67013e427923573379f5bafcc0c72e46"), { "pattern": r"https://nana\.my\.id/reader/" r"\w+/image/page\?path=.*\.\w+", - "title" : "Lovey-Dovey With an Otaku-Friendly Gyaru", - "artist" : "Sueyuu", - "tags" : ["Sueyuu"], - "count" : 58, + "keyword": { + "title" : "Lovey-Dovey With an Otaku-Friendly Gyaru", + "artist": "Sueyuu", + "tags" : ["Sueyuu"], + "count" : 58, + }, }), ) diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py index 5f4ceea..beb3da2 100644 --- a/gallery_dl/extractor/nitter.py +++ b/gallery_dl/extractor/nitter.py @@ -162,7 +162,11 @@ class NitterExtractor(BaseExtractor): banner = extr('class="profile-banner"><a href="', '"') try: - uid = banner.split("%2F")[4] + if "/enc/" in banner: + uid = binascii.a2b_base64(banner.rpartition( + "/")[2]).decode().split("/")[4] + else: + uid = banner.split("%2F")[4] except Exception: uid = 0 @@ -302,7 +306,10 @@ class NitterTweetsExtractor(NitterExtractor): r"/media%2FCGMNYZvW0AIVoom\.jpg", "range": "1", }), - ("https://nitter.1d4.us/supernaturepics"), + ("https://nitter.1d4.us/supernaturepics", { + "range": "1", + "keyword": {"user": {"id": "2976459548"}}, + }), ("https://nitter.kavin.rocks/id:2976459548"), ("https://nitter.unixfox.eu/supernaturepics"), ) diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py index f381f12..af2a367 100644 --- a/gallery_dl/extractor/nozomi.py +++ b/gallery_dl/extractor/nozomi.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2022 Mike Fährmann +# Copyright 2019-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -63,10 +63,20 @@ class NozomiExtractor(Extractor): yield Message.Directory, post for post["num"], image in enumerate(images, 1): 
post["filename"] = post["dataid"] = did = image["dataid"] - post["extension"] = ext = image["type"] post["is_video"] = video = bool(image.get("is_video")) + + ext = image["type"] + if video: + subdomain = "v" + elif ext == "gif": + subdomain = "g" + else: + subdomain = "w" + ext = "webp" + + post["extension"] = ext post["url"] = url = "https://{}.nozomi.la/{}/{}/{}.{}".format( - "v" if video else "i", did[-1], did[-3:-1], did, ext) + subdomain, did[-1], did[-3:-1], did, ext) yield Message.Url, url, post def posts(self): @@ -97,15 +107,17 @@ class NozomiPostExtractor(NozomiExtractor): pattern = r"(?:https?://)?nozomi\.la/post/(\d+)" test = ( ("https://nozomi.la/post/3649262.html", { - "url": "f4522adfc8159355fd0476de28761b5be0f02068", - "content": "cd20d2c5149871a0b80a1b0ce356526278964999", + "url": "e5525e717aec712843be8b88592d6406ae9e60ba", + "pattern": r"https://w\.nozomi\.la/2/15/aaa9f7c632cde1e1a5baaff3fb" + r"6a6d857ec73df7fdc5cf5a358caf604bf73152\.webp", + "content": "6d62c4a7fea50c0a89d499603c4e7a2b4b9bffa8", "keyword": { "artist" : ["hammer (sunset beach)"], "character": ["patchouli knowledge"], "copyright": ["touhou"], "dataid" : "re:aaa9f7c632cde1e1a5baaff3fb6a6d857ec73df7fdc5", "date" : "dt:2016-07-26 02:32:03", - "extension": "jpg", + "extension": "webp", "filename" : str, "height" : 768, "is_video" : False, @@ -118,14 +130,26 @@ class NozomiPostExtractor(NozomiExtractor): }), # multiple images per post ("https://nozomi.la/post/25588032.html", { - "url": "6aa3b7db385abcc9d374bdffd19187bccbf8f228", - "keyword": "2a2998af93c6438863c4077bd386b613b8bc2957", + "url": "fb956ccedcf2cf509739d26e2609e910244aa56c", + "keyword": "516ca5cbd0d2a46a8ce26679d6e08de5ac42184b", "count": 7, }), # empty 'date' (#1163) ("https://nozomi.la/post/130309.html", { "keyword": {"date": None}, - }) + }), + # gif + ("https://nozomi.la/post/1647.html", { + "pattern": r"https://g\.nozomi\.la/a/f0/d1b06469e00d72e4f6346209c1" + r"49db459d76b58a074416c260ed93cc31fa9f0a\.gif", + "content": "952efb78252bbc9fb56df2e8fafb68d5e6364181", + }), + # video + ("https://nozomi.la/post/2269847.html", { + "pattern": r"https://v\.nozomi\.la/d/0e/ff88398862669783691b31519f" + r"2bea3a35c24b6e62e3ba2d89b4409e41c660ed\.webm", + "content": "57065e6c16da7b1c7098a63b36fb0c6c6f1b9bca", + }), ) def __init__(self, match): @@ -160,7 +184,7 @@ class NozomiTagExtractor(NozomiExtractor): archive_fmt = "t_{search_tags}_{dataid}" pattern = r"(?:https?://)?nozomi\.la/tag/([^/?#]+)-(\d+)\." 
test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", { - "pattern": r"^https://[iv]\.nozomi\.la/\w/\w\w/\w+\.\w+$", + "pattern": r"^https://[wgv]\.nozomi\.la/\w/\w\w/\w+\.\w+$", "count": ">= 25", "range": "1-25", }) diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index ec46ca3..404f296 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -71,8 +71,11 @@ class OAuthBase(Extractor): browser = self.config("browser", True) if browser: - import webbrowser - browser = webbrowser.get() + try: + import webbrowser + browser = webbrowser.get() + except Exception: + browser = None if browser and browser.open(url): name = getattr(browser, "name", "Browser") diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py index 56e3b39..f0a50c8 100644 --- a/gallery_dl/extractor/paheal.py +++ b/gallery_dl/extractor/paheal.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2022 Mike Fährmann +# Copyright 2018-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -14,7 +14,7 @@ from .. import text class PahealExtractor(Extractor): """Base class for paheal extractors""" - basecategory = "booru" + basecategory = "shimmie2" category = "paheal" filename_fmt = "{category}_{id}_{md5}.{extension}" archive_fmt = "{id}" diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index a17518f..b704031 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -596,6 +596,9 @@ class PixivSearchExtractor(PixivExtractor): sort_map = { "date": "date_asc", "date_d": "date_desc", + "popular_d": "popular_desc", + "popular_male_d": "popular_male_desc", + "popular_female_d": "popular_female_desc", } try: self.sort = sort = sort_map[sort] @@ -670,7 +673,7 @@ class PixivPixivisionExtractor(PixivExtractor): def works(self): return ( - self.api.illust_detail(illust_id) + self.api.illust_detail(illust_id.partition("?")[0]) for illust_id in util.unique_sequence(text.extract_iter( self.page, '<a href="https://www.pixiv.net/en/artworks/', '"')) ) diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index 305de2a..cefe8d3 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -303,8 +303,8 @@ class RedditImageExtractor(Extractor): category = "reddit" subcategory = "image" archive_fmt = "{filename}" - pattern = (r"(?:https?://)?i\.redd(?:\.it|ituploads\.com)" - r"/[^/?#]+(?:\?[^#]*)?") + pattern = (r"(?:https?://)?((?:i|preview)\.redd\.it|i\.reddituploads\.com)" + r"/([^/?#]+)(\?[^#]*)?") test = ( ("https://i.redd.it/upjtjcx2npzz.jpg", { "url": "0de614900feef103e580b632190458c0b62b641a", @@ -315,12 +315,29 @@ class RedditImageExtractor(Extractor): "url": "f24f25efcedaddeec802e46c60d77ef975dc52a5", "content": "541dbcc3ad77aa01ee21ca49843c5e382371fae7", }), + # preview.redd.it -> i.redd.it + (("https://preview.redd.it/00af44lpn0u51.jpg?width=960&crop=smart" + "&auto=webp&v=enabled&s=dbca8ab84033f4a433772d9c15dbe0429c74e8ac"), { + "pattern": r"^https://i\.redd\.it/00af44lpn0u51\.jpg$" + }), ) + def __init__(self, match): + Extractor.__init__(self, match) + domain = match.group(1) + self.path = match.group(2) + if domain == "preview.redd.it": + self.domain = "i.redd.it" + self.query = "" + else: + self.domain = domain + self.query = match.group(3) or "" + def items(self): - data = text.nameext_from_url(self.url) + url = "https://{}/{}{}".format(self.domain, 
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index ea4cf43..f36051b 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -13,6 +13,7 @@ from .common import Message
 from .. import text, util, exception
 from ..cache import cache
 import collections
+import re

 BASE_PATTERN = r"(?:https?://)?" \
     r"(?:(?:chan|beta|black|white)\.sankakucomplex\.com|sankaku\.app)" \
@@ -101,6 +102,11 @@ class SankakuTagExtractor(SankakuExtractor):
         # match arbitrary query parameters
         ("https://chan.sankakucomplex.com"
          "/?tags=marie_rose&page=98&next=3874906&commit=Search"),
+        # 'date:' tags (#1790)
+        ("https://chan.sankakucomplex.com/?tags=date:2023-03-20", {
+            "range": "1",
+            "count": 1,
+        }),
     )

     def __init__(self, match):
@@ -108,6 +114,15 @@ class SankakuTagExtractor(SankakuExtractor):
         query = text.parse_query(match.group(1))
         self.tags = text.unquote(query.get("tags", "").replace("+", " "))

+        if "date:" in self.tags:
+            # rewrite 'date:' tags (#1790)
+            self.tags = re.sub(
+                r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)",
+                r"date:\3.\2.\1", self.tags)
+            self.tags = re.sub(
+                r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)",
+                r"date:\1.\2.\3", self.tags)
+
     def metadata(self):
         return {"search_tags": self.tags}

@@ -153,7 +168,7 @@ class SankakuPostExtractor(SankakuExtractor):
     """Extractor for single posts from sankaku.app"""
     subcategory = "post"
     archive_fmt = "{id}"
-    pattern = BASE_PATTERN + r"/post/show/(\d+)"
+    pattern = BASE_PATTERN + r"/post/show/([0-9a-f]+)"
     test = (
         ("https://sankaku.app/post/show/360451", {
             "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
@@ -181,6 +196,17 @@ class SankakuPostExtractor(SankakuExtractor):
                 "tags_general": ["key(mangaka)", "key(mangaka)"],
             },
         }),
+        # md5 hexdigest instead of ID (#3952)
+        (("https://chan.sankakucomplex.com/post/show"
+          "/f8ba89043078f0e4be2d9c46550b840a"), {
+            "pattern": r"https://s\.sankakucomplex\.com"
+                       r"/data/f8/ba/f8ba89043078f0e4be2d9c46550b840a\.jpg",
+            "count": 1,
+            "keyword": {
+                "id": 33195194,
+                "md5": "f8ba89043078f0e4be2d9c46550b840a",
+            },
+        }),
         ("https://chan.sankakucomplex.com/post/show/360451"),
         ("https://chan.sankakucomplex.com/ja/post/show/360451"),
         ("https://beta.sankakucomplex.com/post/show/360451"),
@@ -248,7 +274,7 @@ class SankakuAPI():
             "lang" : "en",
             "page" : "1",
             "limit": "1",
-            "tags" : "id_range:" + post_id,
+            "tags" : ("md5:" if len(post_id) == 32 else "id_range:") + post_id,
         }
         return self._call("/posts", params)
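Note: Sankaku only understands `date:` tags in `YYYY.MM.DD` order; the two substitutions above normalize both `DD.MM.YYYY` and ISO-style `YYYY-MM-DD` input. Standalone, with the same regexes:

```python
import re

def normalize(tags):
    tags = re.sub(r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)",
                  r"date:\3.\2.\1", tags)        # DD.MM.YYYY -> YYYY.MM.DD
    return re.sub(r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)",
                  r"date:\1.\2.\3", tags)        # YYYY-MM-DD -> YYYY.MM.DD

assert normalize("date:2023-03-20") == "date:2023.03.20"
assert normalize("date:20.03.2023") == "date:2023.03.20"
```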
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py
new file mode 100644
index 0000000..285cd8f
--- /dev/null
+++ b/gallery_dl/extractor/shimmie2.py
@@ -0,0 +1,326 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2023 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for Shimmie2 instances"""
+
+from .common import BaseExtractor, Message
+from .. import text
+
+
+class Shimmie2Extractor(BaseExtractor):
+    """Base class for shimmie2 extractors"""
+    basecategory = "shimmie2"
+    filename_fmt = "{category}_{id}{md5:?_//}.{extension}"
+    archive_fmt = "{id}"
+
+    def __init__(self, match):
+        BaseExtractor.__init__(self, match)
+
+        try:
+            instance = INSTANCES[self.category]
+        except KeyError:
+            pass
+        else:
+            cookies = instance.get("cookies")
+            if cookies:
+                domain = self.root.rpartition("/")[2]
+                self._update_cookies_dict(cookies, domain=domain)
+            file_url = instance.get("file_url")
+            if file_url:
+                self.file_url_fmt = file_url
+
+    def items(self):
+        data = self.metadata()
+
+        for post in self.posts():
+
+            for key in ("id", "width", "height"):
+                post[key] = text.parse_int(post[key])
+            post["tags"] = text.unquote(post["tags"])
+            post.update(data)
+
+            url = post["file_url"]
+            if "/index.php?" in url:
+                post["filename"], _, post["extension"] = \
+                    url.rpartition("/")[2].rpartition(".")
+            else:
+                text.nameext_from_url(url, post)
+
+            yield Message.Directory, post
+            yield Message.Url, url, post
+
+    def metadata(self):
+        """Return general metadata"""
+        return ()
+
+    def posts(self):
+        """Return an iterable containing data of all relevant posts"""
+        return ()
+
+
+INSTANCES = {
+    "mememuseum": {
+        "root": "https://meme.museum",
+        "pattern": r"meme\.museum",
+    },
+    "loudbooru": {
+        "root": "https://loudbooru.com",
+        "pattern": r"loudbooru\.com",
+        "cookies": {"ui-tnc-agreed": "true"},
+    },
+    "giantessbooru": {
+        "root": "https://giantessbooru.com",
+        "pattern": r"giantessbooru\.com",
+        "cookies": {"agreed": "true"},
+    },
+    "tentaclerape": {
+        "root": "https://tentaclerape.net",
+        "pattern": r"tentaclerape\.net",
+    },
+    "cavemanon": {
+        "root": "https://booru.cavemanon.xyz",
+        "pattern": r"booru\.cavemanon\.xyz",
+        "file_url": "{0}/index.php?q=image/{2}.{4}"
+    },
+}
+
+BASE_PATTERN = Shimmie2Extractor.update(INSTANCES) + r"/(?:index\.php\?q=)?"
+
+
+class Shimmie2TagExtractor(Shimmie2Extractor):
+    """Extractor for shimmie2 posts by tag search"""
+    subcategory = "tag"
+    directory_fmt = ("{category}", "{search_tags}")
+    file_url_fmt = "{}/_images/{}/{}%20-%20{}.{}"
+    pattern = BASE_PATTERN + r"post/list/([^/?#]+)(?:/(\d+))?()"
+    test = (
+        ("https://meme.museum/post/list/animated/1", {
+            "pattern": r"https://meme\.museum/_images/\w+/\d+%20-%20",
+            "count": ">= 30"
+        }),
+        ("https://loudbooru.com/post/list/original_character/1", {
+            "pattern": r"https://loudbooru\.com/_images/[0-9a-f]{32}/\d+",
+            "range": "1-100",
+            "count": 100,
+        }),
+        ("https://giantessbooru.com/post/list/smiling/1", {
+            "pattern": r"https://giantessbooru\.com/_images/[0-9a-f]{32}/\d+",
+            "range": "1-100",
+            "count": 100,
+        }),
+        ("https://tentaclerape.net/post/list/comic/1", {
+            "pattern": r"https://tentaclerape\.net/_images/[0-9a-f]{32}/\d+",
+            "range": "1-100",
+            "count": 100,
+        }),
+        ("https://booru.cavemanon.xyz/index.php?q=post/list/Amber/1", {
+            "pattern": r"https://booru\.cavemanon\.xyz"
+                       r"/index\.php\?q=image/\d+\.\w+",
+            "range": "1-100",
+            "count": 100,
+        }),
+    )
+
+    def __init__(self, match):
+        Shimmie2Extractor.__init__(self, match)
+        lastindex = match.lastindex
+        self.tags = text.unquote(match.group(lastindex-2))
+        self.page = match.group(lastindex-1)
+
+    def metadata(self):
+        return {"search_tags": self.tags}
+
+    def posts(self):
+        pnum = text.parse_int(self.page, 1)
+        file_url_fmt = self.file_url_fmt.format
+
+        init = True
+        mime = ""
+
+        while True:
+            url = "{}/post/list/{}/{}".format(self.root, self.tags, pnum)
+            page = self.request(url).text
+            extr = text.extract_from(page)
+
+            if init:
+                init = False
+                has_mime = ("data-mime='" in page)
+                has_pid = ("data-post-id='" in page)
+
+            while True:
+                if has_mime:
+                    mime = extr("data-mime='", "'")
+                if has_pid:
+                    pid = extr("data-post-id='", "'")
+                else:
+                    pid = extr("href='/post/view/", "?")
+
+                if not pid:
+                    break
+
+                tags, dimensions, size = extr("title='", "'").split(" // ")
+                width, _, height = dimensions.partition("x")
+                md5 = extr("/_thumbs/", "/")
+
+                yield {
+                    "file_url": file_url_fmt(
+                        self.root, md5, pid, text.quote(tags),
+                        mime.rpartition("/")[2] if mime else "jpg"),
+                    "id": pid,
+                    "md5": md5,
+                    "tags": tags,
+                    "width": width,
+                    "height": height,
+                    "size": text.parse_bytes(size[:-1]),
+                }
+
+            pnum += 1
+            if not extr(">Next<", ">"):
+                if not extr("/{}'>{}<".format(pnum, pnum), ">"):
+                    return
+
+
+class Shimmie2PostExtractor(Shimmie2Extractor):
+    """Extractor for single shimmie2 posts"""
+    subcategory = "post"
+    pattern = BASE_PATTERN + r"post/view/(\d+)"
+    test = (
+        ("https://meme.museum/post/view/10243", {
+            "pattern": r"https://meme\.museum/_images/105febebcd5ca791ee332adc"
+                       r"49971f78/10243%20-%20g%20beard%20open_source%20richar"
+                       r"d_stallman%20stallman%20tagme%20text\.jpg",
+            "content": "45565f3f141fc960a8ae1168b80e718a494c52d2",
+            "keyword": {
+                "extension": "jpg",
+                "file_url": "https://meme.museum/_images/105febebcd5ca791ee332"
+                            "adc49971f78/10243%20-%20g%20beard%20open_source%2"
+                            "0richard_stallman%20stallman%20tagme%20text.jpg",
+                "filename": "10243 - g beard open_source richard_stallman "
+                            "stallman tagme text",
+                "height": 451,
+                "id": 10243,
+                "md5": "105febebcd5ca791ee332adc49971f78",
+                "size": 0,
+                "subcategory": "post",
+                "tags": "/g/ beard open_source "
+                        "richard_stallman stallman tagme text",
+                "width": 480,
+            },
+        }),
+        ("https://loudbooru.com/post/view/33828", {
+            "pattern": r"https://loudbooru\.com/_images/.+\.png",
+            "content": "a4755f787ba23ae2aa297a46810f802ca9032739",
+            "keyword": {
+                "extension": "png",
+                "file_url": "https://loudbooru.com/_images/ca2638d903c86e8337f"
+                            "e9aeb4974be88/33828%20-%202020%20artist%3Astikyfi"
+                            "nkaz%20character%3Alisa_loud%20cover%20fanfiction"
+                            "%3Aplatz_eins%20frowning%20half-closed_eyes%20sol"
+                            "o%20text%20title_card.png",
+                "filename": "33828 - 2020 artist:stikyfinkaz character:lisa_"
+                            "loud cover fanfiction:platz_eins frowning "
+                            "half-closed_eyes solo text title_card",
+                "height": 1920,
+                "id": 33828,
+                "md5": "ca2638d903c86e8337fe9aeb4974be88",
+                "tags": "2020 artist:stikyfinkaz character:lisa_loud cover "
+                        "fanfiction:platz_eins frowning half-closed_eyes "
+                        "solo text title_card",
+                "width": 1078,
+            },
+        }),
+        ("https://giantessbooru.com/post/view/41", {
+            "pattern": r"https://giantessbooru\.com/_images"
+                       r"/3f67e1986496806b7b14ff3e82ac5af4/41\.jpg",
+            "content": "79115ed309d1f4e82e7bead6948760e889139c91",
+            "keyword": {
+                "extension": "jpg",
+                "file_url": "https://giantessbooru.com/_images"
+                            "/3f67e1986496806b7b14ff3e82ac5af4/41.jpg",
+                "filename": "41",
+                "height": 0,
+                "id": 41,
+                "md5": "3f67e1986496806b7b14ff3e82ac5af4",
+                "size": 0,
+                "tags": "anime bare_midriff color drawing gentle giantess "
+                        "karbo looking_at_tinies negeyari outdoors smiling "
+                        "snake_girl white_hair",
+                "width": 0
+
+
+            },
+        }),
+        ("https://tentaclerape.net/post/view/10", {
+            "pattern": r"https://tentaclerape\.net/\./index\.php"
+                       r"\?q=/image/10\.jpg",
+            "content": "d0fd8f0f6517a76cb5e23ba09f3844950bf2c516",
+            "keyword": {
+                "extension": "jpg",
+                "file_url": "https://tentaclerape.net/./index.php"
+                            "?q=/image/10.jpg",
+                "filename": "10",
+                "height": 427,
+                "id": 10,
+                "md5": "945db71eeccaef82ce44b77564260c0b",
+                "size": 0,
+                "subcategory": "post",
+                "tags": "Deviant_Art Pet Tentacle artist_sche blonde_hair "
+                        "blouse boots green_eyes highheels leash miniskirt "
+                        "octopus schoolgirl white_skin willing",
+                "width": 300,
+            },
+        }),
+        # video
+        ("https://tentaclerape.net/post/view/91267", {
+            "pattern": r"https://tentaclerape\.net/\./index\.php"
+                       r"\?q=/image/91267\.mp4",
+        }),
+        ("https://booru.cavemanon.xyz/index.php?q=post/view/8335", {
+            "pattern": r"https://booru\.cavemanon\.xyz"
+                       r"/index\.php\?q=image/8335\.png",
+            "content": "7158f7e4abbbf143bad5835eb93dbe4d68c1d4ab",
+            "keyword": {
+                "extension": "png",
+                "file_url": "https://booru.cavemanon.xyz"
+                            "/index.php?q=image/8335.png",
+                "filename": "8335",
+                "height": 460,
+                "id": 8335,
+                "md5": "",
+                "size": 0,
+                "tags": "Color Fang",
+                "width": 459,
+            },
+        }),
+    )
+
+    def __init__(self, match):
+        Shimmie2Extractor.__init__(self, match)
+        self.post_id = match.group(match.lastindex)
+
+    def posts(self):
+        url = "{}/post/view/{}".format(self.root, self.post_id)
+        extr = text.extract_from(self.request(url).text)
+
+        post = {
+            "id"      : self.post_id,
+            "tags"    : extr(": ", "<").partition(" - ")[0].rstrip(")"),
+            "md5"     : extr("/_thumbs/", "/"),
+            "file_url": self.root + (
+                extr("id='main_image' src='", "'") or
+                extr("<source src='", "'")),
+            "width"   : extr("data-width=", " ").strip("\"'"),
+            "height"  : extr("data-height=", ">").partition(
+                " ")[0].strip("\"'"),
+            "size"    : 0,
+        }
+
+        if not post["md5"]:
+            post["md5"] = text.extr(post["file_url"], "/_images/", "/")
+
+        return (post,)
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 155db1e..b45609d 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -10,7 +10,7 @@

 from .common import Extractor, Message
 from .. import text, oauth, exception
-from datetime import datetime, timedelta
+from datetime import datetime, date, timedelta
 import re

@@ -269,7 +269,7 @@ class TumblrExtractor(Extractor):

 class TumblrUserExtractor(TumblrExtractor):
-    """Extractor for all images from a tumblr-user"""
+    """Extractor for a Tumblr user's posts"""
     subcategory = "user"
     pattern = BASE_PATTERN + r"(?:/page/\d+|/archive)?/?$"
     test = (
@@ -307,6 +307,16 @@ class TumblrUserExtractor(TumblrExtractor):
             "options": (("date-min", "201804"), ("date-max", "201805"),
                         ("date-format", "%Y%m"))
         }),
+        # pagination with 'date-max' (#2191) and 'api-key'
+        ("https://donttrustthetits.tumblr.com/", {
+            "options": (
+                ("access-token", None),
+                ("original", False),
+                ("date-max", "2015-04-25T00:00:00"),
+                ("date-min", "2015-04-01T00:00:00"),
+            ),
+            "count": 316,
+        }),
         ("https://demo.tumblr.com/page/2"),
         ("https://demo.tumblr.com/archive"),
         ("tumblr:http://www.b-authentique.com/"),
@@ -321,7 +331,7 @@ class TumblrUserExtractor(TumblrExtractor):

 class TumblrPostExtractor(TumblrExtractor):
-    """Extractor for images from a single post on tumblr"""
+    """Extractor for a single Tumblr post"""
     subcategory = "post"
     pattern = BASE_PATTERN + r"/(?:post/|image/)?(\d+)"
     test = (
@@ -389,7 +399,7 @@ class TumblrPostExtractor(TumblrExtractor):

 class TumblrTagExtractor(TumblrExtractor):
-    """Extractor for images from a tumblr-user by tag"""
+    """Extractor for Tumblr user's posts by tag"""
     subcategory = "tag"
     pattern = BASE_PATTERN + r"/tagged/([^/?#]+)"
     test = (
@@ -411,8 +421,37 @@ class TumblrTagExtractor(TumblrExtractor):
         return self.api.posts(self.blog, {"tag": self.tag})


+class TumblrDayExtractor(TumblrExtractor):
+    """Extractor for Tumblr user's posts by day"""
+    subcategory = "day"
+    pattern = BASE_PATTERN + r"/day/(\d\d\d\d/\d\d/\d\d)"
+    test = (
+        ("https://mikf123.tumblr.com/day/2018/01/05", {
+            "pattern": r"https://64\.media\.tumblr\.com"
+                       r"/1a2be8c63f1df58abd2622861696c72a"
+                       r"/tumblr_ozm9nqst9t1wgha4yo1_1280\.jpg",
+            "keyword": {"id": 169341068404},
+            "count": 1,
+        }),
+        ("https://www.tumblr.com/blog/view/mikf123/day/2018/01/05"),
+        ("https://www.tumblr.com/blog/mikf123/day/2018/01/05"),
+        ("https://www.tumblr.com/mikf123/day/2018/01/05"),
+    )
+
+    def __init__(self, match):
+        TumblrExtractor.__init__(self, match)
+        year, month, day = match.group(4).split("/")
+        self.date_min = ts = (
+            # 719163 == date(1970, 1, 1).toordinal()
+            date(int(year), int(month), int(day)).toordinal() - 719163) * 86400
+        self.api.before = ts + 86400
+
+    def posts(self):
+        return self.api.posts(self.blog, {})
+
+
 class TumblrLikesExtractor(TumblrExtractor):
-    """Extractor for images from a tumblr-user's liked posts"""
+    """Extractor for a Tumblr user's liked posts"""
     subcategory = "likes"
     directory_fmt = ("{category}", "{blog_name}", "likes")
     archive_fmt = "f_{blog[name]}_{id}_{num}"
@@ -431,7 +470,11 @@ class TumblrLikesExtractor(TumblrExtractor):

 class TumblrAPI(oauth.OAuth1API):
-    """Minimal interface for the Tumblr API v2"""
+    """Interface for the Tumblr API v2
+
+    https://github.com/tumblr/docs/blob/master/api.md
+    """
+
+    ROOT = "https://api.tumblr.com"
     API_KEY = "O3hU2tMi5e4Qs5t3vezEi6L0qRORJ5y9oUpSGsrWu8iA3UCc3B"
     API_SECRET = "sFdsK3PDdP2QpYMRAoq0oDnw0sFS24XigXmdfnaeNZpJpqAn03"
     BLOG_CACHE = {}
@@ -442,55 +485,46 @@ class TumblrAPI(oauth.OAuth1API):

     def info(self, blog):
         """Return general information about a blog"""
-        if blog not in self.BLOG_CACHE:
-            self.BLOG_CACHE[blog] = self._call(blog, "info", {})["blog"]
-        return self.BLOG_CACHE[blog]
+        try:
+            return self.BLOG_CACHE[blog]
+        except KeyError:
+            endpoint = "/v2/blog/{}/info".format(blog)
+            params = {"api_key": self.api_key} if self.api_key else None
+            self.BLOG_CACHE[blog] = blog = self._call(endpoint, params)["blog"]
+            return blog

     def avatar(self, blog, size="512"):
         """Retrieve a blog avatar"""
         if self.api_key:
-            url_fmt = "https://api.tumblr.com/v2/blog/{}/avatar/{}?api_key={}"
-            return url_fmt.format(blog, size, self.api_key)
+            return "{}/v2/blog/{}/avatar/{}?api_key={}".format(
+                self.ROOT, blog, size, self.api_key)
+        endpoint = "/v2/blog/{}/avatar".format(blog)
         params = {"size": size}
-        data = self._call(blog, "avatar", params, allow_redirects=False)
-        return data["avatar_url"]
+        return self._call(
+            endpoint, params, allow_redirects=False)["avatar_url"]

     def posts(self, blog, params):
         """Retrieve published posts"""
-        params["offset"] = self.extractor.config("offset") or 0
-        params["limit"] = 50
+        params["offset"] = self.extractor.config("offset")
+        params["limit"] = "50"
         params["reblog_info"] = "true"
+        params["type"] = self.posts_type
+        params["before"] = self.before

-        if self.posts_type:
-            params["type"] = self.posts_type
-        if self.before:
-            params["before"] = self.before
+        if self.before and params["offset"]:
+            self.log.warning("'offset' and 'date-max' cannot be used together")

-        while True:
-            data = self._call(blog, "posts", params)
-            self.BLOG_CACHE[blog] = data["blog"]
-            yield from data["posts"]
-            params["offset"] += params["limit"]
-            if params["offset"] >= data["total_posts"]:
-                return
+        return self._pagination(blog, "/posts", params, cache=True)

     def likes(self, blog):
         """Retrieve liked posts"""
         params = {"limit": "50", "before": self.before}
-        while True:
-            posts = self._call(blog, "likes", params)["liked_posts"]
-            if not posts:
-                return
-            yield from posts
-            params["before"] = posts[-1]["liked_timestamp"]
+        return self._pagination(blog, "/likes", params, key="liked_posts")

-    def _call(self, blog, endpoint, params, **kwargs):
-        if self.api_key:
-            params["api_key"] = self.api_key
-        url = "https://api.tumblr.com/v2/blog/{}/{}".format(
-            blog, endpoint)
-
-        response = self.request(url, params=params, **kwargs)
+    def _call(self, endpoint, params, **kwargs):
+        url = self.ROOT + endpoint
+        kwargs["params"] = params
+        response = self.request(url, **kwargs)

         try:
             data = response.json()
@@ -535,7 +569,7 @@ class TumblrAPI(oauth.OAuth1API):

                 if self.extractor.config("ratelimit") == "wait":
                     self.extractor.wait(seconds=reset)
-                    return self._call(blog, endpoint, params)
+                    return self._call(endpoint, params, **kwargs)

                 t = (datetime.now() + timedelta(seconds=float(reset))).time()
                 raise exception.StopExtraction(
@@ -547,6 +581,29 @@ class TumblrAPI(oauth.OAuth1API):
             if reset:
                 self.log.info("Hourly API rate limit exceeded")
                 self.extractor.wait(seconds=reset)
-                return self._call(blog, endpoint, params)
+                return self._call(endpoint, params, **kwargs)

         raise exception.StopExtraction(data)
+
+    def _pagination(self, blog, endpoint, params, key="posts", cache=False):
+        endpoint = "/v2/blog/{}{}".format(blog, endpoint)
+        if self.api_key:
+            params["api_key"] = self.api_key
+
+        while True:
+            data = self._call(endpoint, params)
+
+            if cache:
+                self.BLOG_CACHE[blog] = data["blog"]
+                cache = False
+
+            yield from data[key]
+
+            try:
+                endpoint = data["_links"]["next"]["href"]
+            except KeyError:
+                return
+
+            params = None
+            if self.api_key:
+                endpoint += "&api_key=" + self.api_key
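Note: the `719163` constant in `TumblrDayExtractor` converts a calendar day into Unix time. `date.toordinal()` counts days from 0001-01-01, and 1970-01-01 is day 719163, so subtracting it and multiplying by 86400 yields midnight UTC of that day. A quick sanity check:

```python
from datetime import date, datetime, timezone

assert date(1970, 1, 1).toordinal() == 719163

ts = (date(2018, 1, 5).toordinal() - 719163) * 86400
assert ts == datetime(2018, 1, 5, tzinfo=timezone.utc).timestamp()
# the extractor then requests posts 'before' ts + 86400 (end of that day)
```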
data["_links"]["next"]["href"] + except KeyError: + return + + params = None + if self.api_key: + endpoint += "&api_key=" + self.api_key diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 2ccc7e5..5e68f13 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -45,7 +45,8 @@ class TwitterExtractor(Extractor): if not self.config("transform", True): self._transform_user = util.identity self._transform_tweet = util.identity - self._user = self._user_obj = None + self._user = None + self._user_obj = None self._user_cache = {} self._init_sizes() @@ -769,6 +770,13 @@ class TwitterTweetExtractor(TwitterExtractor): "pattern": r"https://pbs\.twimg\.com/media/EaK.+=jpg", "count": 4, }), + # different 'user' and 'author' in quoted Tweet (#3922) + ("https://twitter.com/web/status/1644907989109751810", { + "keyword": { + "author": {"id": 321629993 , "name": "Cakes_Comics"}, + "user" : {"id": 718928225360080897, "name": "StobiesGalaxy"}, + }, + }), # TwitPic embeds (#579) ("https://twitter.com/i/web/status/112900228289540096", { "options": (("twitpic", True), ("cards", False)), @@ -897,7 +905,8 @@ Your reaction.""", for tweet in self.api.tweet_detail(tweet_id): if tweet["rest_id"] == tweet_id or \ tweet.get("_retweet_id_str") == tweet_id: - self._assign_user(tweet["core"]["user_results"]["result"]) + if self._user_obj is None: + self._assign_user(tweet["core"]["user_results"]["result"]) tweets.append(tweet) tweet_id = tweet["legacy"].get("quoted_status_id_str") @@ -1561,9 +1570,9 @@ class TwitterAPI(): if esw("tweet-"): tweets.append(entry) - elif esw("homeConversation-"): - tweets.extend(entry["content"]["items"]) - elif esw("conversationthread-"): + elif esw(("homeConversation-", + "profile-conversation-", + "conversationthread-")): tweets.extend(entry["content"]["items"]) elif esw("tombstone-"): item = entry["content"]["itemContent"] diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py new file mode 100644 index 0000000..1cebdf7 --- /dev/null +++ b/gallery_dl/extractor/vipergirls.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- + +# Copyright 2023 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://vipergirls.to/""" + +from .common import Extractor, Message +from .. 
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 714f4fe..5004bed 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -124,10 +124,8 @@ class MetadataPP(PostProcessor):
         for key, func in self.fields.items():
             obj = kwdict
             try:
-                while "[" in key:
-                    name, _, key = key.partition("[")
-                    obj = obj[name]
-                    key = key.rstrip("]")
+                if "[" in key:
+                    obj, key = _traverse(obj, key)
                 obj[key] = func(kwdict)
             except Exception:
                 pass
@@ -137,10 +135,8 @@ class MetadataPP(PostProcessor):
         for key in self.fields:
             obj = kwdict
             try:
-                while "[" in key:
-                    name, _, key = key.partition("[")
-                    obj = obj[name]
-                    key = key.rstrip("]")
+                if "[" in key:
+                    obj, key = _traverse(obj, key)
                 del obj[key]
             except Exception:
                 pass
@@ -214,4 +210,15 @@ class MetadataPP(PostProcessor):
     )


+def _traverse(obj, key):
+    name, _, key = key.partition("[")
+    obj = obj[name]
+
+    while "[" in key:
+        name, _, key = key.partition("[")
+        obj = obj[name.strip("\"']")]
+
+    return obj, key.strip("\"']")
+
+
 __postprocessor__ = MetadataPP
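Note: `_traverse` factors the bracket-key walk out of the set and delete loops — it descends into the dict for every `[...]` segment, stripping optional quotes, and hands back the parent object plus the final key. Applied to sample data:

```python
kwdict = {"user": {"profile": {"name": "old"}}}

obj, key = _traverse(kwdict, "user[profile][name]")
assert key == "name" and obj is kwdict["user"]["profile"]

obj[key] = "new"   # what the "fields" mode then does
del obj[key]       # what the "delete" mode then does
# quoted segments work too: _traverse(kwdict, "user['profile']['name']")
```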
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index c40736a..d4ef532 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-__version__ = "1.25.2"
+__version__ = "1.25.3"
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index b4638b7..eb09b9b 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -409,9 +409,12 @@ def parse_command_line(module, argv):
         "postprocessor_args": opts.postprocessor_args,
         "cn_verification_proxy": opts.cn_verification_proxy,
         "geo_verification_proxy": opts.geo_verification_proxy,
-        "geo_bypass": opts.geo_bypass,
-        "geo_bypass_country": opts.geo_bypass_country,
-        "geo_bypass_ip_block": opts.geo_bypass_ip_block,
+        "geo_bypass": getattr(
+            opts, "geo_bypass", "default"),
+        "geo_bypass_country": getattr(
+            opts, "geo_bypass_country", None),
+        "geo_bypass_ip_block": getattr(
+            opts, "geo_bypass_ip_block", None),
         "compat_opts": compat_opts,
     }
