New upstream version 1.24.3. (upstream/1.24.3)

author: Unit 193 <unit193@unit193.net> 2023-01-11 04:09:13 -0500
committer: Unit 193 <unit193@unit193.net> 2023-01-11 04:09:13 -0500
commit: fe385c3ff784ba3d19454a35446502c0ec295893 (patch)
tree: 897982793ef2a0c0f349044bf4cf803ccd483e6e /gallery_dl
parent: ebdfcd3cd3f76534a590ba08933ff7ea54813316 (diff)
28 files changed, 764 insertions, 215 deletions
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index ee00bf7..f18cc47 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -980,6 +980,7 @@ def _is_path(value):
 
 def _parse_browser_specification(
         browser, profile=None, keyring=None, container=None):
+    browser = browser.lower()
     if browser not in SUPPORTED_BROWSERS:
         raise ValueError("unsupported browser '{}'".format(browser))
     if keyring and keyring not in SUPPORTED_KEYRINGS:
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 444075c..f26f6a9 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -38,6 +38,7 @@ modules = [
     "exhentai",
     "fallenangels",
     "fanbox",
+    "fanleaks",
     "fantia",
     "fapello",
     "fapachi",
@@ -135,6 +136,7 @@ modules = [
     "speakerdeck",
     "subscribestar",
     "tapas",
+    "tcbscans",
     "telegraph",
     "toyhouse",
     "tsumino",
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index cf332ac..6da6175 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract images from https://www.behance.net/"""
+"""Extractors for https://www.behance.net/"""
 
 from .common import Extractor, Message
 from .. import text
@@ -17,6 +17,7 @@ class BehanceExtractor(Extractor):
     """Base class for behance extractors"""
     category = "behance"
     root = "https://www.behance.net"
+    request_interval = (2.0, 4.0)
 
     def items(self):
         for gallery in self.galleries():
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 882c2b3..8283fbc 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -56,8 +56,12 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
             files = album["files"]
         except Exception as exc:
             self.log.debug("%s: %s", exc.__class__.__name__, exc)
+            self.log.debug("Falling back to lolisafe API")
             self.root = root.replace("://", "://app.", 1)
             files, data = LolisafeAlbumExtractor.fetch_album(self, album_id)
+            # fix file URLs (bunkr..ru -> bunkr.ru) (#3481)
+            for file in files:
+                file["file"] = file["file"].replace("bunkr..", "bunkr.", 1)
         else:
             for file in files:
                 file["file"] = file["cdn"] + "/" + file["name"]
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 4352aa7..ad766da 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -327,6 +327,7 @@ class Extractor():
                 except Exception as exc:
                     self.log.warning("cookies: %s", exc)
                 else:
+                    self.log.debug("Loading cookies from '%s'", cookies)
                     self._cookiefile = cookiefile
 
             elif isinstance(cookies, (list, tuple)):
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index ef17176..4c93604 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -101,8 +101,8 @@ class DanbooruExtractor(BaseExtractor):
 
             if self.extended_metadata:
                 template = (
-                    "{}/posts/{}.json"
-                    "?only=artist_commentary,children,notes,parent"
+                    "{}/posts/{}.json?only=artist_commentary,children,notes,"
+                    "parent,uploader"
                 )
                 resp = self.request(template.format(self.root, post["id"]))
                 post.update(resp.json())
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index aa78cfb..aeb2d0a 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -987,13 +987,9 @@ class DeviantartScrapsExtractor(DeviantartExtractor):
     _warning = True
 
     def deviations(self):
-        eclipse_api = DeviantartEclipseAPI(self)
-        if self._warning:
-            DeviantartScrapsExtractor._warning = False
-            if not self._check_cookies(self.cookienames):
-                self.log.warning(
-                    "No session cookies set: Unable to fetch mature scraps.")
+        self.login()
 
+        eclipse_api = DeviantartEclipseAPI(self)
         for obj in eclipse_api.gallery_scraps(self.user, self.offset):
             deviation = obj["deviation"]
             deviation_uuid = eclipse_api.deviation_extended_fetch(
@@ -1004,6 +1000,17 @@ class DeviantartScrapsExtractor(DeviantartExtractor):
 
             yield self.api.deviation(deviation_uuid)
 
+    def login(self):
+        """Login and obtain session cookies"""
+        if not self._check_cookies(self.cookienames):
+            username, password = self._get_auth_info()
+            if username:
+                self._update_cookies(_login_impl(self, username, password))
+            elif self._warning:
+                self.log.warning(
+                    "No session cookies set: Unable to fetch mature scraps.")
+            DeviantartScrapsExtractor._warning = False
+
 
 class DeviantartFollowingExtractor(DeviantartExtractor):
     """Extractor for user's watched users"""
@@ -1513,13 +1520,47 @@ class DeviantartEclipseAPI():
         return token
 
 
-@cache(maxage=100*365*24*3600, keyarg=0)
+@cache(maxage=100*365*86400, keyarg=0)
 def _refresh_token_cache(token):
     if token and token[0] == "#":
         return None
     return token
 
 
+@cache(maxage=28*86400, keyarg=1)
+def _login_impl(extr, username, password):
+    extr.log.info("Logging in as %s", username)
+
+    url = "https://www.deviantart.com/users/login"
+    page = extr.request(url).text
+
+    data = {}
+    for item in text.extract_iter(page, '<input type="hidden" name="', '"/>'):
+        name, _, value = item.partition('" value="')
+        data[name] = value
+
+    challenge = data.get("challenge")
+    if challenge and challenge != "0":
+        extr.log.warning("Login requires solving a CAPTCHA")
+        extr.log.debug(challenge)
+
+    data["username"] = username
+    data["password"] = password
+    data["remember"] = "on"
+
+    extr.sleep(2.0, "login")
+    url = "https://www.deviantart.com/_sisu/do/signin"
+    response = extr.request(url, method="POST", data=data)
+
+    if not response.history:
+        raise exception.AuthenticationError()
+
+    return {
+        cookie.name: cookie.value
+        for cookie in extr.session.cookies
+    }
+
+
 ###############################################################################
 # Journal Formats #############################################################
 
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index f692a90..41431dc 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -69,14 +69,28 @@ class FanboxExtractor(Extractor):
             if post["type"] == "article":
                 post["articleBody"] = content_body.copy()
             if "blocks" in content_body:
-                content = []
+                content = []  # text content
+                images = []   # image IDs in 'body' order
+
                 append = content.append
+                append_img = images.append
                 for block in content_body["blocks"]:
                     if "text" in block:
                         append(block["text"])
                     if "links" in block:
                         for link in block["links"]:
                             append(link["url"])
+                    if "imageId" in block:
+                        append_img(block["imageId"])
+
+                if images and "imageMap" in content_body:
+                    # reorder 'imageMap' (#2718)
+                    image_map = content_body["imageMap"]
+                    content_body["imageMap"] = {
+                        image_id: image_map[image_id]
+                        for image_id in images
+                    }
+
                 post["content"] = "\n".join(content)
 
         post["date"] = text.parse_datetime(post["publishedDatetime"])
@@ -294,6 +308,10 @@ class FanboxPostExtractor(FanboxExtractor):
                            r"Thank you for your continued support of FANBOX.$",
             },
         }),
+        # imageMap file order (#2718)
+        ("https://mochirong.fanbox.cc/posts/3746116", {
+            "url": "c92ddd06f2efc4a5fe30ec67e21544f79a5c4062",
+        }),
     )
 
     def __init__(self, match):
diff --git a/gallery_dl/extractor/fanleaks.py b/gallery_dl/extractor/fanleaks.py
new file mode 100644
index 0000000..466bb8c
--- /dev/null
+++ b/gallery_dl/extractor/fanleaks.py
@@ -0,0 +1,127 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://fanleaks.club/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+
+class FanleaksExtractor(Extractor):
+    """Base class for Fanleaks extractors"""
+    category = "fanleaks"
+    directory_fmt = ("{category}", "{model}")
+    filename_fmt = "{model_id}_{id}.{extension}"
+    archive_fmt = "{model_id}_{id}"
+    root = "https://fanleaks.club"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.model_id = match.group(1)
+
+    def extract_post(self, url):
+        extr = text.extract_from(self.request(url, notfound="post").text)
+        data = {
+            "model_id": self.model_id,
+            "model"   : text.unescape(extr('text-lg">', "</a>")),
+            "id"      : text.parse_int(self.id),
+            "type"    : extr('type="', '"')[:5] or "photo",
+        }
+        url = extr('src="', '"')
+        yield Message.Directory, data
+        yield Message.Url, url, text.nameext_from_url(url, data)
+
+
+class FanleaksPostExtractor(FanleaksExtractor):
+    """Extractor for individual posts on fanleak.club"""
+    subcategory = "post"
+    pattern = r"(?:https?://)?(?:www\.)?fanleaks\.club/([^/?#]+)/(\d+)"
+    test = (
+        ("https://fanleaks.club/selti/880", {
+            "pattern": (r"https://fanleaks\.club//models"
+                        r"/selti/images/selti_0880\.jpg"),
+            "keyword": {
+                "model_id": "selti",
+                "model"   : "Selti",
+                "id"      : 880,
+                "type"    : "photo",
+            },
+        }),
+        ("https://fanleaks.club/daisy-keech/1038", {
+            "pattern": (r"https://fanleaks\.club//models"
+                        r"/daisy-keech/videos/daisy-keech_1038\.mp4"),
+            "keyword": {
+                "model_id": "daisy-keech",
+                "model"   : "Daisy Keech",
+                "id"      : 1038,
+                "type"    : "video",
+            },
+        }),
+        ("https://fanleaks.club/hannahowo/000", {
+            "exception": exception.NotFoundError,
+        }),
+    )
+
+    def __init__(self, match):
+        FanleaksExtractor.__init__(self, match)
+        self.id = match.group(2)
+
+    def items(self):
+        url = "{}/{}/{}".format(self.root, self.model_id, self.id)
+        return self.extract_post(url)
+
+
+class FanleaksModelExtractor(FanleaksExtractor):
+    """Extractor for all posts from a fanleaks model"""
+    subcategory = "model"
+    pattern = (r"(?:https?://)?(?:www\.)?fanleaks\.club"
+               r"/(?!latest/?$)([^/?#]+)/?$")
+    test = (
+        ("https://fanleaks.club/hannahowo", {
+            "pattern": (r"https://fanleaks\.club//models"
+                        r"/hannahowo/(images|videos)/hannahowo_\d+\.\w+"),
+            "range"  : "1-100",
+            "count"  : 100,
+        }),
+        ("https://fanleaks.club/belle-delphine", {
+            "pattern": (r"https://fanleaks\.club//models"
+                        r"/belle-delphine/(images|videos)"
+                        r"/belle-delphine_\d+\.\w+"),
+            "range"  : "1-100",
+            "count"  : 100,
+        }),
+        ("https://fanleaks.club/daisy-keech"),
+    )
+
+    def items(self):
+        page_num = 1
+        page = self.request(
+            self.root + "/" + self.model_id, notfound="model").text
+        data = {
+            "model_id": self.model_id,
+            "model"   : text.unescape(
+                text.extr(page, 'mt-4">', "</h1>")),
+            "type"    : "photo",
+        }
+        page_url = text.extr(page, "url: '", "'")
+        while True:
+            page = self.request("{}{}".format(page_url, page_num)).text
+            if not page:
+                return
+
+            for item in text.extract_iter(page, '<a href="/', "</a>"):
+                self.id = id = text.extr(item, "/", '"')
+                if "/icon-play.svg" in item:
+                    url = "{}/{}/{}".format(self.root, self.model_id, id)
+                    yield from self.extract_post(url)
+                    continue
+
+                data["id"] = text.parse_int(id)
+                url = text.extr(item, 'src="', '"').replace(
+                    "/thumbs/", "/", 1)
+                yield Message.Directory, data
+                yield Message.Url, url, text.nameext_from_url(url, data)
+            page_num += 1
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index d8109e1..8d73949 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -195,7 +195,7 @@ class GelbooruPostExtractor(GelbooruBase,
         # notes
         ("https://gelbooru.com/index.php?page=post&s=view&id=5997331", {
             "options": (("notes", True),),
-            "keywords": {
+            "keyword": {
                 "notes": [
                     {
                         "body": "Look over this way when you talk~",
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index 56bd048..1efbbf0 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -9,25 +9,37 @@
 """Extractors for https://www.imagefap.com/"""
 
 from .common import Extractor, Message
-from .. import text
+from .. import text, exception
 import json
 
-
 BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com"
 
 
 class ImagefapExtractor(Extractor):
     """Base class for imagefap extractors"""
     category = "imagefap"
+    root = "https://www.imagefap.com"
     directory_fmt = ("{category}", "{gallery_id} {title}")
     filename_fmt = "{category}_{gallery_id}_{filename}.{extension}"
     archive_fmt = "{gallery_id}_{image_id}"
-    root = "https://www.imagefap.com"
+    request_interval = (2.0, 4.0)
 
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.session.headers["Referer"] = self.root
 
+    def request(self, url, **kwargs):
+        response = Extractor.request(self, url, **kwargs)
+
+        if response.history and response.url.endswith("/human-verification"):
+            msg = text.extr(response.text, '<div class="mt-4', '<')
+            if msg:
+                msg = " ".join(msg.partition(">")[2].split())
+                raise exception.StopExtraction("'%s'", msg)
+            self.log.warning("HTTP redirect to %s", response.url)
+
+        return response
+
 
 class ImagefapGalleryExtractor(ImagefapExtractor):
     """Extractor for image galleries from imagefap.com"""
@@ -41,12 +53,20 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
             "keyword": "2ba96e84c2952c4750e9fa94a3f2b1f965cec2f3",
             "content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab",
         }),
-        ("https://www.imagefap.com/gallery/5486966", {
+        ("https://www.imagefap.com/gallery/7876223", {
             "pattern": r"https://cdnh?\.imagefap\.com"
                        r"/images/full/\d+/\d+/\d+\.jpg",
-            "keyword": "8d2e562df7a0bc9e8eecb9d1bb68d32b4086bf98",
-            "archive": False,
-            "count": 62,
+            "keyword": {
+                "count": 44,
+                "gallery_id": 7876223,
+                "image_id": int,
+                "num": int,
+                "tags": ["big ass", "panties", "horny",
+                         "pussy", "exposed", "outdoor"],
+                "title": "Kelsi Monroe in lingerie",
+                "uploader": "BdRachel",
+            },
+            "count": 44,
         }),
         ("https://www.imagefap.com/gallery.php?gid=7102714"),
         ("https://beta.imagefap.com/gallery.php?gid=7102714"),
@@ -118,12 +138,20 @@ class ImagefapImageExtractor(ImagefapExtractor):
     subcategory = "image"
     pattern = BASE_PATTERN + r"/photo/(\d+)"
     test = (
-        ("https://www.imagefap.com/photo/1369341772/", {
+        ("https://www.imagefap.com/photo/1962981893", {
             "pattern": r"https://cdnh?\.imagefap\.com"
-                       r"/images/full/\d+/\d+/\d+\.jpg",
-            "keyword": "8894e45f7262020d8d66ce59917315def1fc475b",
+                       r"/images/full/65/196/1962981893\.jpg",
+            "keyword": {
+                "date": "21/08/2014",
+                "gallery_id": 7876223,
+                "height": 1600,
+                "image_id": 1962981893,
+                "title": "Kelsi Monroe in lingerie",
+                "uploader": "BdRachel",
+                "width": 1066,
+            },
         }),
-        ("https://beta.imagefap.com/photo/1369341772/"),
+        ("https://beta.imagefap.com/photo/1962981893"),
     )
 
     def __init__(self, match):
@@ -159,61 +187,70 @@ class ImagefapImageExtractor(ImagefapExtractor):
         })
 
 
-class ImagefapUserExtractor(ImagefapExtractor):
-    """Extractor for all galleries from a user at imagefap.com"""
-    subcategory = "user"
-    categorytransfer = True
-    pattern = (BASE_PATTERN +
-               r"/(?:profile(?:\.php\?user=|/)([^/?#]+)"
-               r"|usergallery\.php\?userid=(\d+))")
+class ImagefapFolderExtractor(ImagefapExtractor):
+    """Extractor for imagefap user folders"""
+    subcategory = "folder"
+    pattern = (BASE_PATTERN + r"/(?:organizer/|"
+               r"(?:usergallery\.php\?user(id)?=([^&#]+)&"
+               r"|profile/([^/?#]+)/galleries\?)folderid=)(\d+|-1)")
     test = (
-        ("https://www.imagefap.com/profile/LucyRae/galleries", {
-            "url": "822cb6cbb6f474ca2d0f58d1d6d253bc2338937a",
+        ("https://www.imagefap.com/organizer/409758", {
+            "pattern": r"https://www\.imagefap\.com/gallery/7876223",
+            "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
+            "count": 1,
         }),
-        ("https://www.imagefap.com/usergallery.php?userid=1862791", {
-            "url": "822cb6cbb6f474ca2d0f58d1d6d253bc2338937a",
+        (("https://www.imagefap.com/usergallery.php"
+          "?userid=1981976&folderid=409758"), {
+            "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
+        }),
+        (("https://www.imagefap.com/usergallery.php"
+          "?user=BdRachel&folderid=409758"), {
+            "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
+        }),
+        ("https://www.imagefap.com/profile/BdRachel/galleries?folderid=-1", {
+            "pattern": ImagefapGalleryExtractor.pattern,
+            "range": "1-40",
+        }),
+        (("https://www.imagefap.com/usergallery.php"
+          "?userid=1981976&folderid=-1"), {
+            "pattern": ImagefapGalleryExtractor.pattern,
+            "range": "1-40",
+        }),
+        (("https://www.imagefap.com/usergallery.php"
+          "?user=BdRachel&folderid=-1"), {
+            "pattern": ImagefapGalleryExtractor.pattern,
+            "range": "1-40",
         }),
-        ("https://www.imagefap.com/profile.php?user=LucyRae"),
-        ("https://beta.imagefap.com/profile.php?user=LucyRae"),
     )
 
     def __init__(self, match):
         ImagefapExtractor.__init__(self, match)
-        self.user, self.user_id = match.groups()
+        self._id, user, profile, self.folder_id = match.groups()
+        self.user = user or profile
 
     def items(self):
-        for folder_id in self.folders():
-            for gallery_id, name in self.galleries(folder_id):
-                url = "{}/gallery/{}".format(self.root, gallery_id)
-                data = {
-                    "gallery_id": text.parse_int(gallery_id),
-                    "title"     : text.unescape(name),
-                    "_extractor": ImagefapGalleryExtractor,
-                }
-                yield Message.Queue, url, data
-
-    def folders(self):
-        """Return a list of folder_ids of a specific user"""
-        if self.user:
-            url = "{}/profile/{}/galleries".format(self.root, self.user)
-        else:
-            url = "{}/usergallery.php?userid={}".format(
-                self.root, self.user_id)
-
-        response = self.request(url)
-        self.user = response.url.split("/")[-2]
-        folders = text.extr(response.text, ' id="tgl_all" value="', '"')
-        return folders.rstrip("|").split("|")
+        for gallery_id, name in self.galleries(self.folder_id):
+            url = "{}/gallery/{}".format(self.root, gallery_id)
+            data = {
+                "gallery_id": gallery_id,
+                "title"     : text.unescape(name),
+                "_extractor": ImagefapGalleryExtractor,
+            }
+            yield Message.Queue, url, data
 
     def galleries(self, folder_id):
-        """Yield gallery_ids of a folder"""
+        """Yield gallery IDs and titles of a folder"""
         if folder_id == "-1":
-            url = "{}/profile/{}/galleries?folderid=-1".format(
-                self.root, self.user)
+            if self._id:
+                url = "{}/usergallery.php?userid={}&folderid=-1".format(
+                    self.root, self.user)
+            else:
+                url = "{}/profile/{}/galleries?folderid=-1".format(
+                    self.root, self.user)
         else:
             url = "{}/organizer/{}/".format(self.root, folder_id)
-        params = {"page": 0}
 
+        params = {"page": 0}
         while True:
             extr = text.extract_from(self.request(url, params=params).text)
             cnt = 0
@@ -228,3 +265,53 @@ class ImagefapUserExtractor(ImagefapExtractor):
             if cnt < 25:
                 break
             params["page"] += 1
+
+
+class ImagefapUserExtractor(ImagefapExtractor):
+    """Extractor for an imagefap user profile"""
+    subcategory = "user"
+    pattern = (BASE_PATTERN +
+               r"/(?:profile(?:\.php\?user=|/)([^/?#]+)(?:/galleries)?"
+               r"|usergallery\.php\?userid=(\d+))(?:$|#)")
+    test = (
+        ("https://www.imagefap.com/profile/BdRachel", {
+            "pattern": ImagefapFolderExtractor.pattern,
+            "count": ">= 18",
+        }),
+        ("https://www.imagefap.com/usergallery.php?userid=1862791", {
+            "pattern": r"https://www\.imagefap\.com"
+                       r"/profile/LucyRae/galleries\?folderid=-1",
+            "count": 1,
+        }),
+        ("https://www.imagefap.com/profile/BdRachel/galleries"),
+        ("https://www.imagefap.com/profile.php?user=BdRachel"),
+        ("https://beta.imagefap.com/profile.php?user=BdRachel"),
+    )
+
+    def __init__(self, match):
+        ImagefapExtractor.__init__(self, match)
+        self.user, self.user_id = match.groups()
+
+    def items(self):
+        data = {"_extractor": ImagefapFolderExtractor}
+
+        for folder_id in self.folders():
+            if folder_id == "-1":
+                url = "{}/profile/{}/galleries?folderid=-1".format(
+                    self.root, self.user)
+            else:
+                url = "{}/organizer/{}/".format(self.root, folder_id)
+            yield Message.Queue, url, data
+
+    def folders(self):
+        """Return a list of folder IDs of a user"""
+        if self.user:
+            url = "{}/profile/{}/galleries".format(self.root, self.user)
+        else:
+            url = "{}/usergallery.php?userid={}".format(
+                self.root, self.user_id)
+
+        response = self.request(url)
+        self.user = response.url.split("/")[-2]
+        folders = text.extr(response.text, ' id="tgl_all" value="', '"')
+        return folders.rstrip("|").split("|")
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 8a61728..541e427 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -67,6 +67,7 @@ class KemonopartyExtractor(Extractor):
             headers["Referer"] = "{}/{}/user/{}/post/{}".format(
                 self.root, post["service"], post["user"], post["id"])
             post["_http_headers"] = headers
+            post["_http_validate"] = _validate
             post["date"] = text.parse_datetime(
                 post["published"] or post["added"],
                 "%a, %d %b %Y %H:%M:%S %Z")
@@ -197,6 +198,11 @@ class KemonopartyExtractor(Extractor):
         return dms
 
 
+def _validate(response):
+    return (response.headers["content-length"] != "9" and
+            response.content != b"not found")
+
+
 class KemonopartyUserExtractor(KemonopartyExtractor):
     """Extractor for all posts from a kemono.party user listing"""
     subcategory = "user"
@@ -309,6 +315,12 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
             "pattern": r"https://coomer\.party/data/7d/3f/7d3fd9804583dc224968"
                        r"c0591163ec91794552b04f00a6c2f42a15b68231d5a8\.jpg",
         }),
+        # invalid file (#3510)
+        ("https://kemono.party/patreon/user/19623797/post/29035449", {
+            "pattern": r"907ba78b4545338d3539683e63ecb51c"
+                       r"f51c10adc9dabd86e92bd52339f298b9\.txt",
+            "content": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
+        }),
         ("https://kemono.party/subscribestar/user/alcorart/post/184330"),
         ("https://www.kemono.party/subscribestar/user/alcorart/post/184330"),
         ("https://beta.kemono.party/subscribestar/user/alcorart/post/184330"),
diff --git a/gallery_dl/extractor/lynxchan.py b/gallery_dl/extractor/lynxchan.py
index bbcf9c0..85e8bb1 100644
--- a/gallery_dl/extractor/lynxchan.py
+++ b/gallery_dl/extractor/lynxchan.py
@@ -17,9 +17,13 @@ class LynxchanExtractor(BaseExtractor):
 
 
 BASE_PATTERN = LynxchanExtractor.update({
+    "bbw-chan": {
+        "root": "https://bbw-chan.nl",
+        "pattern": r"bbw-chan\.nl",
+    },
     "kohlchan": {
         "root": "https://kohlchan.net",
-        "pattern": r"kohlchan\.net"
+        "pattern": r"kohlchan\.net",
     },
     "endchan": {
         "root": None,
@@ -37,6 +41,11 @@ class LynxchanThreadExtractor(LynxchanExtractor):
     archive_fmt = "{boardUri}_{postId}_{num}"
     pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
     test = (
+        ("https://bbw-chan.nl/bbwdraw/res/499.html", {
+            "pattern": r"https://bbw-chan\.nl/\.media/[0-9a-f]{64}(\.\w+)?$",
+            "count": ">= 352",
+        }),
+        ("https://bbw-chan.nl/bbwdraw/res/489.html"),
         ("https://kohlchan.net/a/res/4594.html", {
             "pattern": r"https://kohlchan\.net/\.media/[0-9a-f]{64}(\.\w+)?$",
             "count": ">= 80",
@@ -78,6 +87,11 @@ class LynxchanBoardExtractor(LynxchanExtractor):
     subcategory = "board"
     pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
     test = (
+        ("https://bbw-chan.nl/bbwdraw/", {
+            "pattern": LynxchanThreadExtractor.pattern,
+            "count": ">= 148",
+        }),
+        ("https://bbw-chan.nl/bbwdraw/2.html"),
         ("https://kohlchan.net/a/", {
             "pattern": LynxchanThreadExtractor.pattern,
             "count": ">= 100",
diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py
index 3dbd5fc..5dc4cb6 100644
--- a/gallery_dl/extractor/myhentaigallery.py
+++ b/gallery_dl/extractor/myhentaigallery.py
@@ -44,7 +44,10 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor):
         extr = text.extract_from(page)
         split = text.split_html
 
-        title = extr('<div class="comic-description">\n<h1>', '</h1>')
+        title = extr('<div class="comic-description">\n', '</h1>').lstrip()
+        if title.startswith("<h1>"):
+            title = title[len("<h1>"):]
+
         if not title:
             raise exception.NotFoundError("gallery")
 
diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py
index dfe78ae..f9c6abf 100644
--- a/gallery_dl/extractor/nitter.py
+++ b/gallery_dl/extractor/nitter.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -451,7 +451,7 @@ class NitterTweetExtractor(NitterExtractor):
         }),
         # age-restricted (#2354)
         ("https://nitter.unixfox.eu/mightbecurse/status/1492954264909479936", {
-            "keywords": {"date": "dt:2022-02-13 20:10:09"},
+            "keyword": {"date": "dt:2022-02-13 20:10:00"},
             "count": 1,
         }),
     )
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index f786be6..63b16ce 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -26,6 +26,13 @@ class PinterestExtractor(Extractor):
 
     def __init__(self, match):
         Extractor.__init__(self, match)
+
+        domain = self.config("domain")
+        if not domain or domain == "auto" :
+            self.root = text.root_from_url(match.group(0))
+        else:
+            self.root = text.ensure_http_scheme(domain)
+
         self.api = PinterestAPI(self)
 
     def items(self):
@@ -142,7 +149,7 @@ class PinterestBoardExtractor(PinterestExtractor):
     directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
     archive_fmt = "{board[id]}_{id}"
     pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#&]+)"
-               "/(?!_saved|_created)([^/?#&]+)/?$")
+               "/(?!_saved|_created|pins/)([^/?#&]+)/?$")
     test = (
         ("https://www.pinterest.com/g1952849/test-/", {
             "pattern": r"https://i\.pinimg\.com/originals/",
@@ -151,7 +158,7 @@ class PinterestBoardExtractor(PinterestExtractor):
         # board with sections (#835)
         ("https://www.pinterest.com/g1952849/stuff/", {
             "options": (("sections", True),),
-            "count": 5,
+            "count": 4,
         }),
         # secret board (#1055)
         ("https://www.pinterest.de/g1952849/secret/", {
@@ -194,11 +201,11 @@ class PinterestUserExtractor(PinterestExtractor):
     subcategory = "user"
     pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)(?:/_saved)?/?$"
     test = (
-        ("https://www.pinterest.de/g1952849/", {
+        ("https://www.pinterest.com/g1952849/", {
             "pattern": PinterestBoardExtractor.pattern,
             "count": ">= 2",
         }),
-        ("https://www.pinterest.de/g1952849/_saved/"),
+        ("https://www.pinterest.com/g1952849/_saved/"),
     )
 
     def __init__(self, match):
@@ -213,15 +220,38 @@ class PinterestUserExtractor(PinterestExtractor):
                 yield Message.Queue, self.root + url, board
 
 
+class PinterestAllpinsExtractor(PinterestExtractor):
+    """Extractor for a user's 'All Pins' feed"""
+    subcategory = "allpins"
+    directory_fmt = ("{category}", "{user}")
+    pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/pins/?$"
+    test = ("https://www.pinterest.com/g1952849/pins/", {
+        "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}"
+                   r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w{3}",
+        "count": 7,
+    })
+
+    def __init__(self, match):
+        PinterestExtractor.__init__(self, match)
+        self.user = text.unquote(match.group(1))
+
+    def metadata(self):
+        return {"user": self.user}
+
+    def pins(self):
+        return self.api.user_pins(self.user)
+
+
 class PinterestCreatedExtractor(PinterestExtractor):
     """Extractor for a user's created pins"""
     subcategory = "created"
     directory_fmt = ("{category}", "{user}")
     pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/_created/?$"
-    test = ("https://www.pinterest.com/amazon/_created", {
+    test = ("https://www.pinterest.de/digitalmomblog/_created/", {
         "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}"
                    r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg",
         "count": 10,
+        "range": "1-10",
     })
 
     def __init__(self, match):
@@ -272,7 +302,7 @@ class PinterestSearchExtractor(PinterestExtractor):
     subcategory = "search"
     directory_fmt = ("{category}", "Search", "{search}")
     pattern = BASE_PATTERN + r"/search/pins/?\?q=([^&#]+)"
-    test = ("https://www.pinterest.de/search/pins/?q=nature", {
+    test = ("https://www.pinterest.com/search/pins/?q=nature", {
         "range": "1-50",
         "count": ">= 50",
     })
@@ -357,26 +387,23 @@ class PinterestAPI():
     - https://github.com/seregazhuk/php-pinterest-bot
     """
 
-    BASE_URL = "https://www.pinterest.com"
-    HEADERS = {
-        "Accept"              : "application/json, text/javascript, "
-                                "*/*, q=0.01",
-        "Accept-Language"     : "en-US,en;q=0.5",
-        "Referer"             : BASE_URL + "/",
-        "X-Requested-With"    : "XMLHttpRequest",
-        "X-APP-VERSION"       : "31461e0",
-        "X-CSRFToken"         : None,
-        "X-Pinterest-AppState": "active",
-        "Origin"              : BASE_URL,
-    }
-
     def __init__(self, extractor):
-        self.extractor = extractor
-
         csrf_token = util.generate_token()
-        self.headers = self.HEADERS.copy()
-        self.headers["X-CSRFToken"] = csrf_token
+
+        self.extractor = extractor
+        self.root = extractor.root
         self.cookies = {"csrftoken": csrf_token}
+        self.headers = {
+            "Accept"              : "application/json, text/javascript, "
+                                    "*/*, q=0.01",
+            "Accept-Language"     : "en-US,en;q=0.5",
+            "Referer"             : self.root + "/",
+            "X-Requested-With"    : "XMLHttpRequest",
+            "X-APP-VERSION"       : "0c4af40",
+            "X-CSRFToken"         : csrf_token,
+            "X-Pinterest-AppState": "active",
+            "Origin"              : self.root,
+        }
 
     def pin(self, pin_id):
         """Query information about a pin"""
@@ -437,6 +464,16 @@ class PinterestAPI():
         options = {"board_id": board_id, "add_vase": True}
         return self._pagination("BoardRelatedPixieFeed", options)
 
+    def user_pins(self, user):
+        """Yield all pins from 'user'"""
+        options = {
+            "is_own_profile_pins": False,
+            "username"           : user,
+            "field_set_key"      : "grid_item",
+            "pin_filter"         : None,
+        }
+        return self._pagination("UserPins", options)
+
     def user_activity_pins(self, user):
         """Yield pins created by 'user'"""
         options = {
@@ -462,7 +499,7 @@ class PinterestAPI():
     def _login_impl(self, username, password):
         self.extractor.log.info("Logging in as %s", username)
 
-        url = self.BASE_URL + "/resource/UserSessionResource/create/"
+        url = self.root + "/resource/UserSessionResource/create/"
         options = {
             "username_or_email": username,
             "password"         : password,
@@ -485,7 +522,7 @@ class PinterestAPI():
         }
 
     def _call(self, resource, options):
-        url = "{}/resource/{}Resource/get/".format(self.BASE_URL, resource)
+        url = "{}/resource/{}Resource/get/".format(self.root, resource)
         params = {"data": json.dumps({"options": options}), "source_url": ""}
 
         response = self.extractor.request(
@@ -497,10 +534,11 @@ class PinterestAPI():
         except ValueError:
             data = {}
 
-        if response.status_code < 400 and not response.history:
+        if response.history:
+            self.root = text.root_from_url(response.url)
+        if response.status_code < 400:
             return data
-
-        if response.status_code == 404 or response.history:
+        if response.status_code == 404:
             resource = self.extractor.subcategory.rpartition("-")[2]
             raise exception.NotFoundError(resource)
         self.extractor.log.debug("Server response: %s", response.text)
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 134361d..a17518f 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -45,7 +45,8 @@ class PixivExtractor(Extractor):
                 work["tags"] = [tag["name"] for tag in work["tags"]]
 
         ratings = {0: "General", 1: "R-18", 2: "R-18G"}
-        userdata = self.config("metadata")
+        meta_user = self.config("metadata")
+        meta_bookmark = self.config("metadata-bookmark")
         metadata = self.metadata()
 
         works = self.works()
@@ -61,8 +62,12 @@ class PixivExtractor(Extractor):
             del work["image_urls"]
             del work["meta_pages"]
 
-            if userdata:
+            if meta_user:
                 work.update(self.api.user_detail(work["user"]["id"]))
+            if meta_bookmark and work["is_bookmarked"]:
+                detail = self.api.illust_bookmark_detail(work["id"])
+                work["tags_bookmark"] = [tag["name"] for tag in detail["tags"]
+                                         if tag["is_registered"]]
             if transform_tags:
                 transform_tags(work)
             work["num"] = 0
@@ -398,6 +403,8 @@ class PixivFavoriteExtractor(PixivExtractor):
         # own bookmarks
         ("https://www.pixiv.net/bookmark.php", {
             "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
+            "keyword": {"tags_bookmark": ["47", "hitman"]},
+            "options": (("metadata-bookmark", True),),
         }),
         # own bookmarks with tag (#596)
         ("https://www.pixiv.net/bookmark.php?tag=foobar", {
@@ -880,6 +887,11 @@ class PixivAppAPI():
         params = {"illust_id": illust_id}
         return self._call("/v1/illust/detail", params)["illust"]
 
+    def illust_bookmark_detail(self, illust_id):
+        params = {"illust_id": illust_id}
+        return self._call(
+            "/v2/illust/bookmark/detail", params)["bookmark_detail"]
+
     def illust_follow(self, restrict="all"):
         params = {"restrict": restrict}
         return self._pagination("/v2/illust/follow", params)
@@ -900,9 +912,16 @@ class PixivAppAPI():
         return self._pagination("/v1/search/illust", params)
 
     def user_bookmarks_illust(self, user_id, tag=None, restrict="public"):
+        """Return illusts bookmarked by a user"""
         params = {"user_id": user_id, "tag": tag, "restrict": restrict}
         return self._pagination("/v1/user/bookmarks/illust", params)
 
+    def user_bookmark_tags_illust(self, user_id, restrict="public"):
+        """Return bookmark tags defined by a user"""
+        params = {"user_id": user_id, "restrict": restrict}
+        return self._pagination(
+            "/v1/user/bookmark-tags/illust", params, "bookmark_tags")
+
     @memcache(keyarg=1)
     def user_detail(self, user_id):
         params = {"user_id": user_id}
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index 4283081..c35ee74 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -59,7 +59,7 @@ class PoipikuExtractor(Extractor):
                     "//img.", "//img-org.", 1)
                 yield Message.Url, url, text.nameext_from_url(url, post)
 
-            if not extr('> show all', '<'):
+            if not extr(' show all(+', '<'):
                 continue
 
             url = self.root + "/f/ShowAppendFileF.jsp"
@@ -79,6 +79,9 @@ class PoipikuExtractor(Extractor):
             page = self.request(
                 url, method="POST", headers=headers, data=data).json()["html"]
 
+            if page.startswith("You need to"):
+                self.log.warning("'%s'", page)
+
             for thumb in text.extract_iter(
                     page, 'class="IllustItemThumbImg" src="', '"'):
                 post["num"] += 1
@@ -162,6 +165,21 @@ class PoipikuPostExtractor(PoipikuExtractor):
                 "user_name": "wadahito",
             },
         }),
+        # different warning button style
+        ("https://poipiku.com/3572553/5776587.html", {
+            "pattern": r"https://img-org\.poipiku.com/user_img\d+/003572553"
+                       r"/005776587_(\d+_)?\w+\.jpeg$",
+            "count": 3,
+            "keyword": {
+                "count": "3",
+                "description": "ORANGE OASISボスネタバレ",
+                "num": int,
+                "post_category": "SPOILER",
+                "post_id": "5776587",
+                "user_id": "3572553",
+                "user_name": "nagakun",
+            },
+        }),
     )
 
     def __init__(self, match):
diff --git a/gallery_dl/extractor/tcbscans.py b/gallery_dl/extractor/tcbscans.py
new file mode 100644
index 0000000..cac5a54
--- /dev/null
+++ b/gallery_dl/extractor/tcbscans.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://onepiecechapters.com/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+
+
+class TcbscansChapterExtractor(ChapterExtractor):
+    category = "tcbscans"
+    pattern = (r"(?:https?://)?onepiecechapters\.com"
+               r"(/chapters/\d+/[^/?#]+)")
+    root = "https://onepiecechapters.com"
+    test = (
+        (("https://onepiecechapters.com"
+          "/chapters/4708/chainsaw-man-chapter-108"), {
+            "pattern": (r"https://cdn\.[^/]+"
+                        r"/(file|attachments/[^/]+)/[^/]+/[^.]+\.\w+"),
+            "count"  : 17,
+            "keyword": {
+                "manga": "Chainsaw Man",
+                "chapter": 108,
+                "chapter_minor": "",
+                "lang": "en",
+                "language": "English",
+            },
+        }),
+        ("https://onepiecechapters.com/chapters/4716/one-piece-chapter-1065", {
+            "pattern": (r"https://cdn\.[^/]+"
+                        r"/(file|attachments/[^/]+)/[^/]+/[^.]+\.\w+"),
+            "count"  : 18,
+            "keyword": {
+                "manga": "One Piece",
+                "chapter": 1065,
+                "chapter_minor": "",
+                "lang": "en",
+                "language": "English",
+            },
+        }),
+        (("https://onepiecechapters.com/"
+          "chapters/44/ace-novel-manga-adaptation-chapter-1")),
+    )
+
+    def images(self, page):
+        return [
+            (url, None)
+            for url in text.extract_iter(
+                page, '<img class="fixed-ratio-content" src="', '"')
+        ]
+
+    def metadata(self, page):
+        manga, _, chapter = text.extr(
+            page, 'font-bold mt-8">', "</h1>").rpartition(" - Chapter ")
+        chapter, sep, minor = chapter.partition(".")
+        return {
+            "manga": text.unescape(manga),
+            "chapter": text.parse_int(chapter),
+            "chapter_minor": sep + minor,
+            "lang": "en", "language": "English",
+        }
+
+
+class TcbscansMangaExtractor(MangaExtractor):
+    category = "tcbscans"
+    chapterclass = TcbscansChapterExtractor
+    pattern = (r"(?:https?://)?onepiecechapters\.com"
+               r"(/mangas/\d+/[^/?#]+)")
+    root = "https://onepiecechapters.com"
+    test = (
+        ("https://onepiecechapters.com/mangas/13/chainsaw-man", {
+            "pattern": TcbscansChapterExtractor.pattern,
+            "range"  : "1-50",
+            "count"  : 50,
+        }),
+        ("https://onepiecechapters.com/mangas/4/jujutsu-kaisen", {
+            "pattern": TcbscansChapterExtractor.pattern,
+            "range"  : "1-50",
+            "count"  : 50,
+        }),
+        ("https://onepiecechapters.com/mangas/15/hunter-x-hunter"),
+    )
+
+    def chapters(self, page):
+        data = {
+            "manga": text.unescape(text.extr(
+                page, 'class="my-3 font-bold text-3xl">', "</h1>")),
+            "lang": "en", "language": "English",
+        }
+
+        results = []
+        page = text.extr(page, 'class="col-span-2"', 'class="order-1')
+        for chapter in text.extract_iter(page, "<a", "</a>"):
+            url = text.extr(chapter, 'href="', '"')
+            data["title"] = text.unescape(text.extr(
+                chapter, 'text-gray-500">', "</div>"))
+            chapter = text.extr(
+                chapter, 'font-bold">', "</div>").rpartition(" Chapter ")[2]
+            chapter, sep, minor = chapter.partition(".")
+            data["chapter"] = text.parse_int(chapter)
+            data["chapter_minor"] = sep + minor
+            results.append((self.root + url, data.copy()))
+        return results
diff --git a/gallery_dl/extractor/telegraph.py b/gallery_dl/extractor/telegraph.py
index 8e9bf2c..5996268 100644
--- a/gallery_dl/extractor/telegraph.py
+++ b/gallery_dl/extractor/telegraph.py
@@ -12,7 +12,6 @@ from .. import text
 
 class TelegraphGalleryExtractor(GalleryExtractor):
     """Extractor for articles from telegra.ph"""
-
     category = "telegraph"
     root = "https://telegra.ph"
     directory_fmt = ("{category}", "{slug}")
@@ -52,6 +51,23 @@ class TelegraphGalleryExtractor(GalleryExtractor):
                 "url": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg",
             },
         }),
+        ("https://telegra.ph/Vsyo-o-druzyah-moej-sestricy-05-27", {
+            "url": "c1f3048e5d94bee53af30a8c27f70b0d3b15438e",
+            "pattern": r"^https://pith1\.ru/uploads"
+                       r"/posts/2019-12/\d+_\d+\.jpg$",
+            "keyword": {
+                "author": "Shotacon - заходи сюда",
+                "caption": "",
+                "count": 19,
+                "date": "dt:2022-05-27 16:17:27",
+                "description": "",
+                "num_formatted": r"re:^\d{2}$",
+                "post_url": "https://telegra.ph"
+                            "/Vsyo-o-druzyah-moej-sestricy-05-27",
+                "slug": "Vsyo-o-druzyah-moej-sestricy-05-27",
+                "title": "Всё о друзьях моей сестрицы",
+            },
+        }),
     )
 
     def metadata(self, page):
@@ -79,11 +95,12 @@ class TelegraphGalleryExtractor(GalleryExtractor):
 
         result = []
         for figure in figures:
-            src, pos = text.extract(figure, 'src="', '"')
-            if src.startswith("/embed/"):
+            url, pos = text.extract(figure, 'src="', '"')
+            if url.startswith("/embed/"):
                 continue
+            elif url.startswith("/"):
+                url = self.root + url
             caption, pos = text.extract(figure, "<figcaption>", "<", pos)
-            url = self.root + src
             num += 1
 
             result.append((url, {
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 22aa78e..c2d8247 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -39,6 +39,7 @@ class TwitterExtractor(Extractor):
         self.videos = self.config("videos", True)
         self.cards = self.config("cards", False)
         self.cards_blacklist = self.config("cards-blacklist")
+        self.syndication = self.config("syndication")
         self._user = self._user_obj = None
         self._user_cache = {}
         self._init_sizes()
@@ -75,11 +76,6 @@ class TwitterExtractor(Extractor):
             else:
                 data = tweet
 
-            if seen_tweets is not None:
-                if data["id_str"] in seen_tweets:
-                    continue
-                seen_tweets.add(data["id_str"])
-
             if not self.retweets and "retweeted_status_id_str" in data:
                 self.log.debug("Skipping %s (retweet)", data["id_str"])
                 continue
@@ -97,6 +93,13 @@ class TwitterExtractor(Extractor):
                 self.log.debug("Skipping %s (reply)", data["id_str"])
                 continue
 
+            if seen_tweets is not None:
+                if data["id_str"] in seen_tweets:
+                    self.log.debug(
+                        "Skipping %s (previously seen)", data["id_str"])
+                    continue
+                seen_tweets.add(data["id_str"])
+
             files = []
             if "extended_entities" in data:
                 self._extract_media(
@@ -220,14 +223,16 @@ class TwitterExtractor(Extractor):
     def _extract_twitpic(self, tweet, files):
         for url in tweet["entities"].get("urls", ()):
             url = url["expanded_url"]
-            if "//twitpic.com/" in url and "/photos/" not in url:
-                response = self.request(url, fatal=False)
-                if response.status_code >= 400:
-                    continue
-                url = text.extr(
-                    response.text, 'name="twitter:image" value="', '"')
-                if url:
-                    files.append({"url": url})
+            if "//twitpic.com/" not in url or "/photos/" in url:
+                continue
+            if url.startswith("http:"):
+                url = "https" + url[4:]
+            response = self.request(url, fatal=False)
+            if response.status_code >= 400:
+                continue
+            url = text.extr(response.text, 'name="twitter:image" value="', '"')
+            if url:
+                files.append({"url": url})
 
     def _transform_tweet(self, tweet):
         if "author" in tweet:
@@ -299,6 +304,9 @@ class TwitterExtractor(Extractor):
 
         if "legacy" in user:
             user = user["legacy"]
+        elif "statuses_count" not in user and self.syndication == "extended":
+            # try to fetch extended user data
+            user = self.api.user_by_screen_name(user["screen_name"])["legacy"]
 
         uget = user.get
         entities = user["entities"]
@@ -361,18 +369,22 @@ class TwitterExtractor(Extractor):
     def _expand_tweets(self, tweets):
         seen = set()
         for tweet in tweets:
-
-            if "legacy" in tweet:
-                cid = tweet["legacy"]["conversation_id_str"]
-            else:
-                cid = tweet["conversation_id_str"]
-
-            if cid not in seen:
-                seen.add(cid)
-                try:
-                    yield from self.api.tweet_detail(cid)
-                except Exception:
-                    yield tweet
+            obj = tweet["legacy"] if "legacy" in tweet else tweet
+            cid = obj.get("conversation_id_str")
+            if not cid:
+                tid = obj["id_str"]
+                self.log.warning(
+                    "Unable to expand %s (no 'conversation_id')", tid)
+                continue
+            if cid in seen:
+                self.log.debug(
+                    "Skipping expansion of %s (previously seen)", cid)
+                continue
+            seen.add(cid)
+            try:
+                yield from self.api.tweet_detail(cid)
+            except Exception:
+                yield tweet
 
     def _make_tweet(self, user, id_str, url, timestamp):
         return {
@@ -772,7 +784,7 @@ class TwitterTweetExtractor(TwitterExtractor):
         # age-restricted (#2354)
         ("https://twitter.com/mightbecursed/status/1492954264909479936", {
             "options": (("syndication", True),),
-            "keywords": {"date": "dt:2022-02-13 20:10:09"},
+            "keyword": {"date": "dt:2022-02-13 20:10:09"},
             "count": 1,
         }),
         # media alt texts / descriptions (#2617)
@@ -991,7 +1003,7 @@ class TwitterAPI():
         }
 
         self._nsfw_warning = True
-        self._syndication = extractor.config("syndication")
+        self._syndication = self.extractor.syndication
         self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
 
         cookies = extractor.session.cookies
@@ -1516,6 +1528,12 @@ class TwitterAPI():
         else:
             retweet_id = None
 
+        # assume 'conversation_id' is the same as 'id' when the tweet
+        # is not a reply
+        if "conversation_id_str" not in tweet and \
+                "in_reply_to_status_id_str" not in tweet:
+            tweet["conversation_id_str"] = tweet["id_str"]
+
         tweet["created_at"] = text.parse_datetime(
             tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime(
             "%a %b %d %H:%M:%S +0000 %Y")
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 9b6831b..5692452 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -110,7 +110,7 @@ class VkPhotosExtractor(VkExtractor):
             "pattern": r"https://sun\d+-\d+\.userapi\.com/s/v1/if1"
                        r"/[\w-]+\.jpg\?size=\d+x\d+&quality=96&type=album",
             "count": ">= 35",
-            "keywords": {
+            "keyword": {
                 "id": r"re:\d+",
                 "user": {
                     "id": "398982326",
@@ -122,12 +122,11 @@ class VkPhotosExtractor(VkExtractor):
         }),
         ("https://vk.com/cosplayinrussia", {
             "range": "15-25",
-            "keywords": {
+            "keyword": {
                 "id": r"re:\d+",
                 "user": {
                     "id"  : "-165740836",
-                    "info": "Предложка открыта, кидайте ваши косплейчики. При "
-                            "правильном оформлении они будут опубликованы",
+                    "info": str,
                     "name": "cosplayinrussia",
                     "nick": "Косплей | Cosplay 18+",
                 },
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index 74da615..03fd909 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -111,13 +111,15 @@ class ZerochanTagExtractor(ZerochanExtractor):
     test = ("https://www.zerochan.net/Perth+%28Kantai+Collection%29", {
         "pattern": r"https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
         "count": "> 24",
-        "keywords": {
+        "keyword": {
             "extension": r"re:jpg|png",
-            "file_url": "",
-            "filename": r"re:Perth.\(Kantai.Collection\).full.\d+",
+            "file_url": r"re:https://static\.zerochan\.net"
+                        r"/.+\.full\.\d+\.(jpg|png)",
+            "filename": r"re:(Perth\.\(Kantai\.Collection\)"
+                        r"|Kantai\.Collection)\.full\.\d+",
             "height": r"re:^\d+$",
             "id": r"re:^\d+$",
-            "name": "Perth (Kantai Collection)",
+            "name": r"re:(Perth \(Kantai Collection\)|Kantai Collection)",
             "search_tags": "Perth (Kantai Collection)",
             "size": r"re:^\d+k$",
             "width": r"re:^\d+$",
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index 8a45330..58bf48d 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -60,14 +60,21 @@ class StringFormatter():
     - "u": calls str.upper
     - "c": calls str.capitalize
     - "C": calls string.capwords
-    - "j". calls json.dumps
+    - "g": calls text.slugify()
+    - "j": calls json.dumps
     - "t": calls str.strip
+    - "T": calls util.datetime_to_timestamp_string()
     - "d": calls text.parse_timestamp
-    - "U": calls urllib.parse.unescape
+    - "s": calls str()
     - "S": calls util.to_string()
-    - "T": calls util.to_timestamü()
+    - "U": calls urllib.parse.unescape
+    - "r": calls repr()
+    - "a": calls ascii()
     - Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"
 
+    # Go to _CONVERSIONS and _SPECIFIERS below to see all of them, or read:
+    # https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md
+
     Extra Format Specifiers:
     - "?<before>/<after>/":
         Adds <before> and <after> to the actual value if it evaluates to True.
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 91e9169..32cac79 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -325,7 +325,7 @@ def build_parser():
     configuration.add_argument(
         "--ignore-config",
         dest="load_config", action="store_false",
-        help="Do not read the default configuration files",
+        help="Do not read default configuration files",
     )
 
     authentication = parser.add_argument_group("Authentication Options")
@@ -349,7 +349,7 @@ def build_parser():
     selection.add_argument(
         "--download-archive",
         dest="archive", metavar="FILE", action=ConfigAction,
-        help=("Record all downloaded files in the archive file and "
+        help=("Record all downloaded or skipped files in FILE and "
               "skip downloading any file already in it"),
     )
     selection.add_argument(
@@ -367,19 +367,20 @@ def build_parser():
     selection.add_argument(
         "--range",
         dest="image-range", metavar="RANGE", action=ConfigAction,
-        help=("Index-range(s) specifying which images to download. "
-              "For example '5-10' or '1,3-5,10-'"),
+        help=("Index range(s) specifying which files to download. "
+              "These can be either a constant value, range, or slice "
+              "(e.g. '5', '8-20', or '1:24:3')"),
     )
     selection.add_argument(
         "--chapter-range",
         dest="chapter-range", metavar="RANGE", action=ConfigAction,
-        help=("Like '--range', but applies to manga-chapters "
+        help=("Like '--range', but applies to manga chapters "
               "and other delegated URLs"),
     )
     selection.add_argument(
         "--filter",
         dest="image-filter", metavar="EXPR", action=ConfigAction,
-        help=("Python expression controlling which images to download. "
+        help=("Python expression controlling which files to download. "
               "Files for which the expression evaluates to False are ignored. "
               "Available keys are the filename-specific ones listed by '-K'. "
               "Example: --filter \"image_width >= 1000 and "
@@ -388,7 +389,7 @@ def build_parser():
     selection.add_argument(
         "--chapter-filter",
         dest="chapter-filter", metavar="EXPR", action=ConfigAction,
-        help=("Like '--filter', but applies to manga-chapters "
+        help=("Like '--filter', but applies to manga chapters "
               "and other delegated URLs"),
     )
 
@@ -472,7 +473,7 @@ def build_parser():
         dest="postprocessors", metavar="CMD",
         action=AppendCommandAction, const={"name": "exec"},
         help=("Execute CMD for each downloaded file. "
-              "Example: --exec 'convert {} {}.png && rm {}'"),
+              "Example: --exec \"convert {} {}.png && rm {}\""),
     )
     postprocessor.add_argument(
         "--exec-after",
@@ -480,7 +481,7 @@ def build_parser():
         action=AppendCommandAction, const={
             "name": "exec", "event": "finalize"},
         help=("Execute CMD after all files were downloaded successfully. "
-              "Example: --exec-after 'cd {} && convert * ../doc.pdf'"),
+              "Example: --exec-after \"cd {} && convert * ../doc.pdf\""),
     )
     postprocessor.add_argument(
         "-P", "--postprocessor",
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 23d5bc8..543fb10 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -714,74 +714,71 @@ def chain_predicates(predicates, url, kwdict):
 
 
 class RangePredicate():
-    """Predicate; True if the current index is in the given range"""
+    """Predicate; True if the current index is in the given range(s)"""
+
     def __init__(self, rangespec):
-        self.ranges = self.optimize_range(self.parse_range(rangespec))
+        self.ranges = ranges = self._parse(rangespec)
         self.index = 0
 
-        if self.ranges:
-            self.lower, self.upper = self.ranges[0][0], self.ranges[-1][1]
+        if ranges:
+            # technically wrong, but good enough for now
+            # and evaluating min/max for a large range is slow
+            self.lower = min(r.start for r in ranges)
+            self.upper = max(r.stop for r in ranges) - 1
         else:
-            self.lower, self.upper = 0, 0
+            self.lower = self.upper = 0
 
-    def __call__(self, url, _):
-        self.index += 1
+    def __call__(self, _url, _kwdict):
+        self.index = index = self.index + 1
 
-        if self.index > self.upper:
+        if index > self.upper:
             raise exception.StopExtraction()
 
-        for lower, upper in self.ranges:
-            if lower <= self.index <= upper:
+        for range in self.ranges:
+            if index in range:
                 return True
         return False
 
     @staticmethod
-    def parse_range(rangespec):
+    def _parse(rangespec):
         """Parse an integer range string and return the resulting ranges
 
         Examples:
-            parse_range("-2,4,6-8,10-") -> [(1,2), (4,4), (6,8), (10,INTMAX)]
-            parse_range(" - 3 , 4-  4, 2-6") -> [(1,3), (4,4), (2,6)]
+            _parse("-2,4,6-8,10-")      -> [(1,3), (4,5), (6,9), (10,INTMAX)]
+            _parse(" - 3 , 4-  4, 2-6") -> [(1,4), (4,5), (2,7)]
+            _parse("1:2,4:8:2")         -> [(1,1), (4,7,2)]
         """
         ranges = []
+        append = ranges.append
 
-        for group in rangespec.split(","):
+        if isinstance(rangespec, str):
+            rangespec = rangespec.split(",")
+
+        for group in rangespec:
             if not group:
                 continue
-            first, sep, last = group.partition("-")
-            if not sep:
-                beg = end = int(first)
-            else:
-                beg = int(first) if first.strip() else 1
-                end = int(last) if last.strip() else sys.maxsize
-            ranges.append((beg, end) if beg <= end else (end, beg))
 
-        return ranges
+            elif ":" in group:
+                start, _, stop = group.partition(":")
+                stop, _, step = stop.partition(":")
+                append(range(
+                    int(start) if start.strip() else 1,
+                    int(stop) if stop.strip() else sys.maxsize,
+                    int(step) if step.strip() else 1,
+                ))
+
+            elif "-" in group:
+                start, _, stop = group.partition("-")
+                append(range(
+                    int(start) if start.strip() else 1,
+                    int(stop) + 1 if stop.strip() else sys.maxsize,
+                ))
 
-    @staticmethod
-    def optimize_range(ranges):
-        """Simplify/Combine a parsed list of ranges
-
-        Examples:
-            optimize_range([(2,4), (4,6), (5,8)]) -> [(2,8)]
-            optimize_range([(1,1), (2,2), (3,6), (8,9))]) -> [(1,6), (8,9)]
-        """
-        if len(ranges) <= 1:
-            return ranges
-
-        ranges.sort()
-        riter = iter(ranges)
-        result = []
+            else:
+                start = int(group)
+                append(range(start, start+1))
 
-        beg, end = next(riter)
-        for lower, upper in riter:
-            if lower > end+1:
-                result.append((beg, end))
-                beg, end = lower, upper
-            elif upper > end:
-                end = upper
-        result.append((beg, end))
-        return result
+        return ranges
 
 
 class UniquePredicate():
@@ -802,6 +799,8 @@ class FilterPredicate():
     """Predicate; True if evaluating the given expression returns True"""
 
     def __init__(self, expr, target="image"):
+        if not isinstance(expr, str):
+            expr = "(" + ") and (".join(expr) + ")"
         name = "<{} filter>".format(target)
         self.expr = compile_expression(expr, name)
 
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d832185..5e3b507 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.24.2"
+__version__ = "1.24.3"
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index db313c3..7b71349 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -199,13 +199,27 @@ def parse_command_line(module, argv):
                     action += args
                     yield action
 
-        if getattr(opts, "parse_metadata", None) is None:
-            opts.parse_metadata = []
-        if opts.metafromtitle is not None:
-            opts.parse_metadata.append("title:%s" % opts.metafromtitle)
-            opts.metafromtitle = None
-        opts.parse_metadata = list(itertools.chain.from_iterable(map(
-            metadataparser_actions, opts.parse_metadata)))
+        parse_metadata = getattr(opts, "parse_metadata", None)
+        if isinstance(parse_metadata, dict):
+            if opts.metafromtitle is not None:
+                if "pre_process" not in parse_metadata:
+                    parse_metadata["pre_process"] = []
+                parse_metadata["pre_process"].append(
+                    "title:%s" % opts.metafromtitle)
+            opts.parse_metadata = {
+                k: list(itertools.chain.from_iterable(map(
+                        metadataparser_actions, v)))
+                for k, v in parse_metadata.items()
+            }
+        else:
+            if parse_metadata is None:
+                parse_metadata = []
+            if opts.metafromtitle is not None:
+                parse_metadata.append("title:%s" % opts.metafromtitle)
+            opts.parse_metadata = list(itertools.chain.from_iterable(map(
+                metadataparser_actions, parse_metadata)))
+
+        opts.metafromtitle = None
     else:
         opts.parse_metadata = ()
author	Unit 193 <unit193@unit193.net>	2023-01-11 04:09:13 -0500
committer	Unit 193 <unit193@unit193.net>	2023-01-11 04:09:13 -0500
commit	fe385c3ff784ba3d19454a35446502c0ec295893 (patch)
tree	897982793ef2a0c0f349044bf4cf803ccd483e6e /gallery_dl
parent	ebdfcd3cd3f76534a590ba08933ff7ea54813316 (diff)