diff options
| author | 2026-01-06 04:24:52 -0500 | |
|---|---|---|
| committer | 2026-01-06 04:24:52 -0500 | |
| commit | 385e4bfb1e426d23417ac788a6f44d639e226c89 (patch) | |
| tree | e64f04e19d63014d48e3b5272ce112c637236ba7 /gallery_dl | |
| parent | a24ec1647aeac35a63b744ea856011ad6e06be3b (diff) | |
New upstream version 1.31.2. (refs: upstream/1.31.2, upstream)
Diffstat (limited to 'gallery_dl')
188 files changed, 2730 insertions, 1163 deletions
diff --git a/gallery_dl/actions.py b/gallery_dl/actions.py index 5d2f645..1e4626a 100644 --- a/gallery_dl/actions.py +++ b/gallery_dl/actions.py @@ -257,15 +257,21 @@ def action_raise(opts): def action_abort(opts): - return None, util.raises(exception.StopExtraction) + def _abort(_): + raise exception.StopExtraction(opts or None) + return None, _abort def action_terminate(opts): - return None, util.raises(exception.TerminateExtraction) + def _terminate(_): + raise exception.TerminateExtraction(opts) + return None, _terminate def action_restart(opts): - return None, util.raises(exception.RestartExtraction) + def _restart(_): + raise exception.RestartExtraction(opts) + return None, _restart def action_exit(opts): @@ -274,7 +280,7 @@ def action_exit(opts): except ValueError: pass - def _exit(args): + def _exit(_): raise SystemExit(opts) return None, _exit diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 703dcca..4f380fb 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -484,6 +484,8 @@ MIME_TYPES = { "audio/webm" : "webm", "audio/ogg" : "ogg", "audio/mpeg" : "mp3", + "audio/aac" : "aac", + "audio/x-aac": "aac", "application/vnd.apple.mpegurl": "m3u8", "application/x-mpegurl" : "m3u8", @@ -540,6 +542,7 @@ SIGNATURE_CHECKS = { s[8:12] == b"WAVE"), "mp3" : lambda s: (s[0:3] == b"ID3" or s[0:2] in (b"\xFF\xFB", b"\xFF\xF3", b"\xFF\xF2")), + "aac" : lambda s: s[0:2] in (b"\xFF\xF9", b"\xFF\xF1"), "m3u8": lambda s: s[0:7] == b"#EXTM3U", "mpd" : lambda s: b"<MPD" in s, "zip" : lambda s: s[0:4] in (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"), diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index e9b3294..32fb7b9 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -41,6 +41,7 @@ class YoutubeDLDownloader(DownloaderBase): kwdict = pathfmt.kwdict tries = 0 + kwdict["_mtime_http"] = None if ytdl_instance := kwdict.pop("_ytdl_instance", None): # 'ytdl' 
extractor self._prepare(ytdl_instance) @@ -68,7 +69,7 @@ class YoutubeDLDownloader(DownloaderBase): self.log.debug("Using %s version %s", module, ytdl_version) self.ytdl_instance = ytdl_instance = ytdl.construct_YoutubeDL( - module, self, self.ytdl_opts) + module, self, self.ytdl_opts, kwdict.get("_ytdl_params")) if self.outtmpl == "default": self.outtmpl = module.DEFAULT_OUTTMPL self._prepare(ytdl_instance) diff --git a/gallery_dl/extractor/2ch.py b/gallery_dl/extractor/2ch.py index 1f17c99..81a0a69 100644 --- a/gallery_dl/extractor/2ch.py +++ b/gallery_dl/extractor/2ch.py @@ -20,12 +20,12 @@ class _2chThreadExtractor(Extractor): directory_fmt = ("{category}", "{board}", "{thread} {title}") filename_fmt = "{tim}{filename:? //}.{extension}" archive_fmt = "{board}_{thread}_{tim}" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/res/(\d+)" + pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)" example = "https://2ch.org/a/res/12345.html" def __init__(self, match): tld = match[1] - self.root = f"https://2ch.{'org' if tld == 'hk' else tld}" + self.root = "https://2ch." + ("org" if tld == "hk" else tld) Extractor.__init__(self, match) def items(self): @@ -66,19 +66,19 @@ class _2chBoardExtractor(Extractor): category = "2ch" subcategory = "board" root = "https://2ch.org" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/?$" + pattern = BASE_PATTERN + r"/([^/?#]+)/?$" example = "https://2ch.org/a/" def __init__(self, match): tld = match[1] - self.root = f"https://2ch.{'su' if tld == 'hk' else tld}" + self.root = "https://2ch." 
+ ("org" if tld == "hk" else tld) Extractor.__init__(self, match) def items(self): base = f"{self.root}/{self.groups[1]}" # index page - url = f"{base}/index.json" + url = base + "/index.json" index = self.request_json(url) index["_extractor"] = _2chThreadExtractor for thread in index["threads"]: diff --git a/gallery_dl/extractor/2chen.py b/gallery_dl/extractor/2chen.py index 4456fd6..78b0aae 100644 --- a/gallery_dl/extractor/2chen.py +++ b/gallery_dl/extractor/2chen.py @@ -34,7 +34,7 @@ class _2chenThreadExtractor(_2chenExtractor): directory_fmt = ("{category}", "{board}", "{thread} {title}") filename_fmt = "{time} {filename}.{extension}" archive_fmt = "{board}_{thread}_{no}_{time}" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/(\d+)" + pattern = BASE_PATTERN + r"/([^/?#]+)/(\d+)" example = "https://sturdychan.help/a/12345/" def items(self): @@ -84,7 +84,7 @@ class _2chenThreadExtractor(_2chenExtractor): class _2chenBoardExtractor(_2chenExtractor): """Extractor for 2chen boards""" subcategory = "board" - pattern = rf"{BASE_PATTERN}/([^/?#]+)(?:/catalog|/?$)" + pattern = BASE_PATTERN + r"/([^/?#]+)(?:/catalog|/?$)" example = "https://sturdychan.help/a/" def items(self): diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index b74bc90..748f23c 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -92,7 +92,7 @@ class _500pxExtractor(Extractor): class _500pxUserExtractor(_500pxExtractor): """Extractor for photos from a user's photostream on 500px.com""" subcategory = "user" - pattern = rf"{BASE_PATTERN}/(?!photo/|liked)(?:p/)?([^/?#]+)/?(?:$|[?#])" + pattern = BASE_PATTERN + r"/(?!photo/|liked)(?:p/)?([^/?#]+)/?(?:$|[?#])" example = "https://500px.com/USER" def __init__(self, match): @@ -121,8 +121,8 @@ class _500pxGalleryExtractor(_500pxExtractor): """Extractor for photo galleries on 500px.com""" subcategory = "gallery" directory_fmt = ("{category}", "{user[username]}", "{gallery[name]}") - pattern = 
(rf"{BASE_PATTERN}/(?!photo/)(?:p/)?" - rf"([^/?#]+)/galleries/([^/?#]+)") + pattern = (BASE_PATTERN + r"/(?!photo/)(?:p/)?" + r"([^/?#]+)/galleries/([^/?#]+)") example = "https://500px.com/USER/galleries/GALLERY" def __init__(self, match): @@ -178,7 +178,7 @@ class _500pxGalleryExtractor(_500pxExtractor): class _500pxFavoriteExtractor(_500pxExtractor): """Extractor for favorite 500px photos""" subcategory = "favorite" - pattern = rf"{BASE_PATTERN}/liked/?$" + pattern = BASE_PATTERN + r"/liked/?$" example = "https://500px.com/liked" def photos(self): @@ -202,7 +202,7 @@ class _500pxFavoriteExtractor(_500pxExtractor): class _500pxImageExtractor(_500pxExtractor): """Extractor for individual images from 500px.com""" subcategory = "image" - pattern = rf"{BASE_PATTERN}/photo/(\d+)" + pattern = BASE_PATTERN + r"/photo/(\d+)" example = "https://500px.com/photo/12345/TITLE" def __init__(self, match): diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py index 3230182..f7e170f 100644 --- a/gallery_dl/extractor/8chan.py +++ b/gallery_dl/extractor/8chan.py @@ -69,7 +69,7 @@ class _8chanThreadExtractor(_8chanExtractor): "{threadId} {subject[:50]}") filename_fmt = "{postId}{num:?-//} {filename[:200]}.{extension}" archive_fmt = "{boardUri}_{postId}_{num}" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/(?:res|last)/(\d+)" + pattern = BASE_PATTERN + r"/([^/?#]+)/(?:res|last)/(\d+)" example = "https://8chan.moe/a/res/12345.html" def items(self): @@ -107,7 +107,7 @@ class _8chanThreadExtractor(_8chanExtractor): class _8chanBoardExtractor(_8chanExtractor): """Extractor for 8chan boards""" subcategory = "board" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/(?:(\d+)\.html)?$" + pattern = BASE_PATTERN + r"/([^/?#]+)/(?:(\d+)\.html)?$" example = "https://8chan.moe/a/" def items(self): diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 64134d0..9f18204 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -23,6 
+23,7 @@ modules = [ "8muses", "adultempire", "agnph", + "ahottie", "ao3", "arcalive", "architizer", @@ -233,6 +234,7 @@ modules = [ "weebcentral", "weebdex", "weibo", + "whyp", "wikiart", "wikifeet", "wikimedia", @@ -242,6 +244,7 @@ modules = [ "xhamster", "xvideos", "yiffverse", + "yourlesbians", "zerochan", "booru", "moebooru", diff --git a/gallery_dl/extractor/agnph.py b/gallery_dl/extractor/agnph.py index 55b17c7..be4517a 100644 --- a/gallery_dl/extractor/agnph.py +++ b/gallery_dl/extractor/agnph.py @@ -81,7 +81,7 @@ class AgnphTagExtractor(AgnphExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" - pattern = rf"{BASE_PATTERN}/gallery/post/(?:\?([^#]+))?$" + pattern = BASE_PATTERN + r"/gallery/post/(?:\?([^#]+))?$" example = "https://agn.ph/gallery/post/?search=TAG" def __init__(self, match): @@ -99,7 +99,7 @@ class AgnphTagExtractor(AgnphExtractor): class AgnphPostExtractor(AgnphExtractor): subcategory = "post" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/gallery/post/show/(\d+)" + pattern = BASE_PATTERN + r"/gallery/post/show/(\d+)" example = "https://agn.ph/gallery/post/show/12345/" def posts(self): diff --git a/gallery_dl/extractor/ahottie.py b/gallery_dl/extractor/ahottie.py new file mode 100644 index 0000000..f8db0d4 --- /dev/null +++ b/gallery_dl/extractor/ahottie.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://ahottie.top/""" + +from .common import Extractor, GalleryExtractor, Message +from .. 
import text + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?ahottie\.top" + + +class AhottieExtractor(Extractor): + """Base class for ahottie extractors""" + category = "ahottie" + root = "https://ahottie.top" + + def items(self): + for album in self.albums(): + yield Message.Queue, album["url"], album + + def _pagination(self, url, params): + params["page"] = text.parse_int(params.get("page"), 1) + + while True: + page = self.request(url, params=params).text + + for album in text.extract_iter( + page, '<div class="relative">', '</div>'): + yield { + "url" : text.extr(album, ' href="', '"'), + "title": text.unquote(text.extr( + album, ' alt="', '"')), + "date" : self.parse_datetime_iso(text.extr( + album, ' datetime="', '"')), + "_extractor": AhottieGalleryExtractor, + } + + if 'rel="next"' not in page: + break + params["page"] += 1 + + +class AhottieGalleryExtractor(GalleryExtractor, AhottieExtractor): + directory_fmt = ("{category}", "{date:%Y-%m-%d} {title} ({gallery_id})") + filename_fmt = "{num:>03}.{extension}" + archive_fmt = "{gallery_id}_{num}_{filename}" + pattern = BASE_PATTERN + r"(/albums/(\w+))" + example = "https://ahottie.top/albums/1234567890" + + def metadata(self, page): + extr = text.extract_from(page) + return { + "gallery_id": self.groups[1], + "title": text.unescape(extr("<title>", "<").rpartition(" | ")[0]), + "date" : self.parse_datetime_iso(extr('datetime="', '"')), + "tags" : text.split_html(extr('<i ', '</div>'))[1:], + } + + def images(self, page): + pos = page.find("<time ") + 1 + data = { + "_http_headers" : {"Referer": None}, + "_http_validate": self._validate, + } + return [ + (url, data) + for url in text.extract_iter(page, '" src="', '"', pos) + ] + + def _validate(self, response): + hget = response.headers.get + return not ( + hget("content-length") == "2421" and + hget("content-type") == "image/jpeg" + ) + + +class AhottieTagExtractor(AhottieExtractor): + subcategory = "tag" + pattern = BASE_PATTERN + r"/tags/([^/?#]+)" + example = 
"https://ahottie.top/tags/TAG" + + def albums(self): + tag = self.groups[0] + self.kwdict["search_tags"] = text.unquote(tag) + return self._pagination(f"{self.root}/tags/{tag}", {}) + + +class AhottieSearchExtractor(AhottieExtractor): + subcategory = "search" + pattern = BASE_PATTERN + r"/search/?\?([^#]+)" + example = "https://ahottie.top/search?kw=QUERY" + + def albums(self): + params = text.parse_query(self.groups[0]) + self.kwdict["search_tags"] = params.get("kw") + return self._pagination(f"{self.root}/search", params) diff --git a/gallery_dl/extractor/ao3.py b/gallery_dl/extractor/ao3.py index 716492e..6c044a6 100644 --- a/gallery_dl/extractor/ao3.py +++ b/gallery_dl/extractor/ao3.py @@ -118,7 +118,7 @@ class Ao3WorkExtractor(Ao3Extractor): directory_fmt = ("{category}", "{author}") filename_fmt = "{id} {title}.{extension}" archive_fmt = "{id}.{extension}" - pattern = rf"{BASE_PATTERN}/works/(\d+)" + pattern = BASE_PATTERN + r"/works/(\d+)" example = "https://archiveofourown.org/works/12345" def _init(self): @@ -233,28 +233,28 @@ class Ao3WorkExtractor(Ao3Extractor): class Ao3SeriesExtractor(Ao3Extractor): """Extractor for AO3 works of a series""" subcategory = "series" - pattern = rf"{BASE_PATTERN}(/series/(\d+))" + pattern = BASE_PATTERN + r"(/series/(\d+))" example = "https://archiveofourown.org/series/12345" class Ao3TagExtractor(Ao3Extractor): """Extractor for AO3 works by tag""" subcategory = "tag" - pattern = rf"{BASE_PATTERN}(/tags/([^/?#]+)/works(?:/?\?.+)?)" + pattern = BASE_PATTERN + r"(/tags/([^/?#]+)/works(?:/?\?.+)?)" example = "https://archiveofourown.org/tags/TAG/works" class Ao3SearchExtractor(Ao3Extractor): """Extractor for AO3 search results""" subcategory = "search" - pattern = rf"{BASE_PATTERN}(/works/search/?\?.+)" + pattern = BASE_PATTERN + r"(/works/search/?\?.+)" example = "https://archiveofourown.org/works/search?work_search[query]=air" class Ao3UserExtractor(Dispatch, Ao3Extractor): """Extractor for an AO3 user profile""" - pattern 
= (rf"{BASE_PATTERN}/users/([^/?#]+(?:/pseuds/[^/?#]+)?)" - rf"(?:/profile)?/?(?:$|\?|#)") + pattern = (BASE_PATTERN + r"/users/([^/?#]+(?:/pseuds/[^/?#]+)?)" + r"(?:/profile)?/?(?:$|\?|#)") example = "https://archiveofourown.org/users/USER" def items(self): @@ -269,16 +269,16 @@ class Ao3UserExtractor(Dispatch, Ao3Extractor): class Ao3UserWorksExtractor(Ao3Extractor): """Extractor for works of an AO3 user""" subcategory = "user-works" - pattern = (rf"{BASE_PATTERN}(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?" - rf"works(?:/?\?.+)?)") + pattern = (BASE_PATTERN + r"(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?" + r"works(?:/?\?.+)?)") example = "https://archiveofourown.org/users/USER/works" class Ao3UserSeriesExtractor(Ao3Extractor): """Extractor for series of an AO3 user""" subcategory = "user-series" - pattern = (rf"{BASE_PATTERN}(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?" - rf"series(?:/?\?.+)?)") + pattern = (BASE_PATTERN + r"(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?" + r"series(?:/?\?.+)?)") example = "https://archiveofourown.org/users/USER/series" def items(self): @@ -297,8 +297,8 @@ class Ao3UserSeriesExtractor(Ao3Extractor): class Ao3UserBookmarkExtractor(Ao3Extractor): """Extractor for bookmarked works of an AO3 user""" subcategory = "user-bookmark" - pattern = (rf"{BASE_PATTERN}(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?" - rf"bookmarks(?:/?\?.+)?)") + pattern = (BASE_PATTERN + r"(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?" 
+ r"bookmarks(?:/?\?.+)?)") example = "https://archiveofourown.org/users/USER/bookmarks" def items(self): @@ -308,7 +308,7 @@ class Ao3UserBookmarkExtractor(Ao3Extractor): class Ao3SubscriptionsExtractor(Ao3Extractor): """Extractor for your AO3 account's subscriptions""" subcategory = "subscriptions" - pattern = rf"{BASE_PATTERN}(/users/([^/?#]+)/subscriptions(?:/?\?.+)?)" + pattern = BASE_PATTERN + r"(/users/([^/?#]+)/subscriptions(?:/?\?.+)?)" example = "https://archiveofourown.org/users/USER/subscriptions" def items(self): diff --git a/gallery_dl/extractor/arcalive.py b/gallery_dl/extractor/arcalive.py index f950d14..d48e4f5 100644 --- a/gallery_dl/extractor/arcalive.py +++ b/gallery_dl/extractor/arcalive.py @@ -36,7 +36,7 @@ class ArcalivePostExtractor(ArcaliveExtractor): directory_fmt = ("{category}", "{boardSlug}") filename_fmt = "{id}_{num}{title:? //[b:230]}.{extension}" archive_fmt = "{id}_{num}" - pattern = rf"{BASE_PATTERN}/b/(?:\w+)/(\d+)" + pattern = BASE_PATTERN + r"/b/(?:\w+)/(\d+)" example = "https://arca.live/b/breaking/123456789" def items(self): @@ -84,7 +84,7 @@ class ArcalivePostExtractor(ArcaliveExtractor): url = src fallback = () - query = f"?type=orig&{query}" + query = "?type=orig&" + query if orig := text.extr(media, 'data-orig="', '"'): path, _, ext = url.rpartition(".") if ext != orig: @@ -115,7 +115,7 @@ class ArcalivePostExtractor(ArcaliveExtractor): class ArcaliveBoardExtractor(ArcaliveExtractor): """Extractor for an arca.live board's posts""" subcategory = "board" - pattern = rf"{BASE_PATTERN}/b/([^/?#]+)/?(?:\?([^#]+))?$" + pattern = BASE_PATTERN + r"/b/([^/?#]+)/?(?:\?([^#]+))?$" example = "https://arca.live/b/breaking" def articles(self): @@ -127,7 +127,7 @@ class ArcaliveBoardExtractor(ArcaliveExtractor): class ArcaliveUserExtractor(ArcaliveExtractor): """Extractor for an arca.live users's posts""" subcategory = "user" - pattern = rf"{BASE_PATTERN}/u/@([^/?#]+)/?(?:\?([^#]+))?$" + pattern = BASE_PATTERN + 
r"/u/@([^/?#]+)/?(?:\?([^#]+))?$" example = "https://arca.live/u/@USER" def articles(self): @@ -169,8 +169,11 @@ class ArcaliveAPI(): return data self.log.debug("Server response: %s", data) - msg = f": {msg}" if (msg := data.get("message")) else "" - raise exception.AbortExtraction(f"API request failed{msg}") + if msg := data.get("message"): + msg = "API request failed: " + msg + else: + msg = "API request failed" + raise exception.AbortExtraction(msg) def _pagination(self, endpoint, params, key): while True: diff --git a/gallery_dl/extractor/arena.py b/gallery_dl/extractor/arena.py index ada2fa1..6f3fa96 100644 --- a/gallery_dl/extractor/arena.py +++ b/gallery_dl/extractor/arena.py @@ -24,8 +24,8 @@ class ArenaChannelExtractor(GalleryExtractor): example = "https://are.na/evan-collins-1522646491/cassette-futurism" def metadata(self, page): - channel = self.request_json( - f"https://api.are.na/v2/channels/{self.groups[0]}") + url = "https://api.are.na/v2/channels/" + self.groups[0] + channel = self.request_json(url) channel["date"] = self.parse_datetime_iso( channel["created_at"]) diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index f1b55ce..9c0f5ed 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -95,7 +95,7 @@ class ArtstationExtractor(Extractor): if not self.external: return asset["extension"] = "mp4" - return f"ytdl:{url}" + return "ytdl:" + url self.log.debug(player) self.log.warning("Failed to extract embedded player URL (%s)", @@ -328,9 +328,9 @@ class ArtstationChallengeExtractor(ArtstationExtractor): def items(self): base = f"{self.root}/contests/_/challenges/{self.challenge_id}" - challenge_url = f"{base}.json" - submission_url = f"{base}/submissions.json" - update_url = f"{self.root}/contests/submission_updates.json" + challenge_url = base + ".json" + submission_url = base + "/submissions.json" + update_url = self.root + "/contests/submission_updates.json" challenge = 
self.request_json(challenge_url) yield Message.Directory, "", {"challenge": challenge} @@ -388,7 +388,7 @@ class ArtstationSearchExtractor(ArtstationExtractor): "value" : value.split(","), }) - url = f"{self.root}/api/v2/search/projects.json" + url = self.root + "/api/v2/search/projects.json" data = { "query" : self.query, "page" : None, @@ -419,7 +419,7 @@ class ArtstationArtworkExtractor(ArtstationExtractor): return {"artwork": self.query} def projects(self): - url = f"{self.root}/projects.json" + url = self.root + "/projects.json" return self._pagination(url, self.query.copy()) diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py index 5e5d1f2..686adb8 100644 --- a/gallery_dl/extractor/aryion.py +++ b/gallery_dl/extractor/aryion.py @@ -211,7 +211,7 @@ class AryionGalleryExtractor(AryionExtractor): """Extractor for a user's gallery on eka's portal""" subcategory = "gallery" categorytransfer = True - pattern = rf"{BASE_PATTERN}/(?:gallery/|user/|latest.php\?name=)([^/?#]+)" + pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?#]+)" example = "https://aryion.com/g4/gallery/USER" def _init(self): @@ -238,7 +238,7 @@ class AryionFavoriteExtractor(AryionExtractor): subcategory = "favorite" directory_fmt = ("{category}", "{user!l}", "favorites", "{folder}") archive_fmt = "f_{user}_{id}" - pattern = rf"{BASE_PATTERN}/favorites/([^/?#]+)(?:/([^?#]+))?" + pattern = BASE_PATTERN + r"/favorites/([^/?#]+)(?:/([^?#]+))?" 
example = "https://aryion.com/g4/favorites/USER" def _init(self): @@ -253,7 +253,7 @@ class AryionWatchExtractor(AryionExtractor): """Extractor for your watched users and tags""" subcategory = "watch" directory_fmt = ("{category}", "{user!l}",) - pattern = rf"{BASE_PATTERN}/messagepage\.php()" + pattern = BASE_PATTERN + r"/messagepage\.php()" example = "https://aryion.com/g4/messagepage.php" def posts(self): @@ -271,7 +271,7 @@ class AryionTagExtractor(AryionExtractor): subcategory = "tag" directory_fmt = ("{category}", "tags", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" - pattern = rf"{BASE_PATTERN}/tags\.php\?([^#]+)" + pattern = BASE_PATTERN + r"/tags\.php\?([^#]+)" example = "https://aryion.com/g4/tags.php?tag=TAG" def _init(self): @@ -293,7 +293,7 @@ class AryionSearchExtractor(AryionExtractor): "{search[q]|search[tags]|search[user]}") archive_fmt = ("s_{search[prefix]}" "{search[q]|search[tags]|search[user]}_{id}") - pattern = rf"{BASE_PATTERN}/search\.php\?([^#]+)" + pattern = BASE_PATTERN + r"/search\.php\?([^#]+)" example = "https://aryion.com/g4/search.php?q=TEXT&tags=TAGS&user=USER" def metadata(self): @@ -313,7 +313,7 @@ class AryionSearchExtractor(AryionExtractor): class AryionPostExtractor(AryionExtractor): """Extractor for individual posts on eka's portal""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/view/(\d+)" + pattern = BASE_PATTERN + r"/view/(\d+)" example = "https://aryion.com/g4/view/12345" def posts(self): diff --git a/gallery_dl/extractor/audiochan.py b/gallery_dl/extractor/audiochan.py index b708ce7..ccd05f3 100644 --- a/gallery_dl/extractor/audiochan.py +++ b/gallery_dl/extractor/audiochan.py @@ -103,7 +103,7 @@ class AudiochanExtractor(Extractor): class AudiochanAudioExtractor(AudiochanExtractor): subcategory = "audio" - pattern = rf"{BASE_PATTERN}/a/([^/?#]+)" + pattern = BASE_PATTERN + r"/a/([^/?#]+)" example = "https://audiochan.com/a/SLUG" def posts(self): @@ -114,7 +114,7 @@ class 
AudiochanAudioExtractor(AudiochanExtractor): class AudiochanUserExtractor(AudiochanExtractor): subcategory = "user" - pattern = rf"{BASE_PATTERN}/u/([^/?#]+)" + pattern = BASE_PATTERN + r"/u/([^/?#]+)" example = "https://audiochan.com/u/USER" def posts(self): @@ -130,7 +130,7 @@ class AudiochanUserExtractor(AudiochanExtractor): class AudiochanCollectionExtractor(AudiochanExtractor): subcategory = "collection" - pattern = rf"{BASE_PATTERN}/c/([^/?#]+)" + pattern = BASE_PATTERN + r"/c/([^/?#]+)" example = "https://audiochan.com/c/SLUG" def posts(self): @@ -146,7 +146,7 @@ class AudiochanCollectionExtractor(AudiochanExtractor): class AudiochanSearchExtractor(AudiochanExtractor): subcategory = "search" - pattern = rf"{BASE_PATTERN}/search/?\?([^#]+)" + pattern = BASE_PATTERN + r"/search/?\?([^#]+)" example = "https://audiochan.com/search?q=QUERY" def posts(self): diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index f8e803b..ecbe9eb 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -80,7 +80,7 @@ class BatotoBase(): class BatotoChapterExtractor(BatotoBase, ChapterExtractor): """Extractor for batoto manga chapters""" archive_fmt = "{chapter_id}_{page}" - pattern = rf"{BASE_PATTERN}/(?:title/[^/?#]+|chapter)/(\d+)" + pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)" example = "https://xbato.org/title/12345-MANGA/54321" def __init__(self, match): @@ -127,11 +127,13 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor): } def images(self, page): - images_container = text.extr(page, 'pageOpts', ':[0,0]}"') - images_container = text.unescape(images_container) + container = text.unescape(text.extr(page, 'pageOpts', ':[0,0]}"')) + return [ - (url, None) - for url in text.extract_iter(images_container, r"\"", r"\"") + ((url.replace("://k", "://n", 1) + if url.startswith("https://k") and ".mb" in url else + url), None) + for url in text.extract_iter(container, r"\"", r"\"") ] @@ -139,8 +141,8 @@ 
class BatotoMangaExtractor(BatotoBase, MangaExtractor): """Extractor for batoto manga""" reverse = False chapterclass = BatotoChapterExtractor - pattern = (rf"{BASE_PATTERN}" - rf"/(?:title/(\d+)[^/?#]*|series/(\d+)(?:/[^/?#]*)?)/?$") + pattern = (BASE_PATTERN + + r"/(?:title/(\d+)[^/?#]*|series/(\d+)(?:/[^/?#]*)?)/?$") example = "https://xbato.org/title/12345-MANGA/" def __init__(self, match): diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py index cb357d1..9d4854d 100644 --- a/gallery_dl/extractor/bbc.py +++ b/gallery_dl/extractor/bbc.py @@ -21,7 +21,7 @@ class BbcGalleryExtractor(GalleryExtractor): directory_fmt = ("{category}", "{path:I}") filename_fmt = "{num:>02}.{extension}" archive_fmt = "{programme}_{num}" - pattern = rf"{BASE_PATTERN}[^/?#]+(?!/galleries)(?:/[^/?#]+)?)$" + pattern = BASE_PATTERN + r"[^/?#]+(?!/galleries)(?:/[^/?#]+)?)$" example = "https://www.bbc.co.uk/programmes/PATH" def metadata(self, page): @@ -71,7 +71,7 @@ class BbcProgrammeExtractor(Extractor): category = "bbc" subcategory = "programme" root = "https://www.bbc.co.uk" - pattern = rf"{BASE_PATTERN}[^/?#]+/galleries)(?:/?\?page=(\d+))?" + pattern = BASE_PATTERN + r"[^/?#]+/galleries)(?:/?\?page=(\d+))?" 
example = "https://www.bbc.co.uk/programmes/ID/galleries" def items(self): diff --git a/gallery_dl/extractor/bellazon.py b/gallery_dl/extractor/bellazon.py index 33f4ad3..b9ec3d9 100644 --- a/gallery_dl/extractor/bellazon.py +++ b/gallery_dl/extractor/bellazon.py @@ -24,7 +24,7 @@ class BellazonExtractor(Extractor): archive_fmt = "{post[id]}/{id}_{filename}" def items(self): - native = (f"{self.root}/", f"{self.root[6:]}/") + native = (self.root + "/", self.root[6:] + "/") extract_urls = text.re( r'(?s)<(' r'(?:video .*?<source [^>]*?src|a [^>]*?href)="([^"]+).*?</a>' @@ -82,7 +82,7 @@ class BellazonExtractor(Extractor): dc["extension"] = text.ext_from_url(url) if url[0] == "/": - url = f"https:{url}" + url = "https:" + url yield Message.Url, url, dc else: @@ -91,10 +91,10 @@ class BellazonExtractor(Extractor): yield Message.Queue, url, data def _pagination(self, base, pnum=None): - base = f"{self.root}{base}" + base = self.root + base if pnum is None: - url = f"{base}/" + url = base + "/" pnum = 1 else: url = f"{base}/page/{pnum}/" @@ -112,7 +112,7 @@ class BellazonExtractor(Extractor): url = f"{base}/page/{pnum}/" def _pagination_reverse(self, base, pnum=None): - base = f"{self.root}{base}" + base = self.root + base url = f"{base}/page/{'9999' if pnum is None else pnum}/" with self.request(url) as response: @@ -127,7 +127,7 @@ class BellazonExtractor(Extractor): if pnum > 1: url = f"{base}/page/{pnum}/" elif pnum == 1: - url = f"{base}/" + url = base + "/" else: return @@ -192,15 +192,15 @@ class BellazonExtractor(Extractor): class BellazonPostExtractor(BellazonExtractor): subcategory = "post" - pattern = (rf"{BASE_PATTERN}(/topic/\d+-[\w-]+(?:/page/\d+)?)" - rf"/?#(?:findC|c)omment-(\d+)") + pattern = (BASE_PATTERN + r"(/topic/\d+-[\w-]+(?:/page/\d+)?)" + r"/?#(?:findC|c)omment-(\d+)") example = "https://www.bellazon.com/main/topic/123-SLUG/#findComment-12345" def posts(self): path, post_id = self.groups - page = self.request(f"{self.root}{path}").text + page = 
self.request(self.root + path).text - pos = page.find(f'id="elComment_{post_id}') + pos = page.find('id="elComment_' + post_id) if pos < 0: raise exception.NotFoundError("post") html = text.extract(page, "<article ", "</article>", pos-100)[0] @@ -211,7 +211,7 @@ class BellazonPostExtractor(BellazonExtractor): class BellazonThreadExtractor(BellazonExtractor): subcategory = "thread" - pattern = rf"{BASE_PATTERN}(/topic/\d+-[\w-]+)(?:/page/(\d+))?" + pattern = BASE_PATTERN + r"(/topic/\d+-[\w-]+)(?:/page/(\d+))?" example = "https://www.bellazon.com/main/topic/123-SLUG/" def posts(self): @@ -236,7 +236,7 @@ class BellazonThreadExtractor(BellazonExtractor): class BellazonForumExtractor(BellazonExtractor): subcategory = "forum" - pattern = rf"{BASE_PATTERN}(/forum/\d+-[\w-]+)(?:/page/(\d+))?" + pattern = BASE_PATTERN + r"(/forum/\d+-[\w-]+)(?:/page/(\d+))?" example = "https://www.bellazon.com/main/forum/123-SLUG/" def items(self): diff --git a/gallery_dl/extractor/bilibili.py b/gallery_dl/extractor/bilibili.py index fe10150..d9a942d 100644 --- a/gallery_dl/extractor/bilibili.py +++ b/gallery_dl/extractor/bilibili.py @@ -146,7 +146,7 @@ class BilibiliAPI(): except Exception: if "window._riskdata_" not in page: raise exception.AbortExtraction( - f"{article_id}: Unable to extract INITIAL_STATE data") + article_id + ": Unable to extract INITIAL_STATE data") self.extractor.wait(seconds=300) def user_favlist(self): diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py index 766272f..513a768 100644 --- a/gallery_dl/extractor/blogger.py +++ b/gallery_dl/extractor/blogger.py @@ -117,7 +117,7 @@ BASE_PATTERN = BloggerExtractor.update({ class BloggerPostExtractor(BloggerExtractor): """Extractor for a single blog post""" subcategory = "post" - pattern = rf"{BASE_PATTERN}(/\d\d\d\d/\d\d/[^/?#]+\.html)" + pattern = BASE_PATTERN + r"(/\d\d\d\d/\d\d/[^/?#]+\.html)" example = "https://BLOG.blogspot.com/1970/01/TITLE.html" def posts(self, blog): @@ -127,7 +127,7 
@@ class BloggerPostExtractor(BloggerExtractor): class BloggerBlogExtractor(BloggerExtractor): """Extractor for an entire Blogger blog""" subcategory = "blog" - pattern = rf"{BASE_PATTERN}/?$" + pattern = BASE_PATTERN + r"/?$" example = "https://BLOG.blogspot.com/" def posts(self, blog): @@ -137,7 +137,7 @@ class BloggerBlogExtractor(BloggerExtractor): class BloggerSearchExtractor(BloggerExtractor): """Extractor for Blogger search resuls""" subcategory = "search" - pattern = rf"{BASE_PATTERN}/search/?\?q=([^&#]+)" + pattern = BASE_PATTERN + r"/search/?\?q=([^&#]+)" example = "https://BLOG.blogspot.com/search?q=QUERY" def metadata(self): @@ -151,7 +151,7 @@ class BloggerSearchExtractor(BloggerExtractor): class BloggerLabelExtractor(BloggerExtractor): """Extractor for Blogger posts by label""" subcategory = "label" - pattern = rf"{BASE_PATTERN}/search/label/([^/?#]+)" + pattern = BASE_PATTERN + r"/search/label/([^/?#]+)" example = "https://BLOG.blogspot.com/search/label/LABEL" def metadata(self): diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py index c981608..6e7b344 100644 --- a/gallery_dl/extractor/bluesky.py +++ b/gallery_dl/extractor/bluesky.py @@ -14,7 +14,7 @@ from ..cache import cache, memcache BASE_PATTERN = (r"(?:https?://)?" 
r"(?:(?:www\.)?(?:c|[fv]x)?bs[ky]y[ex]?\.app|main\.bsky\.dev)") -USER_PATTERN = rf"{BASE_PATTERN}/profile/([^/?#]+)" +USER_PATTERN = BASE_PATTERN + r"/profile/([^/?#]+)" class BlueskyExtractor(Extractor): @@ -216,7 +216,7 @@ class BlueskyExtractor(Extractor): class BlueskyUserExtractor(Dispatch, BlueskyExtractor): - pattern = rf"{USER_PATTERN}$" + pattern = USER_PATTERN + r"$" example = "https://bsky.app/profile/HANDLE" def items(self): @@ -237,7 +237,7 @@ class BlueskyUserExtractor(Dispatch, BlueskyExtractor): class BlueskyPostsExtractor(BlueskyExtractor): subcategory = "posts" - pattern = rf"{USER_PATTERN}/posts" + pattern = USER_PATTERN + r"/posts" example = "https://bsky.app/profile/HANDLE/posts" def posts(self): @@ -247,7 +247,7 @@ class BlueskyPostsExtractor(BlueskyExtractor): class BlueskyRepliesExtractor(BlueskyExtractor): subcategory = "replies" - pattern = rf"{USER_PATTERN}/replies" + pattern = USER_PATTERN + r"/replies" example = "https://bsky.app/profile/HANDLE/replies" def posts(self): @@ -257,7 +257,7 @@ class BlueskyRepliesExtractor(BlueskyExtractor): class BlueskyMediaExtractor(BlueskyExtractor): subcategory = "media" - pattern = rf"{USER_PATTERN}/media" + pattern = USER_PATTERN + r"/media" example = "https://bsky.app/profile/HANDLE/media" def posts(self): @@ -267,7 +267,7 @@ class BlueskyMediaExtractor(BlueskyExtractor): class BlueskyVideoExtractor(BlueskyExtractor): subcategory = "video" - pattern = rf"{USER_PATTERN}/video" + pattern = USER_PATTERN + r"/video" example = "https://bsky.app/profile/HANDLE/video" def posts(self): @@ -277,7 +277,7 @@ class BlueskyVideoExtractor(BlueskyExtractor): class BlueskyLikesExtractor(BlueskyExtractor): subcategory = "likes" - pattern = rf"{USER_PATTERN}/likes" + pattern = USER_PATTERN + r"/likes" example = "https://bsky.app/profile/HANDLE/likes" def posts(self): @@ -288,7 +288,7 @@ class BlueskyLikesExtractor(BlueskyExtractor): class BlueskyFeedExtractor(BlueskyExtractor): subcategory = "feed" - pattern = 
rf"{USER_PATTERN}/feed/([^/?#]+)" + pattern = USER_PATTERN + r"/feed/([^/?#]+)" example = "https://bsky.app/profile/HANDLE/feed/NAME" def posts(self): @@ -298,7 +298,7 @@ class BlueskyFeedExtractor(BlueskyExtractor): class BlueskyListExtractor(BlueskyExtractor): subcategory = "list" - pattern = rf"{USER_PATTERN}/lists/([^/?#]+)" + pattern = USER_PATTERN + r"/lists/([^/?#]+)" example = "https://bsky.app/profile/HANDLE/lists/ID" def posts(self): @@ -308,7 +308,7 @@ class BlueskyListExtractor(BlueskyExtractor): class BlueskyFollowingExtractor(BlueskyExtractor): subcategory = "following" - pattern = rf"{USER_PATTERN}/follows" + pattern = USER_PATTERN + r"/follows" example = "https://bsky.app/profile/HANDLE/follows" def items(self): @@ -320,7 +320,7 @@ class BlueskyFollowingExtractor(BlueskyExtractor): class BlueskyPostExtractor(BlueskyExtractor): subcategory = "post" - pattern = rf"{USER_PATTERN}/post/([^/?#]+)" + pattern = USER_PATTERN + r"/post/([^/?#]+)" example = "https://bsky.app/profile/HANDLE/post/ID" def posts(self): @@ -330,7 +330,7 @@ class BlueskyPostExtractor(BlueskyExtractor): class BlueskyInfoExtractor(BlueskyExtractor): subcategory = "info" - pattern = rf"{USER_PATTERN}/info" + pattern = USER_PATTERN + r"/info" example = "https://bsky.app/profile/HANDLE/info" def items(self): @@ -342,7 +342,7 @@ class BlueskyInfoExtractor(BlueskyExtractor): class BlueskyAvatarExtractor(BlueskyExtractor): subcategory = "avatar" filename_fmt = "avatar_{post_id}.{extension}" - pattern = rf"{USER_PATTERN}/avatar" + pattern = USER_PATTERN + r"/avatar" example = "https://bsky.app/profile/HANDLE/avatar" def posts(self): @@ -352,7 +352,7 @@ class BlueskyAvatarExtractor(BlueskyExtractor): class BlueskyBackgroundExtractor(BlueskyExtractor): subcategory = "background" filename_fmt = "background_{post_id}.{extension}" - pattern = rf"{USER_PATTERN}/ba(?:nner|ckground)" + pattern = USER_PATTERN + r"/ba(?:nner|ckground)" example = "https://bsky.app/profile/HANDLE/banner" def 
posts(self): @@ -361,7 +361,7 @@ class BlueskyBackgroundExtractor(BlueskyExtractor): class BlueskySearchExtractor(BlueskyExtractor): subcategory = "search" - pattern = rf"{BASE_PATTERN}/search(?:/|\?q=)(.+)" + pattern = BASE_PATTERN + r"/search(?:/|\?q=)(.+)" example = "https://bsky.app/search?q=QUERY" def posts(self): @@ -371,7 +371,7 @@ class BlueskySearchExtractor(BlueskyExtractor): class BlueskyHashtagExtractor(BlueskyExtractor): subcategory = "hashtag" - pattern = rf"{BASE_PATTERN}/hashtag/([^/?#]+)(?:/(top|latest))?" + pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)(?:/(top|latest))?" example = "https://bsky.app/hashtag/NAME" def posts(self): @@ -381,7 +381,7 @@ class BlueskyHashtagExtractor(BlueskyExtractor): class BlueskyBookmarkExtractor(BlueskyExtractor): subcategory = "bookmark" - pattern = rf"{BASE_PATTERN}/saved" + pattern = BASE_PATTERN + r"/saved" example = "https://bsky.app/saved" def posts(self): diff --git a/gallery_dl/extractor/boosty.py b/gallery_dl/extractor/boosty.py index 5add768..01ecf59 100644 --- a/gallery_dl/extractor/boosty.py +++ b/gallery_dl/extractor/boosty.py @@ -163,7 +163,7 @@ class BoostyExtractor(Extractor): class BoostyUserExtractor(BoostyExtractor): """Extractor for boosty.to user profiles""" subcategory = "user" - pattern = rf"{BASE_PATTERN}/([^/?#]+)(?:\?([^#]+))?$" + pattern = BASE_PATTERN + r"/([^/?#]+)(?:\?([^#]+))?$" example = "https://boosty.to/USER" def posts(self): @@ -179,7 +179,7 @@ class BoostyMediaExtractor(BoostyExtractor): subcategory = "media" directory_fmt = "{category}", "{user[blogUrl]} ({user[id]})", "media" filename_fmt = "{post[id]}_{num}.{extension}" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/media/([^/?#]+)(?:\?([^#]+))?" + pattern = BASE_PATTERN + r"/([^/?#]+)/media/([^/?#]+)(?:\?([^#]+))?" 
example = "https://boosty.to/USER/media/all" def posts(self): @@ -192,7 +192,7 @@ class BoostyMediaExtractor(BoostyExtractor): class BoostyFeedExtractor(BoostyExtractor): """Extractor for your boosty.to subscription feed""" subcategory = "feed" - pattern = rf"{BASE_PATTERN}/(?:\?([^#]+))?(?:$|#)" + pattern = BASE_PATTERN + r"/(?:\?([^#]+))?(?:$|#)" example = "https://boosty.to/" def posts(self): @@ -203,7 +203,7 @@ class BoostyFeedExtractor(BoostyExtractor): class BoostyPostExtractor(BoostyExtractor): """Extractor for boosty.to posts""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/posts/([0-9a-f-]+)" + pattern = BASE_PATTERN + r"/([^/?#]+)/posts/([0-9a-f-]+)" example = "https://boosty.to/USER/posts/01234567-89ab-cdef-0123-456789abcd" def posts(self): @@ -216,7 +216,7 @@ class BoostyPostExtractor(BoostyExtractor): class BoostyFollowingExtractor(BoostyExtractor): """Extractor for your boosty.to subscribed users""" subcategory = "following" - pattern = rf"{BASE_PATTERN}/app/settings/subscriptions" + pattern = BASE_PATTERN + r"/app/settings/subscriptions" example = "https://boosty.to/app/settings/subscriptions" def items(self): @@ -231,7 +231,7 @@ class BoostyDirectMessagesExtractor(BoostyExtractor): subcategory = "direct-messages" directory_fmt = ("{category}", "{user[blogUrl]} ({user[id]})", "Direct Messages") - pattern = rf"{BASE_PATTERN}/app/messages/?\?dialogId=(\d+)" + pattern = BASE_PATTERN + r"/app/messages/?\?dialogId=(\d+)" example = "https://boosty.to/app/messages?dialogId=12345" def items(self): @@ -424,7 +424,7 @@ class BoostyAPI(): params["offset"] = offset def dialog(self, dialog_id): - endpoint = f"/v1/dialog/{dialog_id}" + endpoint = "/v1/dialog/" + dialog_id return self._call(endpoint) def dialog_messages(self, dialog_id, limit=300, offset=None): diff --git a/gallery_dl/extractor/booth.py b/gallery_dl/extractor/booth.py index 3c000b1..c232c58 100644 --- a/gallery_dl/extractor/booth.py +++ b/gallery_dl/extractor/booth.py @@ -116,7 
+116,7 @@ class BoothShopExtractor(BoothExtractor): BoothExtractor.__init__(self, match) def shop_items(self): - return self._pagination(f"{self.root}/items") + return self._pagination(self.root + "/items") def _fallback(url): diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index ed9cd0f..93df645 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -64,7 +64,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): root_dl = "https://get.bunkrr.su" root_api = "https://apidl.bunkr.ru" archive_fmt = "{album_id}_{id|id_url|slug}" - pattern = rf"{BASE_PATTERN}/a/([^/?#]+)" + pattern = BASE_PATTERN + r"/a/([^/?#]+)" example = "https://bunkr.si/a/ID" def __init__(self, match): @@ -189,7 +189,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): json={"id": data_id}) if data.get("encrypted"): - key = f"SECRET_KEY_{data['timestamp'] // 3600}" + key = "SECRET_KEY_" + str(data["timestamp"] // 3600) file_url = util.decrypt_xor(data["url"], key.encode()) else: file_url = data["url"] @@ -202,7 +202,8 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): } def _validate(self, response): - if response.history and response.url.endswith("/maintenance-vid.mp4"): + if response.history and response.url.endswith( + ("/maint.mp4", "/maintenance-vid.mp4")): self.log.warning("File server in maintenance mode") return False return True @@ -216,12 +217,12 @@ class BunkrMediaExtractor(BunkrAlbumExtractor): """Extractor for bunkr.si media links""" subcategory = "media" directory_fmt = ("{category}",) - pattern = rf"{BASE_PATTERN}(/[fvid]/[^/?#]+)" + pattern = BASE_PATTERN + r"(/[fvid]/[^/?#]+)" example = "https://bunkr.si/f/FILENAME" def fetch_album(self, album_id): try: - page = self.request(f"{self.root}{album_id}").text + page = self.request(self.root + album_id).text data_id = text.extr(page, 'data-file-id="', '"') file = self._extract_file(data_id) file["name"] = text.unquote(text.unescape(text.extr( diff --git 
a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py index 9a766d0..0b40fb9 100644 --- a/gallery_dl/extractor/chevereto.py +++ b/gallery_dl/extractor/chevereto.py @@ -60,7 +60,7 @@ BASE_PATTERN = CheveretoExtractor.update({ class CheveretoImageExtractor(CheveretoExtractor): """Extractor for chevereto images""" subcategory = "image" - pattern = rf"{BASE_PATTERN}(/im(?:g|age)/[^/?#]+)" + pattern = BASE_PATTERN + r"(/im(?:g|age)/[^/?#]+)" example = "https://jpg7.cr/img/TITLE.ID" def items(self): @@ -98,7 +98,7 @@ class CheveretoImageExtractor(CheveretoExtractor): class CheveretoVideoExtractor(CheveretoExtractor): """Extractor for chevereto videos""" subcategory = "video" - pattern = rf"{BASE_PATTERN}(/video/[^/?#]+)" + pattern = BASE_PATTERN + r"(/video/[^/?#]+)" example = "https://imagepond.net/video/TITLE.ID" def items(self): @@ -145,7 +145,7 @@ class CheveretoVideoExtractor(CheveretoExtractor): class CheveretoAlbumExtractor(CheveretoExtractor): """Extractor for chevereto albums""" subcategory = "album" - pattern = rf"{BASE_PATTERN}(/a(?:lbum)?/[^/?#]+(?:/sub)?)" + pattern = BASE_PATTERN + r"(/a(?:lbum)?/[^/?#]+(?:/sub)?)" example = "https://jpg7.cr/album/TITLE.ID" def items(self): @@ -182,7 +182,7 @@ class CheveretoAlbumExtractor(CheveretoExtractor): class CheveretoCategoryExtractor(CheveretoExtractor): """Extractor for chevereto galleries""" subcategory = "category" - pattern = rf"{BASE_PATTERN}(/category/[^/?#]+)" + pattern = BASE_PATTERN + r"(/category/[^/?#]+)" example = "https://imglike.com/category/TITLE" def items(self): @@ -194,7 +194,7 @@ class CheveretoCategoryExtractor(CheveretoExtractor): class CheveretoUserExtractor(CheveretoExtractor): """Extractor for chevereto users""" subcategory = "user" - pattern = rf"{BASE_PATTERN}(/[^/?#]+(?:/albums)?)" + pattern = BASE_PATTERN + r"(/[^/?#]+(?:/albums)?)" example = "https://jpg7.cr/USER" def items(self): diff --git a/gallery_dl/extractor/cien.py b/gallery_dl/extractor/cien.py index 
c68af2e..d7dab48 100644 --- a/gallery_dl/extractor/cien.py +++ b/gallery_dl/extractor/cien.py @@ -48,7 +48,7 @@ class CienArticleExtractor(CienExtractor): filename_fmt = "{num:>02} {filename}.{extension}" directory_fmt = ("{category}", "{author[name]}", "{post_id} {name}") archive_fmt = "{post_id}_{num}" - pattern = rf"{BASE_PATTERN}/creator/(\d+)/article/(\d+)" + pattern = BASE_PATTERN + r"/creator/(\d+)/article/(\d+)" example = "https://ci-en.net/creator/123/article/12345" def items(self): @@ -160,7 +160,7 @@ class CienArticleExtractor(CienExtractor): class CienCreatorExtractor(CienExtractor): subcategory = "creator" - pattern = rf"{BASE_PATTERN}/creator/(\d+)(?:/article(?:\?([^#]+))?)?/?$" + pattern = BASE_PATTERN + r"/creator/(\d+)(?:/article(?:\?([^#]+))?)?/?$" example = "https://ci-en.net/creator/123" def items(self): @@ -172,7 +172,7 @@ class CienCreatorExtractor(CienExtractor): class CienRecentExtractor(CienExtractor): subcategory = "recent" - pattern = rf"{BASE_PATTERN}/mypage/recent(?:\?([^#]+))?" + pattern = BASE_PATTERN + r"/mypage/recent(?:\?([^#]+))?" example = "https://ci-en.net/mypage/recent" def items(self): @@ -183,7 +183,7 @@ class CienRecentExtractor(CienExtractor): class CienFollowingExtractor(CienExtractor): subcategory = "following" - pattern = rf"{BASE_PATTERN}/mypage/subscription(/following)?" + pattern = BASE_PATTERN + r"/mypage/subscription(/following)?" 
example = "https://ci-en.net/mypage/subscription" def items(self): diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py index 742c561..3498396 100644 --- a/gallery_dl/extractor/civitai.py +++ b/gallery_dl/extractor/civitai.py @@ -15,7 +15,7 @@ import itertools import time BASE_PATTERN = r"(?:https?://)?civitai\.com" -USER_PATTERN = rf"{BASE_PATTERN}/user/([^/?#]+)" +USER_PATTERN = BASE_PATTERN + r"/user/([^/?#]+)" class CivitaiExtractor(Extractor): @@ -54,7 +54,7 @@ class CivitaiExtractor(Extractor): elif quality_video is not None and quality: self._video_quality = self._image_quality else: - self._video_quality = "quality=100" + self._video_quality = "original=true,quality=100" self._video_ext = "webm" if metadata := self.config("metadata"): @@ -258,7 +258,7 @@ class CivitaiModelExtractor(CivitaiExtractor): directory_fmt = ("{category}", "{user[username]}", "{model[id]}{model[name]:? //}", "{version[id]}{version[name]:? //}") - pattern = rf"{BASE_PATTERN}/models/(\d+)(?:/?\?modelVersionId=(\d+))?" + pattern = BASE_PATTERN + r"/models/(\d+)(?:/?\?modelVersionId=(\d+))?" example = "https://civitai.com/models/12345/TITLE" def items(self): @@ -375,7 +375,7 @@ class CivitaiModelExtractor(CivitaiExtractor): class CivitaiImageExtractor(CivitaiExtractor): subcategory = "image" - pattern = rf"{BASE_PATTERN}/images/(\d+)" + pattern = BASE_PATTERN + r"/images/(\d+)" example = "https://civitai.com/images/12345" def images(self): @@ -386,7 +386,7 @@ class CivitaiCollectionExtractor(CivitaiExtractor): subcategory = "collection" directory_fmt = ("{category}", "{user_collection[username]}", "collections", "{collection[id]}{collection[name]:? 
//}") - pattern = rf"{BASE_PATTERN}/collections/(\d+)" + pattern = BASE_PATTERN + r"/collections/(\d+)" example = "https://civitai.com/collections/12345" def images(self): @@ -408,7 +408,7 @@ class CivitaiPostExtractor(CivitaiExtractor): subcategory = "post" directory_fmt = ("{category}", "{username|user[username]}", "posts", "{post[id]}{post[title]:? //}") - pattern = rf"{BASE_PATTERN}/posts/(\d+)" + pattern = BASE_PATTERN + r"/posts/(\d+)" example = "https://civitai.com/posts/12345" def posts(self): @@ -417,7 +417,7 @@ class CivitaiPostExtractor(CivitaiExtractor): class CivitaiTagExtractor(CivitaiExtractor): subcategory = "tag" - pattern = rf"{BASE_PATTERN}/tag/([^/?&#]+)" + pattern = BASE_PATTERN + r"/tag/([^/?&#]+)" example = "https://civitai.com/tag/TAG" def models(self): @@ -427,7 +427,7 @@ class CivitaiTagExtractor(CivitaiExtractor): class CivitaiSearchModelsExtractor(CivitaiExtractor): subcategory = "search-models" - pattern = rf"{BASE_PATTERN}/search/models\?([^#]+)" + pattern = BASE_PATTERN + r"/search/models\?([^#]+)" example = "https://civitai.com/search/models?query=QUERY" def models(self): @@ -438,7 +438,7 @@ class CivitaiSearchModelsExtractor(CivitaiExtractor): class CivitaiSearchImagesExtractor(CivitaiExtractor): subcategory = "search-images" - pattern = rf"{BASE_PATTERN}/search/images\?([^#]+)" + pattern = BASE_PATTERN + r"/search/images\?([^#]+)" example = "https://civitai.com/search/images?query=QUERY" def images(self): @@ -449,7 +449,7 @@ class CivitaiSearchImagesExtractor(CivitaiExtractor): class CivitaiModelsExtractor(CivitaiExtractor): subcategory = "models" - pattern = rf"{BASE_PATTERN}/models(?:/?\?([^#]+))?(?:$|#)" + pattern = BASE_PATTERN + r"/models(?:/?\?([^#]+))?(?:$|#)" example = "https://civitai.com/models" def models(self): @@ -459,7 +459,7 @@ class CivitaiModelsExtractor(CivitaiExtractor): class CivitaiImagesExtractor(CivitaiExtractor): subcategory = "images" - pattern = rf"{BASE_PATTERN}/images(?:/?\?([^#]+))?(?:$|#)" + pattern = 
BASE_PATTERN + r"/images(?:/?\?([^#]+))?(?:$|#)" example = "https://civitai.com/images" def images(self): @@ -470,7 +470,7 @@ class CivitaiImagesExtractor(CivitaiExtractor): class CivitaiVideosExtractor(CivitaiExtractor): subcategory = "videos" - pattern = rf"{BASE_PATTERN}/videos(?:/?\?([^#]+))?(?:$|#)" + pattern = BASE_PATTERN + r"/videos(?:/?\?([^#]+))?(?:$|#)" example = "https://civitai.com/videos" def images(self): @@ -481,7 +481,7 @@ class CivitaiVideosExtractor(CivitaiExtractor): class CivitaiPostsExtractor(CivitaiExtractor): subcategory = "posts" - pattern = rf"{BASE_PATTERN}/posts(?:/?\?([^#]+))?(?:$|#)" + pattern = BASE_PATTERN + r"/posts(?:/?\?([^#]+))?(?:$|#)" example = "https://civitai.com/posts" def posts(self): @@ -490,7 +490,7 @@ class CivitaiPostsExtractor(CivitaiExtractor): class CivitaiUserExtractor(Dispatch, CivitaiExtractor): - pattern = rf"{USER_PATTERN}/?(?:$|\?|#)" + pattern = USER_PATTERN + r"/?(?:$|\?|#)" example = "https://civitai.com/user/USER" def items(self): @@ -506,7 +506,7 @@ class CivitaiUserExtractor(Dispatch, CivitaiExtractor): class CivitaiUserModelsExtractor(CivitaiExtractor): subcategory = "user-models" - pattern = rf"{USER_PATTERN}/models/?(?:\?([^#]+))?" + pattern = USER_PATTERN + r"/models/?(?:\?([^#]+))?" example = "https://civitai.com/user/USER/models" def models(self): @@ -520,7 +520,7 @@ class CivitaiUserPostsExtractor(CivitaiExtractor): subcategory = "user-posts" directory_fmt = ("{category}", "{username|user[username]}", "posts", "{post[id]}{post[title]:? //}") - pattern = rf"{USER_PATTERN}/posts/?(?:\?([^#]+))?" + pattern = USER_PATTERN + r"/posts/?(?:\?([^#]+))?" example = "https://civitai.com/user/USER/posts" def posts(self): @@ -532,7 +532,7 @@ class CivitaiUserPostsExtractor(CivitaiExtractor): class CivitaiUserImagesExtractor(CivitaiExtractor): subcategory = "user-images" - pattern = rf"{USER_PATTERN}/images/?(?:\?([^#]+))?" + pattern = USER_PATTERN + r"/images/?(?:\?([^#]+))?" 
example = "https://civitai.com/user/USER/images" def __init__(self, match): @@ -553,7 +553,7 @@ class CivitaiUserImagesExtractor(CivitaiExtractor): class CivitaiUserVideosExtractor(CivitaiExtractor): subcategory = "user-videos" directory_fmt = ("{category}", "{username|user[username]}", "videos") - pattern = rf"{USER_PATTERN}/videos/?(?:\?([^#]+))?" + pattern = USER_PATTERN + r"/videos/?(?:\?([^#]+))?" example = "https://civitai.com/user/USER/videos" def __init__(self, match): @@ -572,7 +572,7 @@ class CivitaiUserVideosExtractor(CivitaiExtractor): class CivitaiUserCollectionsExtractor(CivitaiExtractor): subcategory = "user-collections" - pattern = rf"{USER_PATTERN}/collections/?(?:\?([^#]+))?" + pattern = USER_PATTERN + r"/collections/?(?:\?([^#]+))?" example = "https://civitai.com/user/USER/collections" def items(self): @@ -580,10 +580,10 @@ class CivitaiUserCollectionsExtractor(CivitaiExtractor): params = self._parse_query(query) params["userId"] = self.api.user(text.unquote(user))[0]["id"] - base = f"{self.root}/collections/" + base = self.root + "/collections/" for collection in self.api.collections(params): collection["_extractor"] = CivitaiCollectionExtractor - yield Message.Queue, f"{base}{collection['id']}", collection + yield Message.Queue, base + str(collection["id"]), collection class CivitaiGeneratedExtractor(CivitaiExtractor): @@ -591,7 +591,7 @@ class CivitaiGeneratedExtractor(CivitaiExtractor): subcategory = "generated" filename_fmt = "{filename}.{extension}" directory_fmt = ("{category}", "generated") - pattern = rf"{BASE_PATTERN}/generate" + pattern = BASE_PATTERN + "/generate" example = "https://civitai.com/generate" def items(self): @@ -647,12 +647,12 @@ class CivitaiRestAPI(): }) def model(self, model_id): - endpoint = f"/v1/models/{model_id}" + endpoint = "/v1/models/" + str(model_id) return self._call(endpoint) @memcache(keyarg=1) def model_version(self, model_version_id): - endpoint = f"/v1/model-versions/{model_version_id}" + endpoint = 
"/v1/model-versions/" + str(model_version_id) return self._call(endpoint) def models(self, params): @@ -945,7 +945,7 @@ class CivitaiSearchAPI(): if auth := extractor.config("token"): if " " not in auth: - auth = f"Bearer {auth}" + auth = "Bearer " + auth else: auth = ("Bearer 8c46eb2508e21db1e9828a97968d" "91ab1ca1caa5f70a00e88a2ba1e286603b61") diff --git a/gallery_dl/extractor/comick.py b/gallery_dl/extractor/comick.py index 9816786..c119b2e 100644 --- a/gallery_dl/extractor/comick.py +++ b/gallery_dl/extractor/comick.py @@ -27,7 +27,7 @@ class ComickCoversExtractor(ComickBase, GalleryExtractor): directory_fmt = ("{category}", "{manga}", "Covers") filename_fmt = "{volume:>02}_{lang}.{extension}" archive_fmt = "c_{id}" - pattern = rf"{BASE_PATTERN}/comic/([\w-]+)/cover" + pattern = BASE_PATTERN + r"/comic/([\w-]+)/cover" example = "https://comick.io/comic/MANGA/cover" def metadata(self, page): @@ -44,7 +44,7 @@ class ComickCoversExtractor(ComickBase, GalleryExtractor): covers.reverse() return [ - (f"https://meo.comick.pictures/{cover['b2key']}", { + ("https://meo.comick.pictures/" + cover["b2key"], { "id" : cover["id"], "width" : cover["w"], "height": cover["h"], @@ -60,7 +60,7 @@ class ComickCoversExtractor(ComickBase, GalleryExtractor): class ComickChapterExtractor(ComickBase, ChapterExtractor): """Extractor for comick.io manga chapters""" archive_fmt = "{chapter_hid}_{page}" - pattern = (rf"{BASE_PATTERN}/comic/([\w-]+)" + pattern = (BASE_PATTERN + r"/comic/([\w-]+)" r"/(\w+(?:-(?:chapter|volume)-[^/?#]+)?)") example = "https://comick.io/comic/MANGA/ID-chapter-123-en" @@ -128,7 +128,7 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor): return () return [ - (f"https://meo.comick.pictures/{img['b2key']}", { + ("https://meo.comick.pictures/" + img["b2key"], { "width" : img["w"], "height" : img["h"], "size" : img["s"], @@ -140,7 +140,7 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor): class ComickMangaExtractor(ComickBase, MangaExtractor): 
"""Extractor for comick.io manga""" - pattern = rf"{BASE_PATTERN}/comic/([\w-]+)/?(?:\?([^#]+))?" + pattern = BASE_PATTERN + r"/comic/([\w-]+)/?(?:\?([^#]+))?" example = "https://comick.io/comic/MANGA" def items(self): diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 13c7bbe..a6379a3 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -766,7 +766,7 @@ class GalleryExtractor(Extractor): Extractor.__init__(self, match) if url is None and (path := self.groups[0]) and path[0] == "/": - self.page_url = f"{self.root}{path}" + self.page_url = self.root + path else: self.page_url = url @@ -863,7 +863,7 @@ class MangaExtractor(Extractor): Extractor.__init__(self, match) if url is None and (path := self.groups[0]) and path[0] == "/": - self.page_url = f"{self.root}{path}" + self.page_url = self.root + path else: self.page_url = url diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py index 93d3953..a2cb0ca 100644 --- a/gallery_dl/extractor/cyberdrop.py +++ b/gallery_dl/extractor/cyberdrop.py @@ -18,7 +18,7 @@ class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor): category = "cyberdrop" root = "https://cyberdrop.cr" root_api = "https://api.cyberdrop.cr" - pattern = rf"{BASE_PATTERN}/a/([^/?#]+)" + pattern = BASE_PATTERN + r"/a/([^/?#]+)" example = "https://cyberdrop.cr/a/ID" def items(self): @@ -76,7 +76,7 @@ class CyberdropMediaExtractor(CyberdropAlbumExtractor): """Extractor for cyberdrop media links""" subcategory = "media" directory_fmt = ("{category}",) - pattern = rf"{BASE_PATTERN}/f/([^/?#]+)" + pattern = BASE_PATTERN + r"/f/([^/?#]+)" example = "https://cyberdrop.cr/f/ID" def fetch_album(self, album_id): diff --git a/gallery_dl/extractor/cyberfile.py b/gallery_dl/extractor/cyberfile.py index e8c0061..49cc6db 100644 --- a/gallery_dl/extractor/cyberfile.py +++ b/gallery_dl/extractor/cyberfile.py @@ -20,7 +20,7 @@ class CyberfileExtractor(Extractor): root = 
"https://cyberfile.me" def request_api(self, endpoint, data): - url = f"{self.root}{endpoint}" + url = self.root + endpoint headers = { "X-Requested-With": "XMLHttpRequest", "Origin": self.root, @@ -29,7 +29,7 @@ class CyberfileExtractor(Extractor): url, method="POST", headers=headers, data=data) if "albumPasswordModel" in resp.get("javascript", ""): - url_pw = f"{self.root}/ajax/folder_password_process" + url_pw = self.root + "/ajax/folder_password_process" data_pw = { "folderPassword": self._get_auth_info(password=True)[1], "folderId": text.extr( @@ -48,7 +48,7 @@ class CyberfileExtractor(Extractor): class CyberfileFolderExtractor(CyberfileExtractor): subcategory = "folder" - pattern = rf"{BASE_PATTERN}/folder/([0-9a-f]+)" + pattern = BASE_PATTERN + r"/folder/([0-9a-f]+)" example = "https://cyberfile.me/folder/0123456789abcdef/NAME" def items(self): @@ -97,7 +97,7 @@ class CyberfileFolderExtractor(CyberfileExtractor): class CyberfileSharedExtractor(CyberfileExtractor): subcategory = "shared" - pattern = rf"{BASE_PATTERN}/shared/([a-zA-Z0-9]+)" + pattern = BASE_PATTERN + r"/shared/([a-zA-Z0-9]+)" example = "https://cyberfile.me/shared/AbCdEfGhIjK" def items(self): @@ -129,7 +129,7 @@ class CyberfileSharedExtractor(CyberfileExtractor): class CyberfileFileExtractor(CyberfileExtractor): subcategory = "file" directory_fmt = ("{category}", "{uploader}", "{folder}") - pattern = rf"{BASE_PATTERN}/([a-zA-Z0-9]+)" + pattern = BASE_PATTERN + r"/([a-zA-Z0-9]+)" example = "https://cyberfile.me/AbCdE" def items(self): diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 5ea33c4..588d94b 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -155,7 +155,7 @@ class DanbooruExtractor(BaseExtractor): return if prefix: - params["page"] = f"{prefix}{posts[-1]['id']}" + params["page"] = prefix + str(posts[-1]["id"]) elif params["page"]: params["page"] += 1 else: @@ -174,9 +174,8 @@ class 
DanbooruExtractor(BaseExtractor): else: ext = data["ZIP:ZipFileName"].rpartition(".")[2] - fmt = ("{:>06}." + ext).format delays = data["Ugoira:FrameDelays"] - return [{"file": fmt(index), "delay": delay} + return [{"file": f"{index:>06}.{ext}", "delay": delay} for index, delay in enumerate(delays)] def _collection_posts(self, cid, ctype): @@ -251,7 +250,7 @@ class DanbooruTagExtractor(DanbooruExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" - pattern = rf"{BASE_PATTERN}/posts\?(?:[^&#]*&)*tags=([^&#]*)" + pattern = BASE_PATTERN + r"/posts\?(?:[^&#]*&)*tags=([^&#]*)" example = "https://danbooru.donmai.us/posts?tags=TAG" def metadata(self): @@ -279,7 +278,7 @@ class DanbooruTagExtractor(DanbooruExtractor): class DanbooruRandomExtractor(DanbooruTagExtractor): """Extractor for a random danbooru post""" subcategory = "random" - pattern = rf"{BASE_PATTERN}/posts/random(?:\?(?:[^&#]*&)*tags=([^&#]*))?" + pattern = BASE_PATTERN + r"/posts/random(?:\?(?:[^&#]*&)*tags=([^&#]*))?" 
example = "https://danbooru.donmai.us/posts/random?tags=TAG" def metadata(self): @@ -299,7 +298,7 @@ class DanbooruPoolExtractor(DanbooruExtractor): directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name]}") filename_fmt = "{num:>04}_{id}_{filename}.{extension}" archive_fmt = "p_{pool[id]}_{id}" - pattern = rf"{BASE_PATTERN}/pool(?:s|/show)/(\d+)" + pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)" example = "https://danbooru.donmai.us/pools/12345" def metadata(self): @@ -317,7 +316,7 @@ class DanbooruFavgroupExtractor(DanbooruExtractor): "{favgroup[id]} {favgroup[name]}") filename_fmt = "{num:>04}_{id}_{filename}.{extension}" archive_fmt = "fg_{favgroup[id]}_{id}" - pattern = rf"{BASE_PATTERN}/favorite_group(?:s|/show)/(\d+)" + pattern = BASE_PATTERN + r"/favorite_group(?:s|/show)/(\d+)" example = "https://danbooru.donmai.us/favorite_groups/12345" def metadata(self): @@ -332,7 +331,7 @@ class DanbooruPostExtractor(DanbooruExtractor): """Extractor for single danbooru posts""" subcategory = "post" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/post(?:s|/show)/(\d+)" + pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)" example = "https://danbooru.donmai.us/posts/12345" def posts(self): @@ -349,7 +348,7 @@ class DanbooruMediaassetExtractor(DanbooruExtractor): subcategory = "media-asset" filename_fmt = "{category}_ma{id}_{filename}.{extension}" archive_fmt = "m{id}" - pattern = rf"{BASE_PATTERN}/media_assets/(\d+)" + pattern = BASE_PATTERN + r"/media_assets/(\d+)" example = "https://danbooru.donmai.us/media_assets/12345" def posts(self): @@ -375,7 +374,7 @@ class DanbooruPopularExtractor(DanbooruExtractor): subcategory = "popular" directory_fmt = ("{category}", "popular", "{scale}", "{date}") archive_fmt = "P_{scale[0]}_{date}_{id}" - pattern = rf"{BASE_PATTERN}/(?:explore/posts/)?popular(?:\?([^#]*))?" + pattern = BASE_PATTERN + r"/(?:explore/posts/)?popular(?:\?([^#]*))?" 
example = "https://danbooru.donmai.us/explore/posts/popular" def metadata(self): @@ -398,7 +397,7 @@ class DanbooruPopularExtractor(DanbooruExtractor): class DanbooruArtistExtractor(DanbooruExtractor): """Extractor for danbooru artists""" subcategory = "artist" - pattern = rf"{BASE_PATTERN}/artists/(\d+)" + pattern = BASE_PATTERN + r"/artists/(\d+)" example = "https://danbooru.donmai.us/artists/12345" items = DanbooruExtractor.items_artists @@ -411,7 +410,7 @@ class DanbooruArtistExtractor(DanbooruExtractor): class DanbooruArtistSearchExtractor(DanbooruExtractor): """Extractor for danbooru artist searches""" subcategory = "artist-search" - pattern = rf"{BASE_PATTERN}/artists/?\?([^#]+)" + pattern = BASE_PATTERN + r"/artists/?\?([^#]+)" example = "https://danbooru.donmai.us/artists?QUERY" items = DanbooruExtractor.items_artists diff --git a/gallery_dl/extractor/dandadan.py b/gallery_dl/extractor/dandadan.py index 48dc0b7..9349d5f 100644 --- a/gallery_dl/extractor/dandadan.py +++ b/gallery_dl/extractor/dandadan.py @@ -22,7 +22,7 @@ class DandadanBase(): class DandadanChapterExtractor(DandadanBase, ChapterExtractor): """Extractor for dandadan manga chapters""" - pattern = rf"{BASE_PATTERN}(/manga/dandadan-chapter-([^/?#]+)/?)" + pattern = BASE_PATTERN + r"(/manga/dandadan-chapter-([^/?#]+)/?)" example = "https://dandadan.net/manga/dandadan-chapter-123/" def metadata(self, page): @@ -31,7 +31,7 @@ class DandadanChapterExtractor(DandadanBase, ChapterExtractor): return { "manga" : "Dandadan", "chapter" : text.parse_int(chapter), - "chapter_minor": f"{sep}{minor}", + "chapter_minor": sep + minor, "lang" : "en", } @@ -54,7 +54,7 @@ class DandadanChapterExtractor(DandadanBase, ChapterExtractor): class DandadanMangaExtractor(DandadanBase, MangaExtractor): """Extractor for dandadan manga""" chapterclass = DandadanChapterExtractor - pattern = rf"{BASE_PATTERN}(/)" + pattern = BASE_PATTERN + r"(/)" example = "https://dandadan.net/" def chapters(self, page): diff --git 
a/gallery_dl/extractor/dankefuerslesen.py b/gallery_dl/extractor/dankefuerslesen.py index ed7e40b..baebb6f 100644 --- a/gallery_dl/extractor/dankefuerslesen.py +++ b/gallery_dl/extractor/dankefuerslesen.py @@ -28,13 +28,13 @@ class DankefuerslesenBase(): class DankefuerslesenChapterExtractor(DankefuerslesenBase, ChapterExtractor): """Extractor for Danke fürs Lesen manga chapters""" - pattern = rf"{BASE_PATTERN}/read/manga/([\w-]+)/([\w-]+)" + pattern = BASE_PATTERN + r"/read/manga/([\w-]+)/([\w-]+)" example = "https://danke.moe/read/manga/TITLE/123/1/" def _init(self): self.zip = self.config("zip", False) if self.zip: - self.filename_fmt = f"{self.directory_fmt[-1]}.{{extension}}" + self.filename_fmt = self.directory_fmt[-1] + ".{extension}" self.directory_fmt = self.directory_fmt[:-1] def metadata(self, page): @@ -95,7 +95,7 @@ class DankefuerslesenMangaExtractor(DankefuerslesenBase, MangaExtractor): """Extractor for Danke fürs Lesen manga""" chapterclass = DankefuerslesenChapterExtractor reverse = False - pattern = rf"{BASE_PATTERN}/read/manga/([^/?#]+)" + pattern = BASE_PATTERN + r"/read/manga/([^/?#]+)" example = "https://danke.moe/read/manga/TITLE/" def chapters(self, page): diff --git a/gallery_dl/extractor/desktopography.py b/gallery_dl/extractor/desktopography.py index be25053..816816a 100644 --- a/gallery_dl/extractor/desktopography.py +++ b/gallery_dl/extractor/desktopography.py @@ -22,7 +22,7 @@ class DesktopographyExtractor(Extractor): class DesktopographySiteExtractor(DesktopographyExtractor): """Extractor for all desktopography exhibitions """ subcategory = "site" - pattern = rf"{BASE_PATTERN}/$" + pattern = BASE_PATTERN + r"/$" example = "https://desktopography.net/" def items(self): @@ -41,7 +41,7 @@ class DesktopographySiteExtractor(DesktopographyExtractor): class DesktopographyExhibitionExtractor(DesktopographyExtractor): """Extractor for a yearly desktopography exhibition""" subcategory = "exhibition" - pattern = 
rf"{BASE_PATTERN}/exhibition-([^/?#]+)/" + pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/" example = "https://desktopography.net/exhibition-2020/" def __init__(self, match): @@ -70,7 +70,7 @@ class DesktopographyExhibitionExtractor(DesktopographyExtractor): class DesktopographyEntryExtractor(DesktopographyExtractor): """Extractor for all resolutions of a desktopography wallpaper""" subcategory = "entry" - pattern = rf"{BASE_PATTERN}/portfolios/([\w-]+)" + pattern = BASE_PATTERN + r"/portfolios/([\w-]+)" example = "https://desktopography.net/portfolios/NAME/" def __init__(self, match): diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 5bd43d4..0467108 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -66,7 +66,7 @@ class DeviantartExtractor(Extractor): self.quality = "-fullview.png?" self.quality_sub = text.re(r"-fullview\.[a-z0-9]+\?").sub else: - self.quality = f",q_{self.quality}" + self.quality = ",q_" + str(self.quality) self.quality_sub = text.re(r",q_\d+").sub if self.intermediary: @@ -864,7 +864,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\ class DeviantartUserExtractor(Dispatch, DeviantartExtractor): """Extractor for an artist's user profile""" - pattern = rf"{BASE_PATTERN}/?$" + pattern = BASE_PATTERN + r"/?$" example = "https://www.deviantart.com/USER" def items(self): @@ -887,7 +887,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor): """Extractor for all deviations from an artist's gallery""" subcategory = "gallery" archive_fmt = "g_{_username}_{index}.{extension}" - pattern = (rf"{BASE_PATTERN}/gallery" + pattern = (BASE_PATTERN + r"/gallery" r"(?:/all|/recommended-for-you)?/?(\?(?!q=).*)?$") example = "https://www.deviantart.com/USER/gallery/" @@ -902,7 +902,7 @@ class DeviantartAvatarExtractor(DeviantartExtractor): """Extractor for an artist's avatar""" subcategory = "avatar" archive_fmt = "a_{_username}_{index}" - pattern = 
rf"{BASE_PATTERN}/avatar" + pattern = BASE_PATTERN + r"/avatar" example = "https://www.deviantart.com/USER/avatar/" def deviations(self): @@ -956,7 +956,7 @@ class DeviantartBackgroundExtractor(DeviantartExtractor): """Extractor for an artist's banner""" subcategory = "background" archive_fmt = "b_{index}" - pattern = rf"{BASE_PATTERN}/ba(?:nner|ckground)" + pattern = BASE_PATTERN + r"/ba(?:nner|ckground)" example = "https://www.deviantart.com/USER/banner/" def deviations(self): @@ -972,7 +972,7 @@ class DeviantartFolderExtractor(DeviantartExtractor): subcategory = "folder" directory_fmt = ("{category}", "{username}", "{folder[title]}") archive_fmt = "F_{folder[uuid]}_{index}.{extension}" - pattern = rf"{BASE_PATTERN}/gallery/([^/?#]+)/([^/?#]+)" + pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/([^/?#]+)" example = "https://www.deviantart.com/USER/gallery/12345/TITLE" def __init__(self, match): @@ -1074,6 +1074,8 @@ class DeviantartStashExtractor(DeviantartExtractor): return if stash_data := text.extr(page, ',\\"stash\\":', ',\\"@@'): + if stash_data.endswith(":{}"): + stash_data = stash_data[:stash_data.rfind("}", None, -2)+1] stash_data = util.json_loads(self._unescape_json(stash_data)) for sid in text.extract_iter( @@ -1088,7 +1090,7 @@ class DeviantartFavoriteExtractor(DeviantartExtractor): subcategory = "favorite" directory_fmt = ("{category}", "{username}", "Favourites") archive_fmt = "f_{_username}_{index}.{extension}" - pattern = rf"{BASE_PATTERN}/favourites(?:/all|/?\?catpath=)?/?$" + pattern = BASE_PATTERN + r"/favourites(?:/all|/?\?catpath=)?/?$" example = "https://www.deviantart.com/USER/favourites/" def deviations(self): @@ -1105,7 +1107,7 @@ class DeviantartCollectionExtractor(DeviantartExtractor): directory_fmt = ("{category}", "{username}", "Favourites", "{collection[title]}") archive_fmt = "C_{collection[uuid]}_{index}.{extension}" - pattern = rf"{BASE_PATTERN}/favourites/([^/?#]+)/([^/?#]+)" + pattern = BASE_PATTERN + 
r"/favourites/([^/?#]+)/([^/?#]+)" example = "https://www.deviantart.com/USER/favourites/12345/TITLE" def __init__(self, match): @@ -1136,7 +1138,7 @@ class DeviantartJournalExtractor(DeviantartExtractor): subcategory = "journal" directory_fmt = ("{category}", "{username}", "Journal") archive_fmt = "j_{_username}_{index}.{extension}" - pattern = rf"{BASE_PATTERN}/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$" + pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$" example = "https://www.deviantart.com/USER/posts/journals/" def deviations(self): @@ -1149,7 +1151,7 @@ class DeviantartStatusExtractor(DeviantartExtractor): directory_fmt = ("{category}", "{username}", "Status") filename_fmt = "{category}_{index}_{title}_{date}.{extension}" archive_fmt = "S_{_username}_{index}.{extension}" - pattern = rf"{BASE_PATTERN}/posts/statuses" + pattern = BASE_PATTERN + r"/posts/statuses" example = "https://www.deviantart.com/USER/posts/statuses/" def deviations(self): @@ -1253,7 +1255,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor): """Extractor for single deviations""" subcategory = "deviation" archive_fmt = "g_{_username}_{index}.{extension}" - pattern = (rf"{BASE_PATTERN}/(art|journal)/(?:[^/?#]+-)?(\d+)" + pattern = (BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)" r"|(?:https?://)?(?:www\.)?(?:fx)?deviantart\.com/" r"(?:view/|deviation/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)" r"(\d+)" # bare deviation ID without slug @@ -1315,7 +1317,7 @@ class DeviantartScrapsExtractor(DeviantartExtractor): subcategory = "scraps" directory_fmt = ("{category}", "{username}", "Scraps") archive_fmt = "s_{_username}_{index}.{extension}" - pattern = rf"{BASE_PATTERN}/gallery/(?:\?catpath=)?scraps\b" + pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b" example = "https://www.deviantart.com/USER/gallery/scraps" def deviations(self): @@ -1382,7 +1384,7 @@ class DeviantartGallerySearchExtractor(DeviantartExtractor): """Extractor for deviantart gallery 
searches""" subcategory = "gallery-search" archive_fmt = "g_{_username}_{index}.{extension}" - pattern = rf"{BASE_PATTERN}/gallery/?\?(q=[^#]+)" + pattern = BASE_PATTERN + r"/gallery/?\?(q=[^#]+)" example = "https://www.deviantart.com/USER/gallery?q=QUERY" def __init__(self, match): @@ -1412,7 +1414,7 @@ class DeviantartGallerySearchExtractor(DeviantartExtractor): class DeviantartFollowingExtractor(DeviantartExtractor): """Extractor for user's watched users""" subcategory = "following" - pattern = rf"{BASE_PATTERN}/(?:about#)?watching" + pattern = BASE_PATTERN + "/(?:about#)?watching" example = "https://www.deviantart.com/USER/about#watching" def items(self): diff --git a/gallery_dl/extractor/discord.py b/gallery_dl/extractor/discord.py index 0e7f309..576b0cc 100644 --- a/gallery_dl/extractor/discord.py +++ b/gallery_dl/extractor/discord.py @@ -9,7 +9,6 @@ from .common import Extractor, Message from .. import text, exception - BASE_PATTERN = r"(?:https?://)?discord\.com" @@ -203,29 +202,47 @@ class DiscordExtractor(Extractor): def parse_server(self, server): self.server_metadata = { - "server": server["name"], + "server" : server["name"], "server_id": server["id"], - "server_files": [], - "owner_id": server["owner_id"] + "owner_id" : server["owner_id"], + "server_files": self.collect_server_assets(server), } - for icon_type, icon_path in ( - ("icon", "icons"), - ("banner", "banners"), - ("splash", "splashes"), - ("discovery_splash", "discovery-splashes") - ): - if server.get(icon_type): - self.server_metadata["server_files"].append({ - "url": (f"https://cdn.discordapp.com/{icon_path}/" - f"{self.server_metadata['server_id']}/" - f"{server[icon_type]}.png?size=4096"), - "filename": icon_type, - "extension": "png", - }) - return self.server_metadata + def collect_server_assets(self, server, asset_type=None): + if asset_type and asset_type != "general": + return [ + { + **asset, + "url": (f"https://cdn.discordapp.com/{asset_type}/" + f"{asset['id']}.png?size=4096"), + 
"label" : asset_type, + "filename" : f"{asset['name']} ({asset['id']})", + "extension": "png", + } + for asset in assets + ] if (assets := server.get(asset_type)) else () + else: + return [ + { + "url": (f"https://cdn.discordapp.com/{asset_path}/" + f"{server['id']}/{asset_id}.png?size=4096"), + "id" : f"{server['id']}/{asset_id}", + "label" : "", + "name" : asset_type, + "filename" : asset_type, + "extension": "png", + } + for asset_type, asset_path in ( + ("icon" , "icons"), + ("banner", "banners"), + ("splash", "splashes"), + ("discovery_splash", "discovery-splashes") + ) + if (asset_id := server.get(asset_type)) + ] + def build_server_and_channels(self, server_id): self.parse_server(self.api.get_server(server_id)) @@ -238,7 +255,7 @@ class DiscordExtractor(Extractor): class DiscordChannelExtractor(DiscordExtractor): subcategory = "channel" - pattern = rf"{BASE_PATTERN}/channels/(\d+)/(?:\d+/threads/)?(\d+)/?$" + pattern = BASE_PATTERN + r"/channels/(\d+)/(?:\d+/threads/)?(\d+)/?$" example = "https://discord.com/channels/1234567890/9876543210" def items(self): @@ -251,7 +268,7 @@ class DiscordChannelExtractor(DiscordExtractor): class DiscordMessageExtractor(DiscordExtractor): subcategory = "message" - pattern = rf"{BASE_PATTERN}/channels/(\d+)/(\d+)/(\d+)/?$" + pattern = BASE_PATTERN + r"/channels/(\d+)/(\d+)/(\d+)/?$" example = "https://discord.com/channels/1234567890/9876543210/2468013579" def items(self): @@ -266,9 +283,38 @@ class DiscordMessageExtractor(DiscordExtractor): self.api.get_message(channel_id, message_id)) +class DiscordServerAssetsExtractor(DiscordExtractor): + subcategory = "server-assets" + filename_fmt = "{name} ({id}).{extension}" + directory_fmt = ["{category}", "{server_id}_{server}", "Assets", "{label}"] + archive_fmt = "asset_{server_id}_{id}" + pattern = (BASE_PATTERN + + r"/channels/(\d+)/(?:assets?|files)(?:/([\w-]+))?/?$") + example = "https://discord.com/channels/1234567890/assets" + + def items(self): + server_id, asset_type = 
self.groups + server = self.api.get_server(server_id) + parsed = self.parse_server(server) + + if asset_type is None: + asset_types = ("", "emojis", "stickers") + else: + asset_types = asset_type.split(",") + + for asset_type in asset_types: + assets = self.collect_server_assets(server, asset_type) + parsed["count"] = len(assets) + parsed["label"] = asset_type + yield Message.Directory, "", parsed + for asset in assets: + asset.update(parsed) + yield Message.Url, asset["url"], asset + + class DiscordServerExtractor(DiscordExtractor): subcategory = "server" - pattern = rf"{BASE_PATTERN}/channels/(\d+)/?$" + pattern = BASE_PATTERN + r"/channels/(\d+)/?$" example = "https://discord.com/channels/1234567890" def items(self): @@ -286,7 +332,7 @@ class DiscordDirectMessagesExtractor(DiscordExtractor): subcategory = "direct-messages" directory_fmt = ("{category}", "Direct Messages", "{channel_id}_{recipients:J,}") - pattern = rf"{BASE_PATTERN}/channels/@me/(\d+)/?$" + pattern = BASE_PATTERN + r"/channels/@me/(\d+)/?$" example = "https://discord.com/channels/@me/1234567890" def items(self): @@ -297,7 +343,7 @@ class DiscordDirectMessageExtractor(DiscordExtractor): subcategory = "direct-message" directory_fmt = ("{category}", "Direct Messages", "{channel_id}_{recipients:J,}") - pattern = rf"{BASE_PATTERN}/channels/@me/(\d+)/(\d+)/?$" + pattern = BASE_PATTERN + r"/channels/@me/(\d+)/(\d+)/?$" example = "https://discord.com/channels/@me/1234567890/9876543210" def items(self): diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py index 36423db..e079d85 100644 --- a/gallery_dl/extractor/dynastyscans.py +++ b/gallery_dl/extractor/dynastyscans.py @@ -41,7 +41,7 @@ class DynastyscansBase(): class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor): """Extractor for manga-chapters from dynasty-scans.com""" - pattern = rf"{BASE_PATTERN}(/chapters/[^/?#]+)" + pattern = BASE_PATTERN + r"(/chapters/[^/?#]+)" example = 
"https://dynasty-scans.com/chapters/NAME" def metadata(self, page): @@ -81,7 +81,7 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor): class DynastyscansMangaExtractor(DynastyscansBase, MangaExtractor): chapterclass = DynastyscansChapterExtractor reverse = False - pattern = rf"{BASE_PATTERN}(/series/[^/?#]+)" + pattern = BASE_PATTERN + r"(/series/[^/?#]+)" example = "https://dynasty-scans.com/series/NAME" def chapters(self, page): @@ -97,7 +97,7 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor): directory_fmt = ("{category}", "Images") filename_fmt = "{image_id}.{extension}" archive_fmt = "i_{image_id}" - pattern = rf"{BASE_PATTERN}/images/?(?:\?([^#]+))?$" + pattern = BASE_PATTERN + r"/images/?(?:\?([^#]+))?$" example = "https://dynasty-scans.com/images?QUERY" def __init__(self, match): @@ -126,7 +126,7 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor): class DynastyscansImageExtractor(DynastyscansSearchExtractor): """Extractor for individual images on dynasty-scans.com""" subcategory = "image" - pattern = rf"{BASE_PATTERN}/images/(\d+)" + pattern = BASE_PATTERN + r"/images/(\d+)" example = "https://dynasty-scans.com/images/12345" def images(self): @@ -136,7 +136,7 @@ class DynastyscansImageExtractor(DynastyscansSearchExtractor): class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor): """Extractor for dynasty-scans anthologies""" subcategory = "anthology" - pattern = rf"{BASE_PATTERN}/anthologies/([^/?#]+)" + pattern = BASE_PATTERN + r"/anthologies/([^/?#]+)" example = "https://dynasty-scans.com/anthologies/TITLE" def items(self): diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py index cc6708d..08cab00 100644 --- a/gallery_dl/extractor/e621.py +++ b/gallery_dl/extractor/e621.py @@ -94,13 +94,13 @@ BASE_PATTERN = E621Extractor.update({ class E621TagExtractor(E621Extractor, danbooru.DanbooruTagExtractor): """Extractor for e621 posts from tag searches""" - pattern = 
rf"{BASE_PATTERN}/posts?(?:\?[^#]*?tags=|/index/\d+/)([^&#]*)" + pattern = BASE_PATTERN + r"/posts?(?:\?[^#]*?tags=|/index/\d+/)([^&#]*)" example = "https://e621.net/posts?tags=TAG" class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor): """Extractor for e621 pools""" - pattern = rf"{BASE_PATTERN}/pool(?:s|/show)/(\d+)" + pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)" example = "https://e621.net/pools/12345" def posts(self): @@ -125,7 +125,7 @@ class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor): class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor): """Extractor for single e621 posts""" - pattern = rf"{BASE_PATTERN}/post(?:s|/show)/(\d+)" + pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)" example = "https://e621.net/posts/12345" def posts(self): @@ -135,7 +135,7 @@ class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor): class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor): """Extractor for popular images from e621""" - pattern = rf"{BASE_PATTERN}/explore/posts/popular(?:\?([^#]*))?" + pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?([^#]*))?" 
example = "https://e621.net/explore/posts/popular" def posts(self): @@ -145,7 +145,7 @@ class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor): class E621ArtistExtractor(E621Extractor, danbooru.DanbooruArtistExtractor): """Extractor for e621 artists""" subcategory = "artist" - pattern = rf"{BASE_PATTERN}/artists/(\d+)" + pattern = BASE_PATTERN + r"/artists/(\d+)" example = "https://e621.net/artists/12345" items = E621Extractor.items_artists @@ -155,7 +155,7 @@ class E621ArtistSearchExtractor(E621Extractor, danbooru.DanbooruArtistSearchExtractor): """Extractor for e621 artist searches""" subcategory = "artist-search" - pattern = rf"{BASE_PATTERN}/artists/?\?([^#]+)" + pattern = BASE_PATTERN + r"/artists/?\?([^#]+)" example = "https://e621.net/artists?QUERY" items = E621Extractor.items_artists @@ -166,7 +166,7 @@ class E621FavoriteExtractor(E621Extractor): subcategory = "favorite" directory_fmt = ("{category}", "Favorites", "{user_id}") archive_fmt = "f_{user_id}_{id}" - pattern = rf"{BASE_PATTERN}/favorites(?:\?([^#]*))?" + pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?" 
example = "https://e621.net/favorites" def metadata(self): diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py index 2c9ab47..d01d00a 100644 --- a/gallery_dl/extractor/erome.py +++ b/gallery_dl/extractor/erome.py @@ -25,10 +25,10 @@ class EromeExtractor(Extractor): _cookies = True def items(self): - base = f"{self.root}/a/" + base = self.root + "/a/" data = {"_extractor": EromeAlbumExtractor} for album_id in self.albums(): - yield Message.Queue, f"{base}{album_id}", data + yield Message.Queue, base + album_id, data def albums(self): return () @@ -64,7 +64,7 @@ class EromeExtractor(Extractor): class EromeAlbumExtractor(EromeExtractor): """Extractor for albums on erome.com""" subcategory = "album" - pattern = rf"{BASE_PATTERN}/a/(\w+)" + pattern = BASE_PATTERN + r"/a/(\w+)" example = "https://www.erome.com/a/ID" def items(self): @@ -121,7 +121,7 @@ class EromeAlbumExtractor(EromeExtractor): class EromeUserExtractor(EromeExtractor): subcategory = "user" - pattern = rf"{BASE_PATTERN}/(?!a/|search\?)([^/?#]+)(?:/?\?([^#]+))?" + pattern = BASE_PATTERN + r"/(?!a/|search\?)([^/?#]+)(?:/?\?([^#]+))?" 
example = "https://www.erome.com/USER" def albums(self): @@ -137,11 +137,11 @@ class EromeUserExtractor(EromeExtractor): class EromeSearchExtractor(EromeExtractor): subcategory = "search" - pattern = rf"{BASE_PATTERN}/search/?\?(q=[^#]+)" + pattern = BASE_PATTERN + r"/search/?\?(q=[^#]+)" example = "https://www.erome.com/search?q=QUERY" def albums(self): - url = f"{self.root}/search" + url = self.root + "/search" params = text.parse_query(self.groups[0]) return self._pagination(url, params) diff --git a/gallery_dl/extractor/everia.py b/gallery_dl/extractor/everia.py index ce29800..8f1bece 100644 --- a/gallery_dl/extractor/everia.py +++ b/gallery_dl/extractor/everia.py @@ -45,7 +45,7 @@ class EveriaPostExtractor(EveriaExtractor): subcategory = "post" directory_fmt = ("{category}", "{title}") archive_fmt = "{post_url}_{num}" - pattern = rf"{BASE_PATTERN}(/\d{{4}}/\d{{2}}/\d{{2}}/[^/?#]+)" + pattern = BASE_PATTERN + r"(/\d{4}/\d{2}/\d{2}/[^/?#]+)" example = "https://everia.club/0000/00/00/TITLE" def items(self): @@ -72,26 +72,26 @@ class EveriaPostExtractor(EveriaExtractor): class EveriaTagExtractor(EveriaExtractor): subcategory = "tag" - pattern = rf"{BASE_PATTERN}(/tag/[^/?#]+)" + pattern = BASE_PATTERN + r"(/tag/[^/?#]+)" example = "https://everia.club/tag/TAG" class EveriaCategoryExtractor(EveriaExtractor): subcategory = "category" - pattern = rf"{BASE_PATTERN}(/category/[^/?#]+)" + pattern = BASE_PATTERN + r"(/category/[^/?#]+)" example = "https://everia.club/category/CATEGORY" class EveriaDateExtractor(EveriaExtractor): subcategory = "date" - pattern = (rf"{BASE_PATTERN}" - rf"(/\d{{4}}(?:/\d{{2}})?(?:/\d{{2}})?)(?:/page/\d+)?/?$") + pattern = (BASE_PATTERN + + r"(/\d{4}(?:/\d{2})?(?:/\d{2})?)(?:/page/\d+)?/?$") example = "https://everia.club/0000/00/00" class EveriaSearchExtractor(EveriaExtractor): subcategory = "search" - pattern = rf"{BASE_PATTERN}/(?:page/\d+/)?\?s=([^&#]+)" + pattern = BASE_PATTERN + r"/(?:page/\d+/)?\?s=([^&#]+)" example = 
"https://everia.club/?s=SEARCH" def posts(self): diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 9dab923..937e863 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -115,9 +115,9 @@ class ExhentaiExtractor(Extractor): class ExhentaiGalleryExtractor(ExhentaiExtractor): """Extractor for image galleries from exhentai.org""" subcategory = "gallery" - pattern = (rf"{BASE_PATTERN}/(?:" - rf"g/(\d+)/([\da-f]{{10}})|" - rf"s/([\da-f]{{10}})/(\d+)-(\d+))") + pattern = (BASE_PATTERN + + r"(?:/g/(\d+)/([\da-f]{10})" + r"|/s/([\da-f]{10})/(\d+)-(\d+))") example = "https://e-hentai.org/g/12345/67890abcde/" def __init__(self, match): @@ -150,11 +150,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): self.original = self.config("original", True) def finalize(self): - if self.data: + if self.data and (token := self.data.get("image_token")): self.log.info("Use '%s/s/%s/%s-%s' as input URL " "to continue downloading from the current position", - self.root, self.data["image_token"], - self.gallery_id, self.data["num"]) + self.root, token, self.gallery_id, self.data["num"]) def favorite(self, slot="0"): url = self.root + "/gallerypopups.php" @@ -563,7 +562,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): class ExhentaiSearchExtractor(ExhentaiExtractor): """Extractor for exhentai search results""" subcategory = "search" - pattern = rf"{BASE_PATTERN}/(?:\?([^#]*)|tag/([^/?#]+))" + pattern = BASE_PATTERN + r"/(?:\?([^#]*)|tag/([^/?#]+))" example = "https://e-hentai.org/?f_search=QUERY" def __init__(self, match): @@ -620,7 +619,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor): class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor): """Extractor for favorited exhentai galleries""" subcategory = "favorite" - pattern = rf"{BASE_PATTERN}/favorites\.php(?:\?([^#]*)())?" + pattern = BASE_PATTERN + r"/favorites\.php(?:\?([^#]*)())?" 
example = "https://e-hentai.org/favorites.php" def _init(self): diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py index 5d56a5f..7f03f84 100644 --- a/gallery_dl/extractor/facebook.py +++ b/gallery_dl/extractor/facebook.py @@ -11,9 +11,9 @@ from .. import text, util, exception from ..cache import memcache BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?facebook\.com" -USER_PATTERN = (rf"{BASE_PATTERN}/" - rf"(?!media/|photo/|photo.php|watch/|permalink.php)" - rf"(?:profile\.php\?id=|people/[^/?#]+/)?([^/?&#]+)") +USER_PATTERN = (BASE_PATTERN + + r"/(?!media/|photo/|photo.php|watch/|permalink.php)" + r"(?:profile\.php\?id=|people/[^/?#]+/)?([^/?&#]+)") class FacebookExtractor(Extractor): @@ -237,16 +237,14 @@ class FacebookExtractor(Extractor): if res.url.startswith(self.root + "/login"): raise exception.AuthRequired( - message=(f"You must be logged in to continue viewing images." - f"{LEFT_OFF_TXT}") - ) + message=("You must be logged in to continue viewing images." + + LEFT_OFF_TXT)) if b'{"__dr":"CometErrorRoot.react"}' in res.content: raise exception.AbortExtraction( - f"You've been temporarily blocked from viewing images.\n" - f"Please try using a different account, " - f"using a VPN or waiting before you retry.{LEFT_OFF_TXT}" - ) + "You've been temporarily blocked from viewing images.\n" + "Please try using a different account, " + "using a VPN or waiting before you retry." + LEFT_OFF_TXT) return res @@ -306,6 +304,12 @@ class FacebookExtractor(Extractor): "Detected a loop in the set, it's likely finished. " "Extraction is over." ) + elif int(photo["next_photo_id"]) > int(photo["id"]) + i*120: + self.log.info( + "Detected jump to the beginning of the set. 
(%s -> %s)", + photo["id"], photo["next_photo_id"]) + if self.config("loop", False): + all_photo_ids.append(photo["next_photo_id"]) else: all_photo_ids.append(photo["next_photo_id"]) @@ -389,9 +393,9 @@ class FacebookExtractor(Extractor): class FacebookPhotoExtractor(FacebookExtractor): """Base class for Facebook Photo extractors""" subcategory = "photo" - pattern = (rf"{BASE_PATTERN}/" - rf"(?:[^/?#]+/photos/[^/?#]+/|photo(?:.php)?/?\?" - rf"(?:[^&#]+&)*fbid=)([^/?&#]+)[^/?#]*(?<!&setextract)$") + pattern = (BASE_PATTERN + + r"/(?:[^/?#]+/photos/[^/?#]+/|photo(?:.php)?/?\?" + r"(?:[^&#]+&)*fbid=)([^/?&#]+)[^/?#]*(?<!&setextract)$") example = "https://www.facebook.com/photo/?fbid=PHOTO_ID" def items(self): @@ -427,11 +431,12 @@ class FacebookSetExtractor(FacebookExtractor): """Base class for Facebook Set extractors""" subcategory = "set" pattern = ( - rf"{BASE_PATTERN}/" - rf"(?:(?:media/set|photo)/?\?(?:[^&#]+&)*set=([^&#]+)" - rf"[^/?#]*(?<!&setextract)$" - rf"|([^/?#]+/posts/[^/?#]+)" - rf"|photo/\?(?:[^&#]+&)*fbid=([^/?&#]+)&set=([^/?&#]+)&setextract)") + BASE_PATTERN + + r"/(?:(?:media/set|photo)/?\?(?:[^&#]+&)*set=([^&#]+)" + r"[^/?#]*(?<!&setextract)$" + r"|([^/?#]+/posts/[^/?#]+)" + r"|photo/\?(?:[^&#]+&)*fbid=([^/?&#]+)&set=([^/?&#]+)&setextract)" + ) example = "https://www.facebook.com/media/set/?set=SET_ID" def items(self): @@ -454,7 +459,7 @@ class FacebookVideoExtractor(FacebookExtractor): """Base class for Facebook Video extractors""" subcategory = "video" directory_fmt = ("{category}", "{username}", "{subcategory}") - pattern = rf"{BASE_PATTERN}/(?:[^/?#]+/videos/|watch/?\?v=)([^/?&#]+)" + pattern = BASE_PATTERN + r"/(?:[^/?#]+/videos/|watch/?\?v=)([^/?&#]+)" example = "https://www.facebook.com/watch/?v=VIDEO_ID" def items(self): @@ -481,7 +486,7 @@ class FacebookInfoExtractor(FacebookExtractor): """Extractor for Facebook Profile data""" subcategory = "info" directory_fmt = ("{category}", "{username}") - pattern = rf"{USER_PATTERN}/info" + pattern = 
USER_PATTERN + r"/info" example = "https://www.facebook.com/USERNAME/info" def items(self): @@ -492,7 +497,7 @@ class FacebookInfoExtractor(FacebookExtractor): class FacebookAlbumsExtractor(FacebookExtractor): """Extractor for Facebook Profile albums""" subcategory = "albums" - pattern = rf"{USER_PATTERN}/photos_albums(?:/([^/?#]+))?" + pattern = USER_PATTERN + r"/photos_albums(?:/([^/?#]+))?" example = "https://www.facebook.com/USERNAME/photos_albums" def items(self): @@ -525,7 +530,7 @@ class FacebookAlbumsExtractor(FacebookExtractor): class FacebookPhotosExtractor(FacebookExtractor): """Extractor for Facebook Profile Photos""" subcategory = "photos" - pattern = rf"{USER_PATTERN}/photos(?:_by)?" + pattern = USER_PATTERN + r"/photos(?:_by)?" example = "https://www.facebook.com/USERNAME/photos" def items(self): @@ -542,7 +547,7 @@ class FacebookPhotosExtractor(FacebookExtractor): class FacebookAvatarExtractor(FacebookExtractor): """Extractor for Facebook Profile Avatars""" subcategory = "avatar" - pattern = rf"{USER_PATTERN}/avatar" + pattern = USER_PATTERN + r"/avatar" example = "https://www.facebook.com/USERNAME/avatar" def items(self): @@ -564,7 +569,7 @@ class FacebookAvatarExtractor(FacebookExtractor): class FacebookUserExtractor(Dispatch, FacebookExtractor): """Extractor for Facebook Profiles""" - pattern = rf"{USER_PATTERN}/?(?:$|\?|#)" + pattern = USER_PATTERN + r"/?(?:$|\?|#)" example = "https://www.facebook.com/USERNAME" def items(self): diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index 036b388..5af6044 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -340,7 +340,7 @@ class FanboxExtractor(Extractor): url = (f"https://docs.google.com/forms/d/e/" f"{content_id}/viewform?usp=sf_link") else: - self.log.warning(f"service not recognized: {provider}") + self.log.warning("service not recognized: %s", provider) if url: final_post["embed"] = embed @@ -355,7 +355,7 @@ class 
FanboxExtractor(Extractor): class FanboxCreatorExtractor(FanboxExtractor): """Extractor for a Fanbox creator's works""" subcategory = "creator" - pattern = rf"{USER_PATTERN}(?:/posts)?/?$" + pattern = USER_PATTERN + r"(?:/posts)?/?$" example = "https://USER.fanbox.cc/" def posts(self): @@ -384,7 +384,7 @@ class FanboxCreatorExtractor(FanboxExtractor): class FanboxPostExtractor(FanboxExtractor): """Extractor for media from a single Fanbox post""" subcategory = "post" - pattern = rf"{USER_PATTERN}/posts/(\d+)" + pattern = USER_PATTERN + r"/posts/(\d+)" example = "https://USER.fanbox.cc/posts/12345" def posts(self): @@ -394,7 +394,7 @@ class FanboxPostExtractor(FanboxExtractor): class FanboxHomeExtractor(FanboxExtractor): """Extractor for your Fanbox home feed""" subcategory = "home" - pattern = rf"{BASE_PATTERN}/?$" + pattern = BASE_PATTERN + r"/?$" example = "https://fanbox.cc/" def posts(self): @@ -405,7 +405,7 @@ class FanboxHomeExtractor(FanboxExtractor): class FanboxSupportingExtractor(FanboxExtractor): """Extractor for your supported Fanbox users feed""" subcategory = "supporting" - pattern = rf"{BASE_PATTERN}/home/supporting" + pattern = BASE_PATTERN + r"/home/supporting" example = "https://fanbox.cc/home/supporting" def posts(self): diff --git a/gallery_dl/extractor/fansly.py b/gallery_dl/extractor/fansly.py index ba60b15..8848501 100644 --- a/gallery_dl/extractor/fansly.py +++ b/gallery_dl/extractor/fansly.py @@ -25,11 +25,12 @@ class FanslyExtractor(Extractor): def _init(self): self.api = FanslyAPI(self) + self.previews = self.config("previews", True) if fmts := self.config("formats"): self.formats = set(fmts) else: - self.formats = {1, 2, 3, 4, 302, 303} + self.formats = None def items(self): for post in self.posts(): @@ -88,23 +89,29 @@ class FanslyExtractor(Extractor): exc.__class__.__name__, exc) return files - def _extract_attachment(self, files, post, attachment): - media = attachment["media"] + def _extract_attachment(self, files, post, attachment, 
preview=False): + media = attachment["preview" if preview else "media"] variants = media.pop("variants") or [] if media.get("locations"): variants.append(media) + fmts = self.formats formats = [ (variant["width"], (type-500 if type > 256 else type), variant) for variant in variants if variant.get("locations") and - (type := variant["type"]) in self.formats + (type := variant["type"]) and + (fmts is None or type in fmts) ] try: variant = max(formats)[-1] except Exception: + if self.previews and "preview" in attachment and not preview: + self.log.info("%s/%s: Downloading Preview", + post["id"], attachment["id"]) + return self._extract_attachment(files, post, attachment, True) return self.log.warning("%s/%s: No format available", post["id"], attachment["id"]) @@ -118,6 +125,7 @@ class FanslyExtractor(Extractor): file = { **variant, + "preview": preview, "format": variant["type"], "date": self.parse_timestamp(media["createdAt"]), "date_updated": self.parse_timestamp(media["updatedAt"]), @@ -135,7 +143,7 @@ class FanslyExtractor(Extractor): files.append({ "file": file, - "url": f"ytdl:{location['location']}", + "url": "ytdl:" + location["location"], "_fallback": fallback, "_ytdl_manifest": "dash" if mime == "application/dash+xml" else "hls", @@ -155,7 +163,7 @@ class FanslyExtractor(Extractor): class FanslyPostExtractor(FanslyExtractor): subcategory = "post" - pattern = rf"{BASE_PATTERN}/post/(\d+)" + pattern = BASE_PATTERN + r"/post/(\d+)" example = "https://fansly.com/post/1234567890" def posts(self): @@ -164,7 +172,7 @@ class FanslyPostExtractor(FanslyExtractor): class FanslyHomeExtractor(FanslyExtractor): subcategory = "home" - pattern = rf"{BASE_PATTERN}/home(?:/(?:subscribed()|list/(\d+)))?" + pattern = BASE_PATTERN + r"/home(?:/(?:subscribed()|list/(\d+)))?" 
example = "https://fansly.com/home" def posts(self): @@ -180,11 +188,11 @@ class FanslyHomeExtractor(FanslyExtractor): class FanslyListExtractor(FanslyExtractor): subcategory = "list" - pattern = rf"{BASE_PATTERN}/lists/(\d+)" + pattern = BASE_PATTERN + r"/lists/(\d+)" example = "https://fansly.com/lists/1234567890" def items(self): - base = f"{self.root}/" + base = self.root + "/" for account in self.api.lists_itemsnew(self.groups[0]): account["_extractor"] = FanslyCreatorPostsExtractor url = f"{base}{account['username']}/posts" @@ -193,11 +201,11 @@ class FanslyListExtractor(FanslyExtractor): class FanslyListsExtractor(FanslyExtractor): subcategory = "lists" - pattern = rf"{BASE_PATTERN}/lists" + pattern = BASE_PATTERN + r"/lists" example = "https://fansly.com/lists" def items(self): - base = f"{self.root}/lists/" + base = self.root + "/lists/" for list in self.api.lists_account(): list["_extractor"] = FanslyListExtractor url = f"{base}{list['id']}#{list['label']}" @@ -206,7 +214,7 @@ class FanslyListsExtractor(FanslyExtractor): class FanslyCreatorPostsExtractor(FanslyExtractor): subcategory = "creator-posts" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/posts(?:/wall/(\d+))?" + pattern = BASE_PATTERN + r"/([^/?#]+)/posts(?:/wall/(\d+))?" example = "https://fansly.com/CREATOR/posts" def posts_wall(self, account, wall): @@ -215,7 +223,7 @@ class FanslyCreatorPostsExtractor(FanslyExtractor): class FanslyCreatorMediaExtractor(FanslyExtractor): subcategory = "creator-media" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/media(?:/wall/(\d+))?" + pattern = BASE_PATTERN + r"/([^/?#]+)/media(?:/wall/(\d+))?" 
example = "https://fansly.com/CREATOR/media" def posts_wall(self, account, wall): @@ -308,7 +316,7 @@ class FanslyAPI(): return self._pagination(endpoint, params) def timeline_new(self, account_id, wall_id): - endpoint = f"/v1/timelinenew/{account_id}" + endpoint = "/v1/timelinenew/" + str(account_id) params = { "before" : "0", "after" : "0", diff --git a/gallery_dl/extractor/fapello.py b/gallery_dl/extractor/fapello.py index afef942..2bd7a1f 100644 --- a/gallery_dl/extractor/fapello.py +++ b/gallery_dl/extractor/fapello.py @@ -20,7 +20,7 @@ class FapelloPostExtractor(Extractor): directory_fmt = ("{category}", "{model}") filename_fmt = "{model}_{id}.{extension}" archive_fmt = "{type}_{model}_{id}" - pattern = rf"{BASE_PATTERN}/(?!search/|popular_videos/)([^/?#]+)/(\d+)" + pattern = BASE_PATTERN + r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)" example = "https://fapello.com/MODEL/12345/" def __init__(self, match): @@ -52,9 +52,9 @@ class FapelloModelExtractor(Extractor): """Extractor for all posts from a fapello model""" category = "fapello" subcategory = "model" - pattern = (rf"{BASE_PATTERN}/(?!top-(?:likes|followers)|popular_videos" - rf"|videos|trending|search/?$)" - rf"([^/?#]+)/?$") + pattern = (BASE_PATTERN + r"/(?!top-(?:likes|followers)|popular_videos" + r"|videos|trending|search/?$)" + r"([^/?#]+)/?$") example = "https://fapello.com/model/" def __init__(self, match): @@ -85,9 +85,9 @@ class FapelloPathExtractor(Extractor): """Extractor for models and posts from fapello.com paths""" category = "fapello" subcategory = "path" - pattern = (rf"{BASE_PATTERN}/(?!search/?$)" - rf"(top-(?:likes|followers)|videos|trending" - rf"|popular_videos/[^/?#]+)/?$") + pattern = (BASE_PATTERN + + r"/(?!search/?$)(top-(?:likes|followers)|videos|trending" + r"|popular_videos/[^/?#]+)/?$") example = "https://fapello.com/trending/" def __init__(self, match): diff --git a/gallery_dl/extractor/fikfap.py b/gallery_dl/extractor/fikfap.py index 75071c5..5a0f434 100644 --- 
a/gallery_dl/extractor/fikfap.py +++ b/gallery_dl/extractor/fikfap.py @@ -67,7 +67,7 @@ class FikfapExtractor(Extractor): class FikfapPostExtractor(FikfapExtractor): subcategory = "post" - pattern = rf"{BASE_PATTERN}/user/(\w+)/post/(\d+)" + pattern = BASE_PATTERN + r"/user/(\w+)/post/(\d+)" example = "https://fikfap.com/user/USER/post/12345" def posts(self): @@ -86,7 +86,7 @@ class FikfapPostExtractor(FikfapExtractor): class FikfapUserExtractor(FikfapExtractor): subcategory = "user" - pattern = rf"{BASE_PATTERN}/user/(\w+)" + pattern = BASE_PATTERN + r"/user/(\w+)" example = "https://fikfap.com/user/USER" def posts(self): diff --git a/gallery_dl/extractor/fitnakedgirls.py b/gallery_dl/extractor/fitnakedgirls.py index d252ec4..a1ebf48 100644 --- a/gallery_dl/extractor/fitnakedgirls.py +++ b/gallery_dl/extractor/fitnakedgirls.py @@ -50,7 +50,7 @@ class FitnakedgirlsGalleryExtractor(GalleryExtractor, FitnakedgirlsExtractor): directory_fmt = ("{category}", "{title}") filename_fmt = "{filename}.{extension}" archive_fmt = "{gallery_id}_{filename}" - pattern = rf"{BASE_PATTERN}/photos/gallery/([\w-]+)/?$" + pattern = BASE_PATTERN + r"/photos/gallery/([\w-]+)/?$" example = "https://fitnakedgirls.com/photos/gallery/MODEL-nude/" def __init__(self, match): @@ -110,7 +110,7 @@ class FitnakedgirlsGalleryExtractor(GalleryExtractor, FitnakedgirlsExtractor): class FitnakedgirlsCategoryExtractor(FitnakedgirlsExtractor): """Extractor for fitnakedgirls category pages""" subcategory = "category" - pattern = rf"{BASE_PATTERN}/photos/gallery/category/([\w-]+)" + pattern = BASE_PATTERN + r"/photos/gallery/category/([\w-]+)" example = "https://fitnakedgirls.com/photos/gallery/category/CATEGORY/" def galleries(self): @@ -121,7 +121,7 @@ class FitnakedgirlsCategoryExtractor(FitnakedgirlsExtractor): class FitnakedgirlsTagExtractor(FitnakedgirlsExtractor): """Extractor for fitnakedgirls tag pages""" subcategory = "tag" - pattern = rf"{BASE_PATTERN}/photos/gallery/tag/([\w-]+)" + pattern = 
BASE_PATTERN + r"/photos/gallery/tag/([\w-]+)" example = "https://fitnakedgirls.com/photos/gallery/tag/TAG/" def galleries(self): @@ -135,7 +135,7 @@ class FitnakedgirlsVideoExtractor(FitnakedgirlsExtractor): directory_fmt = ("{category}", "{title}") filename_fmt = "{filename}.{extension}" archive_fmt = "{video_id}_{filename}" - pattern = rf"{BASE_PATTERN}/videos/(\d+)/(\d+)/([\w-]+)" + pattern = BASE_PATTERN + r"/videos/(\d+)/(\d+)/([\w-]+)" example = "https://fitnakedgirls.com/videos/2025/08/VIDEO-TITLE/" def items(self): @@ -168,7 +168,7 @@ class FitnakedgirlsBlogExtractor(FitnakedgirlsExtractor): directory_fmt = ("{category}", "{title}") filename_fmt = "{filename}.{extension}" archive_fmt = "{post_id}_{filename}" - pattern = rf"{BASE_PATTERN}/fitblog/([\w-]+)" + pattern = BASE_PATTERN + r"/fitblog/([\w-]+)" example = "https://fitnakedgirls.com/fitblog/MODEL-NAME/" def items(self): diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py index 1446eb8..5071de7 100644 --- a/gallery_dl/extractor/flickr.py +++ b/gallery_dl/extractor/flickr.py @@ -124,7 +124,7 @@ class FlickrAlbumExtractor(FlickrExtractor): directory_fmt = ("{category}", "{user[username]}", "Albums", "{album[id]} {album[title]}") archive_fmt = "a_{album[id]}_{id}" - pattern = rf"{BASE_PATTERN}/photos/([^/?#]+)/(?:album|set)s(?:/(\d+))?" + pattern = BASE_PATTERN + r"/photos/([^/?#]+)/(?:album|set)s(?:/(\d+))?" 
example = "https://www.flickr.com/photos/USER/albums/12345" def items(self): @@ -166,7 +166,7 @@ class FlickrGalleryExtractor(FlickrExtractor): directory_fmt = ("{category}", "{user[username]}", "Galleries", "{gallery[gallery_id]} {gallery[title]}") archive_fmt = "g_{gallery[id]}_{id}" - pattern = rf"{BASE_PATTERN}/photos/([^/?#]+)/galleries/(\d+)" + pattern = BASE_PATTERN + r"/photos/([^/?#]+)/galleries/(\d+)" example = "https://www.flickr.com/photos/USER/galleries/12345/" def metadata(self): @@ -184,7 +184,7 @@ class FlickrGroupExtractor(FlickrExtractor): subcategory = "group" directory_fmt = ("{category}", "Groups", "{group[groupname]}") archive_fmt = "G_{group[nsid]}_{id}" - pattern = rf"{BASE_PATTERN}/groups/([^/?#]+)" + pattern = BASE_PATTERN + r"/groups/([^/?#]+)" example = "https://www.flickr.com/groups/NAME/" def metadata(self): @@ -199,7 +199,7 @@ class FlickrUserExtractor(FlickrExtractor): """Extractor for the photostream of a flickr user""" subcategory = "user" archive_fmt = "u_{user[nsid]}_{id}" - pattern = rf"{BASE_PATTERN}/photos/([^/?#]+)/?$" + pattern = BASE_PATTERN + r"/photos/([^/?#]+)/?$" example = "https://www.flickr.com/photos/USER/" def photos(self): @@ -211,7 +211,7 @@ class FlickrFavoriteExtractor(FlickrExtractor): subcategory = "favorite" directory_fmt = ("{category}", "{user[username]}", "Favorites") archive_fmt = "f_{user[nsid]}_{id}" - pattern = rf"{BASE_PATTERN}/photos/([^/?#]+)/favorites" + pattern = BASE_PATTERN + r"/photos/([^/?#]+)/favorites" example = "https://www.flickr.com/photos/USER/favorites" def photos(self): @@ -223,7 +223,7 @@ class FlickrSearchExtractor(FlickrExtractor): subcategory = "search" directory_fmt = ("{category}", "Search", "{search[text]}") archive_fmt = "s_{search}_{id}" - pattern = rf"{BASE_PATTERN}/search/?\?([^#]+)" + pattern = BASE_PATTERN + r"/search/?\?([^#]+)" example = "https://flickr.com/search/?text=QUERY" def metadata(self): @@ -456,7 +456,7 @@ class FlickrAPI(oauth.OAuth1API): except ValueError: 
data = {"code": -1, "message": response.content} if "code" in data: - msg = data.get("message") + msg = data.get("message", "") self.log.debug("Server response: %s", data) if data["code"] == 1: raise exception.NotFoundError(self.extractor.subcategory) @@ -466,7 +466,7 @@ class FlickrAPI(oauth.OAuth1API): raise exception.AuthenticationError(msg) elif data["code"] == 99: raise exception.AuthorizationError(msg) - raise exception.AbortExtraction(f"API request failed: {msg}") + raise exception.AbortExtraction("API request failed: " + msg) return data def _pagination(self, method, params, key="photos"): diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 3c69489..88053b4 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -147,7 +147,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): subcategory = "thread" directory_fmt = ("{category}", "{board[shortname]}", "{thread_num} {title|comment[:50]}") - pattern = rf"{BASE_PATTERN}/([^/?#]+)/thread/(\d+)" + pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)" example = "https://archived.moe/a/thread/12345/" def __init__(self, match): @@ -174,7 +174,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): class FoolfuukaBoardExtractor(FoolfuukaExtractor): """Base extractor for FoolFuuka based boards/archives""" subcategory = "board" - pattern = rf"{BASE_PATTERN}/([^/?#]+)(?:/(?:page/)?(\d*))?$" + pattern = BASE_PATTERN + r"/([^/?#]+)(?:/(?:page/)?(\d*))?$" example = "https://archived.moe/a/" def __init__(self, match): @@ -210,7 +210,7 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor): """Base extractor for search results on FoolFuuka based boards/archives""" subcategory = "search" directory_fmt = ("{category}", "search", "{search}") - pattern = rf"{BASE_PATTERN}/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)" + pattern = BASE_PATTERN + r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)" example = "https://archived.moe/_/search/text/QUERY/" request_interval = 
(0.5, 1.5) @@ -265,7 +265,7 @@ class FoolfuukaGalleryExtractor(FoolfuukaExtractor): """Base extractor for FoolFuuka galleries""" subcategory = "gallery" directory_fmt = ("{category}", "{board}", "gallery") - pattern = rf"{BASE_PATTERN}/([^/?#]+)/gallery(?:/(\d+))?" + pattern = BASE_PATTERN + r"/([^/?#]+)/gallery(?:/(\d+))?" example = "https://archived.moe/a/gallery" def metadata(self): @@ -278,7 +278,7 @@ class FoolfuukaGalleryExtractor(FoolfuukaExtractor): base = f"{self.root}/_/api/chan/gallery/?board={self.board}&page=" for pnum in pages: - posts = self.request_json(f"{base}{pnum}") + posts = self.request_json(base + str(pnum)) if not posts: return yield from posts diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py index d932174..1f1f839 100644 --- a/gallery_dl/extractor/foolslide.py +++ b/gallery_dl/extractor/foolslide.py @@ -47,7 +47,7 @@ class FoolslideChapterExtractor(FoolslideExtractor): filename_fmt = ( "{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}") archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)" + pattern = BASE_PATTERN + r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)" example = "https://read.powermanga.org/read/MANGA/en/0/123/" def items(self): @@ -91,7 +91,7 @@ class FoolslideMangaExtractor(FoolslideExtractor): """Base class for manga extractors for FoOlSlide based sites""" subcategory = "manga" categorytransfer = True - pattern = rf"{BASE_PATTERN}(/series/[^/?#]+)" + pattern = BASE_PATTERN + r"(/series/[^/?#]+)" example = "https://read.powermanga.org/series/MANGA/" def items(self): diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index ad57a6b..0a046c9 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -231,7 +231,7 @@ class FuraffinityExtractor(Extractor): class FuraffinityGalleryExtractor(FuraffinityExtractor): """Extractor for a furaffinity user's gallery""" subcategory = 
"gallery" - pattern = rf"{BASE_PATTERN}/gallery/([^/?#]+)(?:$|/(?!folder/))" + pattern = BASE_PATTERN + r"/gallery/([^/?#]+)(?:$|/(?!folder/))" example = "https://www.furaffinity.net/gallery/USER/" def posts(self): @@ -243,7 +243,7 @@ class FuraffinityFolderExtractor(FuraffinityExtractor): subcategory = "folder" directory_fmt = ("{category}", "{user!l}", "Folders", "{folder_id}{folder_name:? //}") - pattern = rf"{BASE_PATTERN}/gallery/([^/?#]+)/folder/(\d+)(?:/([^/?#]+))?" + pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/folder/(\d+)(?:/([^/?#]+))?" example = "https://www.furaffinity.net/gallery/USER/folder/12345/FOLDER" def metadata(self): @@ -260,7 +260,7 @@ class FuraffinityScrapsExtractor(FuraffinityExtractor): """Extractor for a furaffinity user's scraps""" subcategory = "scraps" directory_fmt = ("{category}", "{user!l}", "Scraps") - pattern = rf"{BASE_PATTERN}/scraps/([^/?#]+)" + pattern = BASE_PATTERN + r"/scraps/([^/?#]+)" example = "https://www.furaffinity.net/scraps/USER/" def posts(self): @@ -271,7 +271,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor): """Extractor for a furaffinity user's favorites""" subcategory = "favorite" directory_fmt = ("{category}", "{user!l}", "Favorites") - pattern = rf"{BASE_PATTERN}/favorites/([^/?#]+)" + pattern = BASE_PATTERN + r"/favorites/([^/?#]+)" example = "https://www.furaffinity.net/favorites/USER/" def posts(self): @@ -287,7 +287,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor): """Extractor for furaffinity search results""" subcategory = "search" directory_fmt = ("{category}", "Search", "{search}") - pattern = rf"{BASE_PATTERN}/search(?:/([^/?#]+))?/?[?&]([^#]+)" + pattern = BASE_PATTERN + r"/search(?:/([^/?#]+))?/?[?&]([^#]+)" example = "https://www.furaffinity.net/search/?q=QUERY" def __init__(self, match): @@ -306,7 +306,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor): class FuraffinityPostExtractor(FuraffinityExtractor): """Extractor for individual posts on furaffinity""" 
subcategory = "post" - pattern = rf"{BASE_PATTERN}/(?:view|full)/(\d+)" + pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)" example = "https://www.furaffinity.net/view/12345/" def posts(self): @@ -317,12 +317,12 @@ class FuraffinityPostExtractor(FuraffinityExtractor): class FuraffinityUserExtractor(Dispatch, FuraffinityExtractor): """Extractor for furaffinity user profiles""" - pattern = rf"{BASE_PATTERN}/user/([^/?#]+)" + pattern = BASE_PATTERN + r"/user/([^/?#]+)" example = "https://www.furaffinity.net/user/USER/" def items(self): base = self.root - user = f"{self.user}/" + user = self.user + "/" return self._dispatch_extractors(( (FuraffinityGalleryExtractor , f"{base}/gallery/{user}"), (FuraffinityScrapsExtractor , f"{base}/scraps/{user}"), @@ -333,7 +333,7 @@ class FuraffinityUserExtractor(Dispatch, FuraffinityExtractor): class FuraffinityFollowingExtractor(FuraffinityExtractor): """Extractor for a furaffinity user's watched users""" subcategory = "following" - pattern = rf"{BASE_PATTERN}/watchlist/by/([^/?#]+)" + pattern = BASE_PATTERN + "/watchlist/by/([^/?#]+)" example = "https://www.furaffinity.net/watchlist/by/USER/" def items(self): @@ -355,7 +355,7 @@ class FuraffinityFollowingExtractor(FuraffinityExtractor): class FuraffinitySubmissionsExtractor(FuraffinityExtractor): """Extractor for new furaffinity submissions""" subcategory = "submissions" - pattern = rf"{BASE_PATTERN}(/msg/submissions(?:/[^/?#]+)?)" + pattern = BASE_PATTERN + r"(/msg/submissions(?:/[^/?#]+)?)" example = "https://www.furaffinity.net/msg/submissions" def posts(self): diff --git a/gallery_dl/extractor/furry34.py b/gallery_dl/extractor/furry34.py index 95b98db..3673edb 100644 --- a/gallery_dl/extractor/furry34.py +++ b/gallery_dl/extractor/furry34.py @@ -97,7 +97,7 @@ class Furry34Extractor(BooruExtractor): class Furry34PostExtractor(Furry34Extractor): subcategory = "post" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/post/(\d+)" + pattern = BASE_PATTERN + r"/post/(\d+)" example = 
"https://furry34.com/post/12345" def posts(self): @@ -108,7 +108,7 @@ class Furry34PlaylistExtractor(Furry34Extractor): subcategory = "playlist" directory_fmt = ("{category}", "{playlist_id}") archive_fmt = "p_{playlist_id}_{id}" - pattern = rf"{BASE_PATTERN}/playlists/view/(\d+)" + pattern = BASE_PATTERN + r"/playlists/view/(\d+)" example = "https://furry34.com/playlists/view/12345" def metadata(self): @@ -123,7 +123,7 @@ class Furry34TagExtractor(Furry34Extractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" - pattern = rf"{BASE_PATTERN}/(?:([^/?#]+))?(?:/?\?([^#]+))?(?:$|#)" + pattern = BASE_PATTERN + r"/(?:([^/?#]+))?(?:/?\?([^#]+))?(?:$|#)" example = "https://furry34.com/TAG" def _init(self): diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index 0571fcd..ffba38a 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -148,7 +148,7 @@ class GelbooruBase(): class GelbooruTagExtractor(GelbooruBase, gelbooru_v02.GelbooruV02TagExtractor): """Extractor for images from gelbooru.com based on search-tags""" - pattern = rf"{BASE_PATTERN}page=post&s=list&tags=([^&#]*)" + pattern = BASE_PATTERN + r"page=post&s=list&tags=([^&#]*)" example = "https://gelbooru.com/index.php?page=post&s=list&tags=TAG" @@ -156,7 +156,7 @@ class GelbooruPoolExtractor(GelbooruBase, gelbooru_v02.GelbooruV02PoolExtractor): """Extractor for gelbooru pools""" per_page = 45 - pattern = rf"{BASE_PATTERN}page=pool&s=show&id=(\d+)" + pattern = BASE_PATTERN + r"page=pool&s=show&id=(\d+)" example = "https://gelbooru.com/index.php?page=pool&s=show&id=12345" skip = GelbooruBase._skip_offset @@ -187,7 +187,7 @@ class GelbooruFavoriteExtractor(GelbooruBase, gelbooru_v02.GelbooruV02FavoriteExtractor): """Extractor for gelbooru favorites""" per_page = 100 - pattern = rf"{BASE_PATTERN}page=favorites&s=view&id=(\d+)" + pattern = BASE_PATTERN + r"page=favorites&s=view&id=(\d+)" example = 
"https://gelbooru.com/index.php?page=favorites&s=view&id=12345" skip = GelbooruBase._skip_offset @@ -284,10 +284,10 @@ class GelbooruFavoriteExtractor(GelbooruBase, class GelbooruPostExtractor(GelbooruBase, gelbooru_v02.GelbooruV02PostExtractor): """Extractor for single images from gelbooru.com""" - pattern = (rf"{BASE_PATTERN}" - rf"(?=(?:[^#]+&)?page=post(?:&|#|$))" - rf"(?=(?:[^#]+&)?s=view(?:&|#|$))" - rf"(?:[^#]+&)?id=(\d+)") + pattern = (BASE_PATTERN + + r"(?=(?:[^#]+&)?page=post(?:&|#|$))" + r"(?=(?:[^#]+&)?s=view(?:&|#|$))" + r"(?:[^#]+&)?id=(\d+)") example = "https://gelbooru.com/index.php?page=post&s=view&id=12345" diff --git a/gallery_dl/extractor/gelbooru_v01.py b/gallery_dl/extractor/gelbooru_v01.py index 7b9c732..1348272 100644 --- a/gallery_dl/extractor/gelbooru_v01.py +++ b/gallery_dl/extractor/gelbooru_v01.py @@ -87,7 +87,7 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" - pattern = rf"{BASE_PATTERN}/index\.php\?page=post&s=list&tags=([^&#]+)" + pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)" example = "https://allgirl.booru.org/index.php?page=post&s=list&tags=TAG" def metadata(self): @@ -104,7 +104,7 @@ class GelbooruV01FavoriteExtractor(GelbooruV01Extractor): directory_fmt = ("{category}", "favorites", "{favorite_id}") archive_fmt = "f_{favorite_id}_{id}" per_page = 50 - pattern = rf"{BASE_PATTERN}/index\.php\?page=favorites&s=view&id=(\d+)" + pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)" example = "https://allgirl.booru.org/index.php?page=favorites&s=view&id=1" def metadata(self): @@ -120,7 +120,7 @@ class GelbooruV01FavoriteExtractor(GelbooruV01Extractor): class GelbooruV01PostExtractor(GelbooruV01Extractor): subcategory = "post" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/index\.php\?page=post&s=view&id=(\d+)" + pattern = BASE_PATTERN + 
r"/index\.php\?page=post&s=view&id=(\d+)" example = "https://allgirl.booru.org/index.php?page=post&s=view&id=12345" def posts(self): diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py index 122f5a9..3a4b920 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -190,7 +190,7 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" - pattern = rf"{BASE_PATTERN}/index\.php\?page=post&s=list&tags=([^&#]*)" + pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]*)" example = "https://safebooru.org/index.php?page=post&s=list&tags=TAG" def posts(self): @@ -206,7 +206,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor): subcategory = "pool" directory_fmt = ("{category}", "pool", "{pool}") archive_fmt = "p_{pool}_{id}" - pattern = rf"{BASE_PATTERN}/index\.php\?page=pool&s=show&id=(\d+)" + pattern = BASE_PATTERN + r"/index\.php\?page=pool&s=show&id=(\d+)" example = "https://safebooru.org/index.php?page=pool&s=show&id=12345" def __init__(self, match): @@ -257,7 +257,7 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor): directory_fmt = ("{category}", "favorites", "{favorite_id}") archive_fmt = "f_{favorite_id}_{id}" per_page = 50 - pattern = rf"{BASE_PATTERN}/index\.php\?page=favorites&s=view&id=(\d+)" + pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)" example = "https://safebooru.org/index.php?page=favorites&s=view&id=12345" def metadata(self): @@ -275,7 +275,7 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor): class GelbooruV02PostExtractor(GelbooruV02Extractor): subcategory = "post" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/index\.php\?page=post&s=view&id=(\d+)" + pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)" example = "https://safebooru.org/index.php?page=post&s=view&id=12345" def posts(self): diff 
--git a/gallery_dl/extractor/girlsreleased.py b/gallery_dl/extractor/girlsreleased.py index 0fbdeff..da1add9 100644 --- a/gallery_dl/extractor/girlsreleased.py +++ b/gallery_dl/extractor/girlsreleased.py @@ -22,14 +22,14 @@ class GirlsreleasedExtractor(Extractor): def items(self): data = {"_extractor": GirlsreleasedSetExtractor} - base = f"{self.root}/set/" + base = self.root + "/set/" for set in self._pagination(): - yield Message.Queue, f"{base}{set[0]}", data + yield Message.Queue, base + set[0], data def _pagination(self): base = f"{self.root}/api/0.2/sets/{self._path}/{self.groups[0]}/page/" for pnum in itertools.count(): - sets = self.request_json(f"{base}{pnum}")["sets"] + sets = self.request_json(base + str(pnum))["sets"] if not sets: return @@ -41,7 +41,7 @@ class GirlsreleasedExtractor(Extractor): class GirlsreleasedSetExtractor(GirlsreleasedExtractor): """Extractor for girlsreleased galleries""" subcategory = "set" - pattern = rf"{BASE_PATTERN}/set/(\d+)" + pattern = BASE_PATTERN + r"/set/(\d+)" example = "https://girlsreleased.com/set/12345" def items(self): @@ -65,12 +65,12 @@ class GirlsreleasedSetExtractor(GirlsreleasedExtractor): class GirlsreleasedModelExtractor(GirlsreleasedExtractor): """Extractor for girlsreleased models""" subcategory = _path = "model" - pattern = rf"{BASE_PATTERN}/model/(\d+(?:/.+)?)" + pattern = BASE_PATTERN + r"/model/(\d+(?:/.+)?)" example = "https://girlsreleased.com/model/12345/MODEL" class GirlsreleasedSiteExtractor(GirlsreleasedExtractor): """Extractor for girlsreleased sites""" subcategory = _path = "site" - pattern = rf"{BASE_PATTERN}/site/([^/?#]+(?:/model/\d+/?.*)?)" + pattern = BASE_PATTERN + r"/site/([^/?#]+(?:/model/\d+/?.*)?)" example = "https://girlsreleased.com/site/SITE" diff --git a/gallery_dl/extractor/girlswithmuscle.py b/gallery_dl/extractor/girlswithmuscle.py index e61e472..420b2ac 100644 --- a/gallery_dl/extractor/girlswithmuscle.py +++ b/gallery_dl/extractor/girlswithmuscle.py @@ -60,7 +60,7 @@ class 
GirlswithmuscleExtractor(Extractor): class GirlswithmusclePostExtractor(GirlswithmuscleExtractor): """Extractor for individual posts on girlswithmuscle.com""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/(\d+)" + pattern = BASE_PATTERN + r"/(\d+)" example = "https://www.girlswithmuscle.com/12345/" def items(self): @@ -143,7 +143,7 @@ class GirlswithmusclePostExtractor(GirlswithmuscleExtractor): class GirlswithmuscleSearchExtractor(GirlswithmuscleExtractor): """Extractor for search results on girlswithmuscle.com""" subcategory = "search" - pattern = rf"{BASE_PATTERN}/images/(.*)" + pattern = BASE_PATTERN + r"/images/(.*)" example = "https://www.girlswithmuscle.com/images/?name=MODEL" def pages(self): diff --git a/gallery_dl/extractor/hatenablog.py b/gallery_dl/extractor/hatenablog.py index 7065d7b..7740eda 100644 --- a/gallery_dl/extractor/hatenablog.py +++ b/gallery_dl/extractor/hatenablog.py @@ -123,7 +123,7 @@ class HatenablogEntriesExtractor(HatenablogExtractor): class HatenablogEntryExtractor(HatenablogExtractor): """Extractor for a single entry URL""" subcategory = "entry" - pattern = rf"{BASE_PATTERN}/entry/([^?#]+){QUERY_RE}" + pattern = BASE_PATTERN + r"/entry/([^?#]+)" + QUERY_RE example = "https://BLOG.hatenablog.com/entry/PATH" def __init__(self, match): @@ -146,21 +146,21 @@ class HatenablogEntryExtractor(HatenablogExtractor): class HatenablogHomeExtractor(HatenablogEntriesExtractor): """Extractor for a blog's home page""" subcategory = "home" - pattern = rf"{BASE_PATTERN}(/?){QUERY_RE}" + pattern = BASE_PATTERN + r"(/?)" + QUERY_RE example = "https://BLOG.hatenablog.com" class HatenablogArchiveExtractor(HatenablogEntriesExtractor): """Extractor for a blog's archive page""" subcategory = "archive" - pattern = (rf"{BASE_PATTERN}(/archive(?:/\d+(?:/\d+(?:/\d+)?)?" - rf"|/category/[^?#]+)?){QUERY_RE}") + pattern = (BASE_PATTERN + r"(/archive(?:/\d+(?:/\d+(?:/\d+)?)?" 
+ r"|/category/[^?#]+)?)" + QUERY_RE) example = "https://BLOG.hatenablog.com/archive/2024" class HatenablogSearchExtractor(HatenablogEntriesExtractor): """Extractor for a blog's search results""" subcategory = "search" - pattern = rf"{BASE_PATTERN}(/search){QUERY_RE}" + pattern = BASE_PATTERN + r"(/search)" + QUERY_RE example = "https://BLOG.hatenablog.com/search?q=QUERY" allowed_parameters = ("q",) diff --git a/gallery_dl/extractor/hdoujin.py b/gallery_dl/extractor/hdoujin.py index 080b899..927aea2 100644 --- a/gallery_dl/extractor/hdoujin.py +++ b/gallery_dl/extractor/hdoujin.py @@ -23,19 +23,19 @@ class HdoujinBase(): class HdoujinGalleryExtractor( HdoujinBase, schalenetwork.SchalenetworkGalleryExtractor): - pattern = rf"{BASE_PATTERN}/(?:g|reader)/(\d+)/(\w+)" + pattern = BASE_PATTERN + r"/(?:g|reader)/(\d+)/(\w+)" example = "https://hdoujin.org/g/12345/67890abcdef/" class HdoujinSearchExtractor( HdoujinBase, schalenetwork.SchalenetworkSearchExtractor): - pattern = rf"{BASE_PATTERN}/(?:tag/([^/?#]+)|browse)?(?:/?\?([^#]*))?$" + pattern = BASE_PATTERN + r"/(?:tag/([^/?#]+)|browse)?(?:/?\?([^#]*))?$" example = "https://hdoujin.org/browse?s=QUERY" class HdoujinFavoriteExtractor( HdoujinBase, schalenetwork.SchalenetworkFavoriteExtractor): - pattern = rf"{BASE_PATTERN}/favorites(?:\?([^#]*))?" + pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?" 
example = "https://hdoujin.org/favorites" diff --git a/gallery_dl/extractor/hentaicosplays.py b/gallery_dl/extractor/hentaicosplays.py index b5f3d0e..5c2628f 100644 --- a/gallery_dl/extractor/hentaicosplays.py +++ b/gallery_dl/extractor/hentaicosplays.py @@ -38,7 +38,7 @@ class HentaicosplaysGalleryExtractor( directory_fmt = ("{site}", "{title}") filename_fmt = "{filename}.{extension}" archive_fmt = "{title}_{filename}" - pattern = rf"{BASE_PATTERN}/(?:image|story)/([\w-]+)" + pattern = BASE_PATTERN + r"/(?:image|story)/([\w-]+)" example = "https://hentai-cosplay-xxx.com/image/TITLE/" def __init__(self, match): diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index 882183b..f276ce4 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -214,7 +214,7 @@ class HentaifoundryExtractor(Extractor): class HentaifoundryUserExtractor(Dispatch, HentaifoundryExtractor): """Extractor for a hentaifoundry user profile""" - pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/profile" + pattern = BASE_PATTERN + r"/user/([^/?#]+)/profile" example = "https://www.hentai-foundry.com/user/USER/profile" def items(self): @@ -235,7 +235,7 @@ class HentaifoundryUserExtractor(Dispatch, HentaifoundryExtractor): class HentaifoundryPicturesExtractor(HentaifoundryExtractor): """Extractor for all pictures of a hentaifoundry user""" subcategory = "pictures" - pattern = rf"{BASE_PATTERN}/pictures/user/([^/?#]+)(?:/page/(\d+))?/?$" + pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)(?:/page/(\d+))?/?$" example = "https://www.hentai-foundry.com/pictures/user/USER" def __init__(self, match): @@ -247,7 +247,7 @@ class HentaifoundryScrapsExtractor(HentaifoundryExtractor): """Extractor for scraps of a hentaifoundry user""" subcategory = "scraps" directory_fmt = ("{category}", "{user}", "Scraps") - pattern = rf"{BASE_PATTERN}/pictures/user/([^/?#]+)/scraps" + pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)/scraps" example = 
"https://www.hentai-foundry.com/pictures/user/USER/scraps" def __init__(self, match): @@ -260,7 +260,7 @@ class HentaifoundryFavoriteExtractor(HentaifoundryExtractor): subcategory = "favorite" directory_fmt = ("{category}", "{user}", "Favorites") archive_fmt = "f_{user}_{index}" - pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/faves/pictures" + pattern = BASE_PATTERN + r"/user/([^/?#]+)/faves/pictures" example = "https://www.hentai-foundry.com/user/USER/faves/pictures" def __init__(self, match): @@ -273,7 +273,7 @@ class HentaifoundryTagExtractor(HentaifoundryExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{index}" - pattern = rf"{BASE_PATTERN}/pictures/tagged/([^/?#]+)" + pattern = BASE_PATTERN + r"/pictures/tagged/([^/?#]+)" example = "https://www.hentai-foundry.com/pictures/tagged/TAG" def __init__(self, match): @@ -289,7 +289,7 @@ class HentaifoundryRecentExtractor(HentaifoundryExtractor): subcategory = "recent" directory_fmt = ("{category}", "Recent Pictures", "{date}") archive_fmt = "r_{index}" - pattern = rf"{BASE_PATTERN}/pictures/recent/(\d\d\d\d-\d\d-\d\d)" + pattern = BASE_PATTERN + r"/pictures/recent/(\d\d\d\d-\d\d-\d\d)" example = "https://www.hentai-foundry.com/pictures/recent/1970-01-01" def __init__(self, match): @@ -305,7 +305,7 @@ class HentaifoundryPopularExtractor(HentaifoundryExtractor): subcategory = "popular" directory_fmt = ("{category}", "Popular Pictures") archive_fmt = "p_{index}" - pattern = rf"{BASE_PATTERN}/pictures/popular()" + pattern = BASE_PATTERN + r"/pictures/popular()" example = "https://www.hentai-foundry.com/pictures/popular" def __init__(self, match): @@ -339,7 +339,7 @@ class HentaifoundryStoriesExtractor(HentaifoundryExtractor): """Extractor for stories of a hentaifoundry user""" subcategory = "stories" archive_fmt = "s_{index}" - pattern = rf"{BASE_PATTERN}/stories/user/([^/?#]+)(?:/page/(\d+))?/?$" + pattern = BASE_PATTERN + 
r"/stories/user/([^/?#]+)(?:/page/(\d+))?/?$" example = "https://www.hentai-foundry.com/stories/user/USER" def items(self): @@ -358,7 +358,7 @@ class HentaifoundryStoryExtractor(HentaifoundryExtractor): """Extractor for a hentaifoundry story""" subcategory = "story" archive_fmt = "s_{index}" - pattern = rf"{BASE_PATTERN}/stories/user/([^/?#]+)/(\d+)" + pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)/(\d+)" example = "https://www.hentai-foundry.com/stories/user/USER/12345/TITLE" skip = Extractor.skip diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py index 0eaf798..a07a6b5 100644 --- a/gallery_dl/extractor/hiperdex.py +++ b/gallery_dl/extractor/hiperdex.py @@ -67,7 +67,7 @@ class HiperdexBase(): class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): """Extractor for hiperdex manga chapters""" - pattern = rf"{BASE_PATTERN}(/mangas?/([^/?#]+)/([^/?#]+))" + pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+)/([^/?#]+))" example = "https://hiperdex.com/manga/MANGA/CHAPTER/" def __init__(self, match): @@ -89,7 +89,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): class HiperdexMangaExtractor(HiperdexBase, MangaExtractor): """Extractor for hiperdex manga""" chapterclass = HiperdexChapterExtractor - pattern = rf"{BASE_PATTERN}(/mangas?/([^/?#]+))/?$" + pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+))/?$" example = "https://hiperdex.com/manga/MANGA/" def __init__(self, match): @@ -125,7 +125,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor): categorytransfer = False chapterclass = HiperdexMangaExtractor reverse = False - pattern = rf"{BASE_PATTERN}(/manga-a(?:rtist|uthor)/(?:[^/?#]+))" + pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/(?:[^/?#]+))" example = "https://hiperdex.com/manga-artist/NAME/" def __init__(self, match): diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index b05a9a7..fd3d00e 100644 --- a/gallery_dl/extractor/hitomi.py +++ 
b/gallery_dl/extractor/hitomi.py @@ -32,14 +32,14 @@ class HitomiExtractor(Extractor): language = tag tag = "index" else: - ns = f"{ns}/" + ns += "/" url = (f"https://ltn.{self.domain}/n/{ns}" f"/{tag.replace('_', ' ')}-{language}.nozomi") if headers is None: headers = {} headers["Origin"] = self.root - headers["Referer"] = f"{self.root}/" + headers["Referer"] = self.root + "/" return decode_nozomi(self.request(url, headers=headers).content) diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py index 953cf4e..600984e 100644 --- a/gallery_dl/extractor/hotleak.py +++ b/gallery_dl/extractor/hotleak.py @@ -61,7 +61,7 @@ def decode_video_url(url): class HotleakPostExtractor(HotleakExtractor): """Extractor for individual posts on hotleak""" subcategory = "post" - pattern = (rf"{BASE_PATTERN}/(?!(?:hot|creators|videos|photos)(?:$|/))" + pattern = (BASE_PATTERN + r"/(?!(?:hot|creators|videos|photos)(?:$|/))" r"([^/]+)/(photo|video)/(\d+)") example = "https://hotleak.vip/MODEL/photo/12345" @@ -96,7 +96,7 @@ class HotleakPostExtractor(HotleakExtractor): class HotleakCreatorExtractor(HotleakExtractor): """Extractor for all posts from a hotleak creator""" subcategory = "creator" - pattern = (rf"{BASE_PATTERN}/(?!(?:hot|creators|videos|photos)(?:$|/))" + pattern = (BASE_PATTERN + r"/(?!(?:hot|creators|videos|photos)(?:$|/))" r"([^/?#]+)/?$") example = "https://hotleak.vip/MODEL" @@ -150,7 +150,7 @@ class HotleakCreatorExtractor(HotleakExtractor): class HotleakCategoryExtractor(HotleakExtractor): """Extractor for hotleak categories""" subcategory = "category" - pattern = rf"{BASE_PATTERN}/(hot|creators|videos|photos)(?:/?\?([^#]+))?" + pattern = BASE_PATTERN + r"/(hot|creators|videos|photos)(?:/?\?([^#]+))?" 
example = "https://hotleak.vip/photos" def __init__(self, match): @@ -172,7 +172,7 @@ class HotleakCategoryExtractor(HotleakExtractor): class HotleakSearchExtractor(HotleakExtractor): """Extractor for hotleak search results""" subcategory = "search" - pattern = rf"{BASE_PATTERN}/search(?:/?\?([^#]+))" + pattern = BASE_PATTERN + r"/search(?:/?\?([^#]+))" example = "https://hotleak.vip/search?search=QUERY" def __init__(self, match): diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py index a8f1298..2f38802 100644 --- a/gallery_dl/extractor/idolcomplex.py +++ b/gallery_dl/extractor/idolcomplex.py @@ -29,17 +29,17 @@ class IdolcomplexBase(): class IdolcomplexTagExtractor(IdolcomplexBase, sankaku.SankakuTagExtractor): """Extractor for idolcomplex tag searches""" - pattern = rf"{BASE_PATTERN}(?:/posts)?/?\?([^#]*)" + pattern = BASE_PATTERN + r"(?:/posts)?/?\?([^#]*)" example = "https://www.idolcomplex.com/en/posts?tags=TAGS" class IdolcomplexPoolExtractor(IdolcomplexBase, sankaku.SankakuPoolExtractor): """Extractor for idolcomplex pools""" - pattern = rf"{BASE_PATTERN}/pools?/(?:show/)?(\w+)" + pattern = BASE_PATTERN + r"/pools?/(?:show/)?(\w+)" example = "https://www.idolcomplex.com/en/pools/0123456789abcdef" class IdolcomplexPostExtractor(IdolcomplexBase, sankaku.SankakuPostExtractor): """Extractor for individual idolcomplex posts""" - pattern = rf"{BASE_PATTERN}/posts?(?:/show)?/(\w+)" + pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)" example = "https://www.idolcomplex.com/en/posts/0123456789abcdef" diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py index 074b4ae..2191958 100644 --- a/gallery_dl/extractor/imagechest.py +++ b/gallery_dl/extractor/imagechest.py @@ -19,7 +19,7 @@ class ImagechestGalleryExtractor(GalleryExtractor): """Extractor for image galleries from imgchest.com""" category = "imagechest" root = "https://imgchest.com" - pattern = rf"{BASE_PATTERN}/p/([A-Za-z0-9]{{11}})" + pattern 
= BASE_PATTERN + r"/p/([A-Za-z0-9]{11})" example = "https://imgchest.com/p/abcdefghijk" def __init__(self, match): @@ -78,7 +78,7 @@ class ImagechestUserExtractor(Extractor): category = "imagechest" subcategory = "user" root = "https://imgchest.com" - pattern = rf"{BASE_PATTERN}/u/([^/?#]+)" + pattern = BASE_PATTERN + r"/u/([^/?#]+)" example = "https://imgchest.com/u/USER" def items(self): diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index f727969..06848e5 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -39,7 +39,7 @@ class ImagefapExtractor(Extractor): class ImagefapGalleryExtractor(ImagefapExtractor): """Extractor for image galleries from imagefap.com""" subcategory = "gallery" - pattern = rf"{BASE_PATTERN}/(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)" + pattern = BASE_PATTERN + r"/(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)" example = "https://www.imagefap.com/gallery/12345" def __init__(self, match): @@ -110,7 +110,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor): class ImagefapImageExtractor(ImagefapExtractor): """Extractor for single images from imagefap.com""" subcategory = "image" - pattern = rf"{BASE_PATTERN}/photo/(\d+)" + pattern = BASE_PATTERN + r"/photo/(\d+)" example = "https://www.imagefap.com/photo/12345" def __init__(self, match): @@ -148,9 +148,9 @@ class ImagefapImageExtractor(ImagefapExtractor): class ImagefapFolderExtractor(ImagefapExtractor): """Extractor for imagefap user folders""" subcategory = "folder" - pattern = (rf"{BASE_PATTERN}/(?:organizer/|" - rf"(?:usergallery\.php\?user(id)?=([^&#]+)&" - rf"|profile/([^/?#]+)/galleries\?)folderid=)(\d+|-1)") + pattern = (BASE_PATTERN + r"/(?:organizer/|" + r"(?:usergallery\.php\?user(id)?=([^&#]+)&" + r"|profile/([^/?#]+)/galleries\?)folderid=)(\d+|-1)") example = "https://www.imagefap.com/organizer/12345" def __init__(self, match): @@ -206,9 +206,9 @@ class ImagefapFolderExtractor(ImagefapExtractor): class 
ImagefapUserExtractor(ImagefapExtractor): """Extractor for an imagefap user profile""" subcategory = "user" - pattern = (rf"{BASE_PATTERN}/(?:" - rf"profile(?:\.php\?user=|/)([^/?#]+)(?:/galleries)?|" - rf"usergallery\.php\?userid=(\d+))(?:$|#)") + pattern = (BASE_PATTERN + + r"/(?:profile(?:\.php\?user=|/)([^/?#]+)(?:/galleries)?" + r"|usergallery\.php\?userid=(\d+))(?:$|#)") example = "https://www.imagefap.com/profile/USER" def __init__(self, match): diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py index 21e6cf8..d1abe01 100644 --- a/gallery_dl/extractor/imagehosts.py +++ b/gallery_dl/extractor/imagehosts.py @@ -11,7 +11,6 @@ from .common import Extractor, Message from .. import text, exception from ..cache import memcache -from os.path import splitext class ImagehostImageExtractor(Extractor): @@ -20,7 +19,6 @@ class ImagehostImageExtractor(Extractor): subcategory = "image" archive_fmt = "{token}" parent = True - _https = True _params = None _cookies = None _encoding = None @@ -28,10 +26,7 @@ class ImagehostImageExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) - if self.root: - self.page_url = f"{self.root}{match[1]}" - else: - self.page_url = f"http{'s' if self._https else ''}://{match[1]}" + self.page_url = (self.root or "https://") + match[1] self.token = match[2] if self._params == "simple": @@ -70,7 +65,7 @@ class ImagehostImageExtractor(Extractor): data["post_url"] = self.page_url data.update(self.metadata(page)) - if self._https and url.startswith("http:"): + if url.startswith("http:"): url = "https:" + url[5:] if self._validate is not None: data["_http_validate"] = self._validate @@ -102,9 +97,6 @@ class ImxtoImageExtractor(ImagehostImageExtractor): ImagehostImageExtractor.__init__(self, match) if "/img-" in self.page_url: self.page_url = self.page_url.replace("img.yt", "imx.to") - self.url_ext = True - else: - self.url_ext = False def get_info(self, page): url, pos = text.extract( @@ 
-112,9 +104,7 @@ class ImxtoImageExtractor(ImagehostImageExtractor): if not url: self.not_found() filename, pos = text.extract(page, ' title="', '"', pos) - if self.url_ext and filename: - filename += splitext(url)[1] - return url, filename or url + return url, filename or None def metadata(self, page): extr = text.extract_from(page, page.index("[ FILESIZE <")) @@ -176,7 +166,7 @@ class AcidimgImageExtractor(ImagehostImageExtractor): if not filename: filename, pos = text.extract(page, 'alt="', '"', pos) - return url, (filename + splitext(url)[1]) if filename else url + return url, filename or None class ImagevenueImageExtractor(ImagehostImageExtractor): @@ -407,7 +397,6 @@ class ImgclickImageExtractor(ImagehostImageExtractor): category = "imgclick" pattern = r"(?:https?://)?((?:www\.)?imgclick\.net/([^/?#]+))" example = "http://imgclick.net/abc123/NAME.EXT.html" - _https = False _params = "complex" def get_info(self, page): @@ -461,8 +450,8 @@ class ImgdriveImageExtractor(ImagehostImageExtractor): def __init__(self, match): path, category, self.token = match.groups() - self.page_url = f"https://{path}" - self.category = f"img{category}" + self.page_url = "https://" + path + self.category = "img" + category Extractor.__init__(self, match) def get_info(self, page): @@ -496,3 +485,28 @@ class SilverpicImageExtractor(ImagehostImageExtractor): "width" : text.parse_int(width), "height": text.parse_int(height), } + + +class ImgpvImageExtractor(ImagehostImageExtractor): + """Extractor for imgpv.com images""" + category = "imgpv" + root = "https://imgpv.com" + pattern = (r"(?:https?://)?(?:www\.)?imgpv\.com" + r"(/([a-z0-9]{10,})/[\S]+\.html)") + example = "https://www.imgpv.com/a1b2c3d4f5g6/NAME.EXT.html" + + def get_info(self, page): + url, pos = text.extract(page, 'id="img-preview" src="', '"') + alt, pos = text.extract(page, 'alt="', '"', pos) + return url, text.unescape(alt) + + def metadata(self, page): + pos = page.find('class="upinfo">') + date, pos = 
text.extract(page, '<b>', 'by', pos) + user, pos = text.extract(page, '>', '<', pos) + + date = date.split() + return { + "date": self.parse_datetime_iso(f"{date[0][:10]} {date[1]}"), + "user": text.unescape(user), + } diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py index d957328..ed06c1c 100644 --- a/gallery_dl/extractor/imgbb.py +++ b/gallery_dl/extractor/imgbb.py @@ -136,8 +136,8 @@ class ImgbbAlbumExtractor(ImgbbExtractor): 'data-text="image-count">', "<")), } - url = f"{self.root}/json" - params["pathname"] = f"/album/{album['id']}" + url = self.root + "/json" + params["pathname"] = "/album/" + album["id"] return self._pagination(page, url, params) @@ -190,11 +190,11 @@ class ImgbbUserExtractor(ImgbbExtractor): if response.status_code < 300: params["pathname"] = "/" - return self._pagination(response.text, f"{url}json", params) + return self._pagination(response.text, url + "json", params) if response.status_code == 301: raise exception.NotFoundError("user") - redirect = f"HTTP redirect to {response.headers.get('Location')}" + redirect = "HTTP redirect to " + response.headers.get("Location", "") if response.status_code == 302: raise exception.AuthRequired( ("username & password", "authenticated cookies"), diff --git a/gallery_dl/extractor/imgpile.py b/gallery_dl/extractor/imgpile.py index f634203..c73926a 100644 --- a/gallery_dl/extractor/imgpile.py +++ b/gallery_dl/extractor/imgpile.py @@ -22,13 +22,10 @@ class ImgpileExtractor(Extractor): "{post[title]} ({post[id_slug]})") archive_fmt = "{post[id_slug]}_{id}" - def items(self): - pass - class ImgpilePostExtractor(ImgpileExtractor): subcategory = "post" - pattern = rf"{BASE_PATTERN}/p/(\w+)" + pattern = BASE_PATTERN + r"/p/(\w+)" example = "https://imgpile.com/p/AbCdEfG" def items(self): @@ -71,24 +68,23 @@ class ImgpilePostExtractor(ImgpileExtractor): "id_slug": text.extr(media, 'data-id="', '"'), "id" : text.parse_int(text.extr( media, 'data-media-id="', '"')), - "url": 
f"""http{text.extr(media, '<a href="http', '"')}""", + "url": "http" + text.extr(media, '<a href="http', '"'), }) return files class ImgpileUserExtractor(ImgpileExtractor): subcategory = "user" - pattern = rf"{BASE_PATTERN}/u/([^/?#]+)" + pattern = BASE_PATTERN + r"/u/([^/?#]+)" example = "https://imgpile.com/u/USER" def items(self): - url = f"{self.root}/api/v1/posts" + url = self.root + "/api/v1/posts" params = { "limit" : "100", "sort" : "latest", "period" : "all", "visibility": "public", - # "moderation_status": "approved", "username" : self.groups[0], } headers = { @@ -101,7 +97,7 @@ class ImgpileUserExtractor(ImgpileExtractor): "Sec-Fetch-Site": "same-origin", } - base = f"{self.root}/p/" + base = self.root + "/p/" while True: data = self.request_json(url, params=params, headers=headers) @@ -111,7 +107,7 @@ class ImgpileUserExtractor(ImgpileExtractor): for item in data["data"]: item["_extractor"] = ImgpilePostExtractor - url = f"{base}{item['slug']}" + url = base + item["slug"] yield Message.Queue, url, item url = data["links"].get("next") diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 4755388..d80caf6 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -67,7 +67,7 @@ class ImgurImageExtractor(ImgurExtractor): subcategory = "image" filename_fmt = "{category}_{id}{title:?_//}.{extension}" archive_fmt = "{id}" - pattern = (rf"{BASE_PATTERN}/(?!gallery|search)" + pattern = (BASE_PATTERN + r"/(?!gallery|search)" r"(?:r/\w+/)?(?:[^/?#]+-)?(\w{7}|\w{5})[sbtmlh]?") example = "https://imgur.com/abcdefg" @@ -93,7 +93,7 @@ class ImgurAlbumExtractor(ImgurExtractor): directory_fmt = ("{category}", "{album[id]}{album[title]:? 
- //}") filename_fmt = "{category}_{album[id]}_{num:>03}_{id}.{extension}" archive_fmt = "{album[id]}_{id}" - pattern = rf"{BASE_PATTERN}/a/(?:[^/?#]+-)?(\w{{7}}|\w{{5}})" + pattern = BASE_PATTERN + r"/a/(?:[^/?#]+-)?(\w{7}|\w{5})" example = "https://imgur.com/a/abcde" def items(self): @@ -126,8 +126,7 @@ class ImgurAlbumExtractor(ImgurExtractor): class ImgurGalleryExtractor(ImgurExtractor): """Extractor for imgur galleries""" subcategory = "gallery" - pattern = (rf"{BASE_PATTERN}/" - rf"(?:gallery|t/\w+)/(?:[^/?#]+-)?(\w{{7}}|\w{{5}})") + pattern = BASE_PATTERN + r"/(?:gallery|t/\w+)/(?:[^/?#]+-)?(\w{7}|\w{5})" example = "https://imgur.com/gallery/abcde" def items(self): @@ -143,7 +142,7 @@ class ImgurGalleryExtractor(ImgurExtractor): class ImgurUserExtractor(ImgurExtractor): """Extractor for all images posted by a user""" subcategory = "user" - pattern = (rf"{BASE_PATTERN}/user/(?!me(?:/|$|\?|#))" + pattern = (BASE_PATTERN + r"/user/(?!me(?:/|$|\?|#))" r"([^/?#]+)(?:/posts|/submitted)?/?$") example = "https://imgur.com/user/USER" @@ -154,7 +153,7 @@ class ImgurUserExtractor(ImgurExtractor): class ImgurFavoriteExtractor(ImgurExtractor): """Extractor for a user's favorites""" subcategory = "favorite" - pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/favorites/?$" + pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites/?$" example = "https://imgur.com/user/USER/favorites" def items(self): @@ -164,7 +163,7 @@ class ImgurFavoriteExtractor(ImgurExtractor): class ImgurFavoriteFolderExtractor(ImgurExtractor): """Extractor for a user's favorites folder""" subcategory = "favorite-folder" - pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/favorites/folder/(\d+)" + pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites/folder/(\d+)" example = "https://imgur.com/user/USER/favorites/folder/12345/TITLE" def __init__(self, match): @@ -179,7 +178,7 @@ class ImgurFavoriteFolderExtractor(ImgurExtractor): class ImgurMeExtractor(ImgurExtractor): """Extractor for your personal uploads""" 
subcategory = "me" - pattern = rf"{BASE_PATTERN}/user/me(?:/posts)?(/hidden)?" + pattern = BASE_PATTERN + r"/user/me(?:/posts)?(/hidden)?" example = "https://imgur.com/user/me" def items(self): @@ -196,7 +195,7 @@ class ImgurMeExtractor(ImgurExtractor): class ImgurSubredditExtractor(ImgurExtractor): """Extractor for a subreddits's imgur links""" subcategory = "subreddit" - pattern = rf"{BASE_PATTERN}/r/([^/?#]+)/?$" + pattern = BASE_PATTERN + r"/r/([^/?#]+)/?$" example = "https://imgur.com/r/SUBREDDIT" def items(self): @@ -206,7 +205,7 @@ class ImgurSubredditExtractor(ImgurExtractor): class ImgurTagExtractor(ImgurExtractor): """Extractor for imgur tag searches""" subcategory = "tag" - pattern = rf"{BASE_PATTERN}/t/([^/?#]+)$" + pattern = BASE_PATTERN + r"/t/([^/?#]+)$" example = "https://imgur.com/t/TAG" def items(self): @@ -216,7 +215,7 @@ class ImgurTagExtractor(ImgurExtractor): class ImgurSearchExtractor(ImgurExtractor): """Extractor for imgur search results""" subcategory = "search" - pattern = rf"{BASE_PATTERN}/search(?:/[^?#]+)?/?\?q=([^&#]+)" + pattern = BASE_PATTERN + r"/search(?:/[^?#]+)?/?\?q=([^&#]+)" example = "https://imgur.com/search?q=UERY" def items(self): @@ -270,11 +269,11 @@ class ImgurAPI(): return self._pagination(endpoint, params) def gallery_subreddit(self, subreddit): - endpoint = f"/3/gallery/r/{subreddit}" + endpoint = "/3/gallery/r/" + subreddit return self._pagination(endpoint) def gallery_tag(self, tag): - endpoint = f"/3/gallery/t/{tag}" + endpoint = "/3/gallery/t/" + tag return self._pagination(endpoint, key="items") def image(self, image_hash): diff --git a/gallery_dl/extractor/imhentai.py b/gallery_dl/extractor/imhentai.py index d83dcc8..c996fb8 100644 --- a/gallery_dl/extractor/imhentai.py +++ b/gallery_dl/extractor/imhentai.py @@ -79,7 +79,7 @@ BASE_PATTERN = ImhentaiExtractor.update({ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor): """Extractor for imhentai galleries""" - pattern = 
rf"{BASE_PATTERN}/(?:gallery|view)/(\d+)" + pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)" example = "https://imhentai.xxx/gallery/12345/" def __init__(self, match): @@ -141,7 +141,7 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor): class ImhentaiTagExtractor(ImhentaiExtractor): """Extractor for imhentai tag searches""" subcategory = "tag" - pattern = (rf"{BASE_PATTERN}(/(?:" + pattern = (BASE_PATTERN + r"(/(?:" r"artist|category|character|group|language|parody|tag" r")/([^/?#]+))") example = "https://imhentai.xxx/tag/TAG/" @@ -154,7 +154,7 @@ class ImhentaiTagExtractor(ImhentaiExtractor): class ImhentaiSearchExtractor(ImhentaiExtractor): """Extractor for imhentai search results""" subcategory = "search" - pattern = rf"{BASE_PATTERN}(/(?:advanced-)?search/?\?[^#]+|/[^/?#]+/?)" + pattern = BASE_PATTERN + r"(/(?:advanced-)?search/?\?[^#]+|/[^/?#]+/?)" example = "https://imhentai.xxx/search/?key=QUERY" def items(self): diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py index 547d4ee..4158891 100644 --- a/gallery_dl/extractor/inkbunny.py +++ b/gallery_dl/extractor/inkbunny.py @@ -71,7 +71,7 @@ class InkbunnyExtractor(Extractor): class InkbunnyUserExtractor(InkbunnyExtractor): """Extractor for inkbunny user profiles""" subcategory = "user" - pattern = rf"{BASE_PATTERN}/(?!s/)(gallery/|scraps/)?(\w+)(?:$|[/?#])" + pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?(\w+)(?:$|[/?#])" example = "https://inkbunny.net/USER" def __init__(self, match): @@ -101,7 +101,7 @@ class InkbunnyUserExtractor(InkbunnyExtractor): class InkbunnyPoolExtractor(InkbunnyExtractor): """Extractor for inkbunny pools""" subcategory = "pool" - pattern = (rf"{BASE_PATTERN}/(?:" + pattern = (BASE_PATTERN + r"/(?:" r"poolview_process\.php\?pool_id=(\d+)|" r"submissionsviewall\.php" r"\?((?:[^#]+&)?mode=pool(?:&[^#]+)?))") @@ -132,7 +132,7 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor): """Extractor for inkbunny user favorites""" 
subcategory = "favorite" directory_fmt = ("{category}", "{favs_username!l}", "Favorites") - pattern = (rf"{BASE_PATTERN}/(?:" + pattern = (BASE_PATTERN + r"/(?:" r"userfavorites_process\.php\?favs_user_id=(\d+)|" r"submissionsviewall\.php" r"\?((?:[^#]+&)?mode=userfavs(?:&[^#]+)?))") @@ -175,7 +175,7 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor): class InkbunnyUnreadExtractor(InkbunnyExtractor): """Extractor for unread inkbunny submissions""" subcategory = "unread" - pattern = (rf"{BASE_PATTERN}/submissionsviewall\.php" + pattern = (BASE_PATTERN + r"/submissionsviewall\.php" r"\?((?:[^#]+&)?mode=unreadsubs(?:&[^#]+)?)") example = ("https://inkbunny.net/submissionsviewall.php" "?text=&mode=unreadsubs&type=") @@ -195,7 +195,7 @@ class InkbunnyUnreadExtractor(InkbunnyExtractor): class InkbunnySearchExtractor(InkbunnyExtractor): """Extractor for inkbunny search results""" subcategory = "search" - pattern = (rf"{BASE_PATTERN}/submissionsviewall\.php" + pattern = (BASE_PATTERN + r"/submissionsviewall\.php" r"\?((?:[^#]+&)?mode=search(?:&[^#]+)?)") example = ("https://inkbunny.net/submissionsviewall.php" "?text=TAG&mode=search&type=") @@ -229,7 +229,7 @@ class InkbunnySearchExtractor(InkbunnyExtractor): class InkbunnyFollowingExtractor(InkbunnyExtractor): """Extractor for inkbunny user watches""" subcategory = "following" - pattern = (rf"{BASE_PATTERN}/(?:" + pattern = (BASE_PATTERN + r"/(?:" r"watchlist_process\.php\?mode=watching&user_id=(\d+)|" r"usersviewall\.php" r"\?((?:[^#]+&)?mode=watching(?:&[^#]+)?))") @@ -268,7 +268,7 @@ class InkbunnyFollowingExtractor(InkbunnyExtractor): class InkbunnyPostExtractor(InkbunnyExtractor): """Extractor for individual Inkbunny posts""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/s/(\d+)" + pattern = BASE_PATTERN + r"/s/(\d+)" example = "https://inkbunny.net/s/12345" def __init__(self, match): diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index b89369f..27fa777 100644 --- 
a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -16,7 +16,7 @@ import itertools import binascii BASE_PATTERN = r"(?:https?://)?(?:www\.)?instagram\.com" -USER_PATTERN = rf"{BASE_PATTERN}/(?!(?:p|tv|reel|explore|stories)/)([^/?#]+)" +USER_PATTERN = BASE_PATTERN + r"/(?!(?:p|tv|reel|explore|stories)/)([^/?#]+)" class InstagramExtractor(Extractor): @@ -90,13 +90,11 @@ class InstagramExtractor(Extractor): post["count"] = len(files) yield Message.Directory, "", post - if "date" in post: - del post["date"] if reverse: files.reverse() for file in files: - file.update(post) + file = {**post, **file} if url := file.get("video_url"): if videos: @@ -139,6 +137,8 @@ class InstagramExtractor(Extractor): page = "login" elif "/challenge/" in url: page = "challenge" + elif 24 < len(url) < 28 and url[-1] == "/": + page = "home" else: page = None @@ -269,8 +269,8 @@ class InstagramExtractor(Extractor): width = image["width"] height = image["height"] - if self._warn_image < ((width < width_orig) + - (height < height_orig)): + if self._warn_image < ((width * 1.1 < width_orig) + + (height * 1.1 < height_orig)): self.log.warning( "%s: Available image resolutions lower than the " "original (%sx%s < %sx%s). 
" @@ -301,7 +301,7 @@ class InstagramExtractor(Extractor): if "reshared_story_media_author" in item: media["author"] = item["reshared_story_media_author"] if "expiring_at" in item: - media["expires"] = self.parse_timestamp(post["expiring_at"]) + media["expires"] = self.parse_timestamp(item["expiring_at"]) if "subscription_media_visibility" in item: media["subscription"] = item["subscription_media_visibility"] @@ -505,7 +505,7 @@ class InstagramPostExtractor(InstagramExtractor): class InstagramUserExtractor(Dispatch, InstagramExtractor): """Extractor for an Instagram user profile""" - pattern = rf"{USER_PATTERN}/?(?:$|[?#])" + pattern = USER_PATTERN + r"/?(?:$|[?#])" example = "https://www.instagram.com/USER/" def items(self): @@ -525,7 +525,7 @@ class InstagramUserExtractor(Dispatch, InstagramExtractor): class InstagramPostsExtractor(InstagramExtractor): """Extractor for an Instagram user's posts""" subcategory = "posts" - pattern = rf"{USER_PATTERN}/posts" + pattern = USER_PATTERN + r"/posts" example = "https://www.instagram.com/USER/posts/" def posts(self): @@ -542,7 +542,7 @@ class InstagramPostsExtractor(InstagramExtractor): class InstagramReelsExtractor(InstagramExtractor): """Extractor for an Instagram user's reels""" subcategory = "reels" - pattern = rf"{USER_PATTERN}/reels" + pattern = USER_PATTERN + r"/reels" example = "https://www.instagram.com/USER/reels/" def posts(self): @@ -559,7 +559,7 @@ class InstagramReelsExtractor(InstagramExtractor): class InstagramTaggedExtractor(InstagramExtractor): """Extractor for an Instagram user's tagged posts""" subcategory = "tagged" - pattern = rf"{USER_PATTERN}/tagged" + pattern = USER_PATTERN + r"/tagged" example = "https://www.instagram.com/USER/tagged/" def metadata(self): @@ -585,7 +585,7 @@ class InstagramTaggedExtractor(InstagramExtractor): class InstagramGuideExtractor(InstagramExtractor): """Extractor for an Instagram guide""" subcategory = "guide" - pattern = rf"{USER_PATTERN}/guide/[^/?#]+/(\d+)" + pattern = 
USER_PATTERN + r"/guide/[^/?#]+/(\d+)" example = "https://www.instagram.com/USER/guide/NAME/12345" def __init__(self, match): @@ -602,7 +602,7 @@ class InstagramGuideExtractor(InstagramExtractor): class InstagramSavedExtractor(InstagramExtractor): """Extractor for an Instagram user's saved media""" subcategory = "saved" - pattern = rf"{USER_PATTERN}/saved(?:/all-posts)?/?$" + pattern = USER_PATTERN + r"/saved(?:/all-posts)?/?$" example = "https://www.instagram.com/USER/saved/" def posts(self): @@ -612,7 +612,7 @@ class InstagramSavedExtractor(InstagramExtractor): class InstagramCollectionExtractor(InstagramExtractor): """Extractor for Instagram collection""" subcategory = "collection" - pattern = rf"{USER_PATTERN}/saved/([^/?#]+)/([^/?#]+)" + pattern = USER_PATTERN + r"/saved/([^/?#]+)/([^/?#]+)" example = "https://www.instagram.com/USER/saved/COLLECTION/12345" def __init__(self, match): @@ -632,11 +632,11 @@ class InstagramCollectionExtractor(InstagramExtractor): class InstagramStoriesTrayExtractor(InstagramExtractor): """Extractor for your Instagram account's stories tray""" subcategory = "stories-tray" - pattern = rf"{BASE_PATTERN}/stories/me/?$()" + pattern = BASE_PATTERN + r"/stories/me/?$()" example = "https://www.instagram.com/stories/me/" def items(self): - base = f"{self.root}/stories/id:" + base = self.root + "/stories/id:" for story in self.api.reels_tray(): story["date"] = self.parse_timestamp(story["latest_reel_media"]) story["_extractor"] = InstagramStoriesExtractor @@ -696,7 +696,7 @@ class InstagramStoriesExtractor(InstagramExtractor): class InstagramHighlightsExtractor(InstagramExtractor): """Extractor for an Instagram user's story highlights""" subcategory = "highlights" - pattern = rf"{USER_PATTERN}/highlights" + pattern = USER_PATTERN + r"/highlights" example = "https://www.instagram.com/USER/highlights/" def posts(self): @@ -707,7 +707,7 @@ class InstagramHighlightsExtractor(InstagramExtractor): class 
InstagramFollowersExtractor(InstagramExtractor): """Extractor for an Instagram user's followers""" subcategory = "followers" - pattern = rf"{USER_PATTERN}/followers" + pattern = USER_PATTERN + r"/followers" example = "https://www.instagram.com/USER/followers/" def items(self): @@ -721,7 +721,7 @@ class InstagramFollowersExtractor(InstagramExtractor): class InstagramFollowingExtractor(InstagramExtractor): """Extractor for an Instagram user's followed users""" subcategory = "following" - pattern = rf"{USER_PATTERN}/following" + pattern = USER_PATTERN + r"/following" example = "https://www.instagram.com/USER/following/" def items(self): @@ -736,7 +736,7 @@ class InstagramTagExtractor(InstagramExtractor): """Extractor for Instagram tags""" subcategory = "tag" directory_fmt = ("{category}", "{subcategory}", "{tag}") - pattern = rf"{BASE_PATTERN}/explore/tags/([^/?#]+)" + pattern = BASE_PATTERN + r"/explore/tags/([^/?#]+)" example = "https://www.instagram.com/explore/tags/TAG/" def metadata(self): @@ -749,7 +749,7 @@ class InstagramTagExtractor(InstagramExtractor): class InstagramInfoExtractor(InstagramExtractor): """Extractor for an Instagram user's profile data""" subcategory = "info" - pattern = rf"{USER_PATTERN}/info" + pattern = USER_PATTERN + r"/info" example = "https://www.instagram.com/USER/info/" def items(self): @@ -765,7 +765,7 @@ class InstagramInfoExtractor(InstagramExtractor): class InstagramAvatarExtractor(InstagramExtractor): """Extractor for an Instagram user's avatar""" subcategory = "avatar" - pattern = rf"{USER_PATTERN}/avatar" + pattern = USER_PATTERN + r"/avatar" example = "https://www.instagram.com/USER/avatar/" def posts(self): diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py index 566ee8b..5e0afb3 100644 --- a/gallery_dl/extractor/itaku.py +++ b/gallery_dl/extractor/itaku.py @@ -13,7 +13,7 @@ from ..cache import memcache from .. 
import text, util BASE_PATTERN = r"(?:https?://)?itaku\.ee" -USER_PATTERN = rf"{BASE_PATTERN}/profile/([^/?#]+)" +USER_PATTERN = BASE_PATTERN + r"/profile/([^/?#]+)" class ItakuExtractor(Extractor): @@ -34,7 +34,7 @@ class ItakuExtractor(Extractor): for image in images: image["date"] = self.parse_datetime_iso(image["date_added"]) for category, tags in image.pop("categorized_tags").items(): - image[f"tags_{category.lower()}"] = [ + image["tags_" + category.lower()] = [ t["name"] for t in tags] image["tags"] = [t["name"] for t in image["tags"]] @@ -73,9 +73,9 @@ class ItakuExtractor(Extractor): return if users := self.users(): - base = f"{self.root}/profile/" + base = self.root + "/profile/" for user in users: - url = f"{base}{user['owner_username']}" + url = base + user["owner_username"] user["_extractor"] = ItakuUserExtractor yield Message.Queue, url, user return @@ -86,7 +86,7 @@ class ItakuExtractor(Extractor): class ItakuGalleryExtractor(ItakuExtractor): """Extractor for an itaku user's gallery""" subcategory = "gallery" - pattern = rf"{USER_PATTERN}/gallery(?:/(\d+))?" + pattern = USER_PATTERN + r"/gallery(?:/(\d+))?" example = "https://itaku.ee/profile/USER/gallery" def images(self): @@ -104,7 +104,7 @@ class ItakuPostsExtractor(ItakuExtractor): "{id}{title:? //}") filename_fmt = "{file[id]}{file[title]:? //}.{extension}" archive_fmt = "{id}_{file[id]}" - pattern = rf"{USER_PATTERN}/posts(?:/(\d+))?" + pattern = USER_PATTERN + r"/posts(?:/(\d+))?" example = "https://itaku.ee/profile/USER/posts" def posts(self): @@ -118,7 +118,7 @@ class ItakuPostsExtractor(ItakuExtractor): class ItakuStarsExtractor(ItakuExtractor): """Extractor for an itaku user's starred images""" subcategory = "stars" - pattern = rf"{USER_PATTERN}/stars(?:/(\d+))?" + pattern = USER_PATTERN + r"/stars(?:/(\d+))?" 
example = "https://itaku.ee/profile/USER/stars" def images(self): @@ -132,7 +132,7 @@ class ItakuStarsExtractor(ItakuExtractor): class ItakuFollowingExtractor(ItakuExtractor): subcategory = "following" - pattern = rf"{USER_PATTERN}/following" + pattern = USER_PATTERN + r"/following" example = "https://itaku.ee/profile/USER/following" def users(self): @@ -143,7 +143,7 @@ class ItakuFollowingExtractor(ItakuExtractor): class ItakuFollowersExtractor(ItakuExtractor): subcategory = "followers" - pattern = rf"{USER_PATTERN}/followers" + pattern = USER_PATTERN + r"/followers" example = "https://itaku.ee/profile/USER/followers" def users(self): @@ -155,7 +155,7 @@ class ItakuFollowersExtractor(ItakuExtractor): class ItakuBookmarksExtractor(ItakuExtractor): """Extractor for an itaku bookmarks folder""" subcategory = "bookmarks" - pattern = rf"{USER_PATTERN}/bookmarks/(image|user)/(\d+)" + pattern = USER_PATTERN + r"/bookmarks/(image|user)/(\d+)" example = "https://itaku.ee/profile/USER/bookmarks/image/12345" def _init(self): @@ -176,23 +176,23 @@ class ItakuBookmarksExtractor(ItakuExtractor): class ItakuUserExtractor(Dispatch, ItakuExtractor): """Extractor for itaku user profiles""" - pattern = rf"{USER_PATTERN}/?(?:$|\?|#)" + pattern = USER_PATTERN + r"/?(?:$|\?|#)" example = "https://itaku.ee/profile/USER" def items(self): base = f"{self.root}/profile/{self.groups[0]}/" return self._dispatch_extractors(( - (ItakuGalleryExtractor , f"{base}gallery"), - (ItakuPostsExtractor , f"{base}posts"), - (ItakuFollowersExtractor, f"{base}followers"), - (ItakuFollowingExtractor, f"{base}following"), - (ItakuStarsExtractor , f"{base}stars"), + (ItakuGalleryExtractor , base + "gallery"), + (ItakuPostsExtractor , base + "posts"), + (ItakuFollowersExtractor, base + "followers"), + (ItakuFollowingExtractor, base + "following"), + (ItakuStarsExtractor , base + "stars"), ), ("gallery",)) class ItakuImageExtractor(ItakuExtractor): subcategory = "image" - pattern = 
rf"{BASE_PATTERN}/images/(\d+)" + pattern = BASE_PATTERN + r"/images/(\d+)" example = "https://itaku.ee/images/12345" def images(self): @@ -205,7 +205,7 @@ class ItakuPostExtractor(ItakuExtractor): "{id}{title:? //}") filename_fmt = "{file[id]}{file[title]:? //}.{extension}" archive_fmt = "{id}_{file[id]}" - pattern = rf"{BASE_PATTERN}/posts/(\d+)" + pattern = BASE_PATTERN + r"/posts/(\d+)" example = "https://itaku.ee/posts/12345" def posts(self): @@ -214,7 +214,7 @@ class ItakuPostExtractor(ItakuExtractor): class ItakuSearchExtractor(ItakuExtractor): subcategory = "search" - pattern = rf"{BASE_PATTERN}/home/images/?\?([^#]+)" + pattern = BASE_PATTERN + r"/home/images/?\?([^#]+)" example = "https://itaku.ee/home/images?tags=SEARCH" def images(self): @@ -246,7 +246,7 @@ class ItakuAPI(): def __init__(self, extractor): self.extractor = extractor - self.root = f"{extractor.root}/api" + self.root = extractor.root + "/api" self.headers = { "Accept": "application/json, text/plain, */*", } @@ -309,7 +309,7 @@ class ItakuAPI(): def _call(self, endpoint, params=None): if not endpoint.startswith("http"): - endpoint = f"{self.root}{endpoint}" + endpoint = self.root + endpoint return self.extractor.request_json( endpoint, params=params, headers=self.headers) diff --git a/gallery_dl/extractor/iwara.py b/gallery_dl/extractor/iwara.py index d9d1cf0..5a2049d 100644 --- a/gallery_dl/extractor/iwara.py +++ b/gallery_dl/extractor/iwara.py @@ -12,7 +12,7 @@ from ..cache import cache, memcache import hashlib BASE_PATTERN = r"(?:https?://)?(?:www\.)?iwara\.tv" -USER_PATTERN = rf"{BASE_PATTERN}/profile/([^/?#]+)" +USER_PATTERN = BASE_PATTERN + r"/profile/([^/?#]+)" class IwaraExtractor(Extractor): @@ -79,10 +79,10 @@ class IwaraExtractor(Extractor): continue yield Message.Directory, "", info - yield Message.Url, f"https:{download_url}", info + yield Message.Url, "https:" + download_url, info def items_user(self, users, key=None): - base = f"{self.root}/profile/" + base = self.root + 
"/profile/" for user in users: if key is not None: user = user[key] @@ -90,7 +90,7 @@ class IwaraExtractor(Extractor): continue user["type"] = "user" user["_extractor"] = IwaraUserExtractor - yield Message.Queue, f"{base}{username}", user + yield Message.Queue, base + username, user def items_by_type(self, type, results): if type == "image": @@ -158,21 +158,21 @@ class IwaraExtractor(Extractor): class IwaraUserExtractor(Dispatch, IwaraExtractor): """Extractor for iwara.tv profile pages""" - pattern = rf"{USER_PATTERN}/?$" + pattern = USER_PATTERN + r"/?$" example = "https://www.iwara.tv/profile/USERNAME" def items(self): base = f"{self.root}/profile/{self.groups[0]}/" return self._dispatch_extractors(( - (IwaraUserImagesExtractor , f"{base}images"), - (IwaraUserVideosExtractor , f"{base}videos"), - (IwaraUserPlaylistsExtractor, f"{base}playlists"), + (IwaraUserImagesExtractor , base + "images"), + (IwaraUserVideosExtractor , base + "videos"), + (IwaraUserPlaylistsExtractor, base + "playlists"), ), ("user-images", "user-videos")) class IwaraUserImagesExtractor(IwaraExtractor): subcategory = "user-images" - pattern = rf"{USER_PATTERN}/images(?:\?([^#]+))?" + pattern = USER_PATTERN + r"/images(?:\?([^#]+))?" example = "https://www.iwara.tv/profile/USERNAME/images" def items(self): @@ -182,7 +182,7 @@ class IwaraUserImagesExtractor(IwaraExtractor): class IwaraUserVideosExtractor(IwaraExtractor): subcategory = "user-videos" - pattern = rf"{USER_PATTERN}/videos(?:\?([^#]+))?" + pattern = USER_PATTERN + r"/videos(?:\?([^#]+))?" example = "https://www.iwara.tv/profile/USERNAME/videos" def items(self): @@ -192,22 +192,22 @@ class IwaraUserVideosExtractor(IwaraExtractor): class IwaraUserPlaylistsExtractor(IwaraExtractor): subcategory = "user-playlists" - pattern = rf"{USER_PATTERN}/playlists(?:\?([^#]+))?" + pattern = USER_PATTERN + r"/playlists(?:\?([^#]+))?" 
example = "https://www.iwara.tv/profile/USERNAME/playlists" def items(self): - base = f"{self.root}/playlist/" + base = self.root + "/playlist/" for playlist in self.api.playlists(self._user_params()[1]): playlist["type"] = "playlist" playlist["_extractor"] = IwaraPlaylistExtractor - url = f"{base}{playlist['id']}" + url = base + playlist["id"] yield Message.Queue, url, playlist class IwaraFollowingExtractor(IwaraExtractor): subcategory = "following" - pattern = rf"{USER_PATTERN}/following" + pattern = USER_PATTERN + r"/following" example = "https://www.iwara.tv/profile/USERNAME/following" def items(self): @@ -217,7 +217,7 @@ class IwaraFollowingExtractor(IwaraExtractor): class IwaraFollowersExtractor(IwaraExtractor): subcategory = "followers" - pattern = rf"{USER_PATTERN}/followers" + pattern = USER_PATTERN + r"/followers" example = "https://www.iwara.tv/profile/USERNAME/followers" def items(self): @@ -228,7 +228,7 @@ class IwaraFollowersExtractor(IwaraExtractor): class IwaraImageExtractor(IwaraExtractor): """Extractor for individual iwara.tv image pages""" subcategory = "image" - pattern = rf"{BASE_PATTERN}/image/([^/?#]+)" + pattern = BASE_PATTERN + r"/image/([^/?#]+)" example = "https://www.iwara.tv/image/ID" def items(self): @@ -238,7 +238,7 @@ class IwaraImageExtractor(IwaraExtractor): class IwaraVideoExtractor(IwaraExtractor): """Extractor for individual iwara.tv videos""" subcategory = "video" - pattern = rf"{BASE_PATTERN}/video/([^/?#]+)" + pattern = BASE_PATTERN + r"/video/([^/?#]+)" example = "https://www.iwara.tv/video/ID" def items(self): @@ -248,7 +248,7 @@ class IwaraVideoExtractor(IwaraExtractor): class IwaraPlaylistExtractor(IwaraExtractor): """Extractor for individual iwara.tv playlist pages""" subcategory = "playlist" - pattern = rf"{BASE_PATTERN}/playlist/([^/?#]+)" + pattern = BASE_PATTERN + r"/playlist/([^/?#]+)" example = "https://www.iwara.tv/playlist/ID" def items(self): @@ -257,7 +257,7 @@ class IwaraPlaylistExtractor(IwaraExtractor): 
class IwaraFavoriteExtractor(IwaraExtractor): subcategory = "favorite" - pattern = rf"{BASE_PATTERN}/favorites(?:/(image|video)s)?" + pattern = BASE_PATTERN + r"/favorites(?:/(image|video)s)?" example = "https://www.iwara.tv/favorites/videos" def items(self): @@ -268,7 +268,7 @@ class IwaraFavoriteExtractor(IwaraExtractor): class IwaraSearchExtractor(IwaraExtractor): """Extractor for iwara.tv search pages""" subcategory = "search" - pattern = rf"{BASE_PATTERN}/search\?([^#]+)" + pattern = BASE_PATTERN + r"/search\?([^#]+)" example = "https://www.iwara.tv/search?query=QUERY&type=TYPE" def items(self): @@ -281,7 +281,7 @@ class IwaraSearchExtractor(IwaraExtractor): class IwaraTagExtractor(IwaraExtractor): """Extractor for iwara.tv tag search""" subcategory = "tag" - pattern = rf"{BASE_PATTERN}/(image|video)s(?:\?([^#]+))?" + pattern = BASE_PATTERN + r"/(image|video)s(?:\?([^#]+))?" example = "https://www.iwara.tv/videos?tags=TAGS" def items(self): @@ -298,7 +298,7 @@ class IwaraAPI(): def __init__(self, extractor): self.extractor = extractor self.headers = { - "Referer" : f"{extractor.root}/", + "Referer" : extractor.root + "/", "Content-Type": "application/json", "Origin" : extractor.root, } @@ -308,15 +308,15 @@ class IwaraAPI(): self.authenticate = util.noop def image(self, image_id): - endpoint = f"/image/{image_id}" + endpoint = "/image/" + image_id return self._call(endpoint) def video(self, video_id): - endpoint = f"/video/{video_id}" + endpoint = "/video/" + video_id return self._call(endpoint) def playlist(self, playlist_id): - endpoint = f"/playlist/{playlist_id}" + endpoint = "/playlist/" + playlist_id return self._pagination(endpoint) def detail(self, media): @@ -356,7 +356,7 @@ class IwaraAPI(): @memcache(keyarg=1) def profile(self, username): - endpoint = f"/profile/{username}" + endpoint = "/profile/" + username return self._call(endpoint) def user_following(self, user_id): @@ -387,7 +387,7 @@ class IwaraAPI(): if refresh_token is None: 
self.extractor.log.info("Logging in as %s", username) - url = f"{self.root}/user/login" + url = self.root + "/user/login" json = { "email" : username, "password": self.password @@ -403,15 +403,15 @@ class IwaraAPI(): self.extractor.log.info("Refreshing access token for %s", username) - url = f"{self.root}/user/token" - headers = {"Authorization": f"Bearer {refresh_token}", **self.headers} + url = self.root + "/user/token" + headers = {"Authorization": "Bearer " + refresh_token, **self.headers} data = self.extractor.request_json( url, method="POST", headers=headers, fatal=False) if not (access_token := data.get("accessToken")): self.extractor.log.debug(data) raise exception.AuthenticationError(data.get("message")) - return f"Bearer {access_token}" + return "Bearer " + access_token def _call(self, endpoint, params=None, headers=None): if headers is None: diff --git a/gallery_dl/extractor/jschan.py b/gallery_dl/extractor/jschan.py index 5dacf70..6e03e7b 100644 --- a/gallery_dl/extractor/jschan.py +++ b/gallery_dl/extractor/jschan.py @@ -30,7 +30,7 @@ class JschanThreadExtractor(JschanExtractor): "{threadId} {subject|nomarkup[:50]}") filename_fmt = "{postId}{num:?-//} {filename}.{extension}" archive_fmt = "{board}_{postId}_{num}" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/thread/(\d+)\.html" + pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)\.html" example = "https://94chan.org/a/thread/12345.html" def items(self): @@ -56,7 +56,7 @@ class JschanThreadExtractor(JschanExtractor): class JschanBoardExtractor(JschanExtractor): """Extractor for jschan boards""" subcategory = "board" - pattern = (rf"{BASE_PATTERN}/([^/?#]+)" + pattern = (BASE_PATTERN + r"/([^/?#]+)" r"(?:/index\.html|/catalog\.html|/\d+\.html|/?$)") example = "https://94chan.org/a/" diff --git a/gallery_dl/extractor/kabeuchi.py b/gallery_dl/extractor/kabeuchi.py index 88f2e32..e41e9b0 100644 --- a/gallery_dl/extractor/kabeuchi.py +++ b/gallery_dl/extractor/kabeuchi.py @@ -52,7 +52,7 @@ class 
KabeuchiUserExtractor(Extractor): return self._pagination(target_id) def _pagination(self, target_id): - url = f"{self.root}/get_posts.php" + url = self.root + "/get_posts.php" data = { "user_id" : "0", "target_id" : target_id, diff --git a/gallery_dl/extractor/kemono.py b/gallery_dl/extractor/kemono.py index bf35670..a001d10 100644 --- a/gallery_dl/extractor/kemono.py +++ b/gallery_dl/extractor/kemono.py @@ -16,7 +16,7 @@ import json BASE_PATTERN = (r"(?:https?://)?(?:www\.|beta\.)?" r"(kemono|coomer)\.(cr|s[tu]|party)") -USER_PATTERN = rf"{BASE_PATTERN}/([^/?#]+)/user/([^/?#]+)" +USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)" HASH_PATTERN = r"/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})" @@ -200,7 +200,7 @@ class KemonoExtractor(Extractor): username = username[0] self.log.info("Logging in as %s", username) - url = f"{self.root}/api/v1/authentication/login" + url = self.root + "/api/v1/authentication/login" data = {"username": username, "password": password} response = self.request(url, method="POST", json=data, fatal=False) @@ -322,7 +322,7 @@ def _validate(response): class KemonoUserExtractor(KemonoExtractor): """Extractor for all posts from a kemono.cr user listing""" subcategory = "user" - pattern = rf"{USER_PATTERN}/?(?:\?([^#]+))?(?:$|\?|#)" + pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|\?|#)" example = "https://kemono.cr/SERVICE/user/12345" def __init__(self, match): @@ -345,7 +345,7 @@ class KemonoUserExtractor(KemonoExtractor): class KemonoPostsExtractor(KemonoExtractor): """Extractor for kemono.cr post listings""" subcategory = "posts" - pattern = rf"{BASE_PATTERN}/posts()()(?:/?\?([^#]+))?" + pattern = BASE_PATTERN + r"/posts()()(?:/?\?([^#]+))?" example = "https://kemono.cr/posts" def posts(self): @@ -357,7 +357,7 @@ class KemonoPostsExtractor(KemonoExtractor): class KemonoPostExtractor(KemonoExtractor): """Extractor for a single kemono.cr post""" subcategory = "post" - pattern = rf"{USER_PATTERN}/post/([^/?#]+)(/revisions?(?:/(\d*))?)?" 
+ pattern = USER_PATTERN + r"/post/([^/?#]+)(/revisions?(?:/(\d*))?)?" example = "https://kemono.cr/SERVICE/user/12345/post/12345" def __init__(self, match): @@ -390,7 +390,7 @@ class KemonoDiscordExtractor(KemonoExtractor): "{server_id} {server}", "{channel_id} {channel}") filename_fmt = "{id}_{num:>02}_{filename}.{extension}" archive_fmt = "discord_{server_id}_{id}_{num}" - pattern = rf"{BASE_PATTERN}/discord/server/(\d+)[/#](?:channel/)?(\d+)" + pattern = BASE_PATTERN + r"/discord/server/(\d+)[/#](?:channel/)?(\d+)" example = "https://kemono.cr/discord/server/12345/12345" def items(self): @@ -434,7 +434,7 @@ class KemonoDiscordExtractor(KemonoExtractor): attachment["type"] = "attachment" files.append(attachment) for path in find_inline(post["content"] or ""): - files.append({"path": f"https://cdn.discordapp.com{path}", + files.append({"path": "https://cdn.discordapp.com" + path, "name": path, "type": "inline", "hash": ""}) post.update(data) @@ -460,7 +460,7 @@ class KemonoDiscordExtractor(KemonoExtractor): class KemonoDiscordServerExtractor(KemonoExtractor): subcategory = "discord-server" - pattern = rf"{BASE_PATTERN}/discord/server/(\d+)$" + pattern = BASE_PATTERN + r"/discord/server/(\d+)$" example = "https://kemono.cr/discord/server/12345" def items(self): @@ -488,7 +488,7 @@ def discord_server_info(extr, server_id): class KemonoFavoriteExtractor(KemonoExtractor): """Extractor for kemono.cr favorites""" subcategory = "favorite" - pattern = rf"{BASE_PATTERN}/(?:account/)?favorites()()(?:/?\?([^#]+))?" + pattern = BASE_PATTERN + r"/(?:account/)?favorites()()(?:/?\?([^#]+))?" example = "https://kemono.cr/account/favorites/artists" def items(self): @@ -536,7 +536,7 @@ class KemonoFavoriteExtractor(KemonoExtractor): class KemonoArtistsExtractor(KemonoExtractor): """Extractor for kemono artists""" subcategory = "artists" - pattern = rf"{BASE_PATTERN}/artists(?:\?([^#]+))?" + pattern = BASE_PATTERN + r"/artists(?:\?([^#]+))?" 
example = "https://kemono.cr/artists" def items(self): @@ -577,7 +577,7 @@ class KemonoAPI(): def __init__(self, extractor): self.extractor = extractor - self.root = f"{extractor.root}/api" + self.root = extractor.root + "/api" self.headers = {"Accept": "text/css"} def posts(self, offset=0, query=None, tags=None): @@ -586,7 +586,7 @@ class KemonoAPI(): return self._pagination(endpoint, params, 50, "posts") def file(self, file_hash): - endpoint = f"/v1/file/{file_hash}" + endpoint = "/v1/file/" + file_hash return self._call(endpoint) def creators(self): @@ -643,18 +643,18 @@ class KemonoAPI(): return self._call(endpoint) def discord_channel(self, channel_id, post_count=None): - endpoint = f"/v1/discord/channel/{channel_id}" + endpoint = "/v1/discord/channel/" + channel_id if post_count is None: return self._pagination(endpoint, {}, 150) else: return self._pagination_reverse(endpoint, {}, 150, post_count) def discord_channel_lookup(self, server_id): - endpoint = f"/v1/discord/channel/lookup/{server_id}" + endpoint = "/v1/discord/channel/lookup/" + server_id return self._call(endpoint) def discord_server(self, server_id): - endpoint = f"/v1/discord/server/{server_id}" + endpoint = "/v1/discord/server/" + server_id return self._call(endpoint) def account_favorites(self, type): @@ -669,7 +669,7 @@ class KemonoAPI(): headers = {**self.headers, **headers} return self.extractor.request_json( - f"{self.root}{endpoint}", params=params, headers=headers, + self.root + endpoint, params=params, headers=headers, encoding="utf-8", fatal=fatal) def _pagination(self, endpoint, params, batch=50, key=None): diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py index e2f00e1..fad63c9 100644 --- a/gallery_dl/extractor/komikcast.py +++ b/gallery_dl/extractor/komikcast.py @@ -44,7 +44,7 @@ class KomikcastBase(): class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor): """Extractor for komikcast manga chapters""" - pattern = 
rf"{BASE_PATTERN}(/chapter/[^/?#]+/)" + pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/)" example = "https://komikcast.li/chapter/TITLE/" def metadata(self, page): @@ -64,7 +64,7 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor): class KomikcastMangaExtractor(KomikcastBase, MangaExtractor): """Extractor for komikcast manga""" chapterclass = KomikcastChapterExtractor - pattern = rf"{BASE_PATTERN}(/(?:komik/)?[^/?#]+/?)$" + pattern = BASE_PATTERN + r"(/(?:komik/)?[^/?#]+/?)$" example = "https://komikcast.li/komik/TITLE" def chapters(self, page): diff --git a/gallery_dl/extractor/koofr.py b/gallery_dl/extractor/koofr.py index 9ebc133..08a737c 100644 --- a/gallery_dl/extractor/koofr.py +++ b/gallery_dl/extractor/koofr.py @@ -17,6 +17,8 @@ class KoofrSharedExtractor(Extractor): category = "koofr" subcategory = "shared" root = "https://app.koofr.net" + directory_fmt = ("{category}", "{date:%Y-%m-%d} {title}") + archive_fmt = "{post[id]}_{hash|id}" pattern = (r"(?:https?://)?(?:" r"(?:app\.)?koofr\.(?:net|eu)/links/([\w-]+)|" r"k00\.fr/(\w+))") @@ -41,15 +43,45 @@ class KoofrSharedExtractor(Extractor): "Sec-Fetch-Site" : "same-origin", } data = self.request_json(url, params=params, headers=headers) + root = data.get("publicUrlBase") or self.root + base = f"{root}/content/links/{uuid}/files/get/" + headers = {"Referer": referer} + file = data["file"] - name = data["name"] - file = text.nameext_from_name(name, data["file"]) - file["_http_headers"] = {"Referer": referer} + if file["type"] == "dir" and not self.config("zip", False): + path = True + url = url + "/bundle" + params["path"] = "/" + files = self.request_json( + url, params=params, headers=headers)["files"] + else: + path = False + files = (file,) - root = data.get("publicUrlBase") or self.root - url = f"{root}/content/links/{uuid}/files/get/{name}?path=/&force=" if password: - url = f"{url}&password={password}" + password = text.escape(password) + + post = { + "id" : data["id"], + "title": 
data["name"], + "count": len(files), + "date" : self.parse_timestamp(file["modified"] / 1000), + } + + yield Message.Directory, "", post + for num, file in enumerate(files, 1): + file["count"] = len(files) + file["num"] = num + file["post"] = post + file["date"] = self.parse_timestamp(file["modified"] / 1000) + file["_http_headers"] = headers + + name = file["name"] + text.nameext_from_name(name, file) + + name = text.escape(name) + url = (f"{base}{name}?path=%2F{name if path else '&force'}") + if password: + url = f"{url}&password={password}" - yield Message.Directory, "", file - yield Message.Url, url, file + yield Message.Url, url, file diff --git a/gallery_dl/extractor/leakgallery.py b/gallery_dl/extractor/leakgallery.py index 2939304..d119ac6 100644 --- a/gallery_dl/extractor/leakgallery.py +++ b/gallery_dl/extractor/leakgallery.py @@ -35,7 +35,7 @@ class LeakgalleryExtractor(Extractor): else: media["creator"] = creator - media["url"] = url = f"https://cdn.leakgallery.com/{path}" + media["url"] = url = "https://cdn.leakgallery.com/" + path text.nameext_from_url(url, media) yield Message.Directory, "", media yield Message.Url, url, media @@ -43,7 +43,7 @@ class LeakgalleryExtractor(Extractor): def _pagination(self, type, base, params=None, creator=None, pnum=1): while True: try: - data = self.request_json(f"{base}{pnum}", params=params) + data = self.request_json(base + str(pnum), params=params) if not data: return @@ -81,7 +81,7 @@ class LeakgalleryUserExtractor(LeakgalleryExtractor): class LeakgalleryTrendingExtractor(LeakgalleryExtractor): """Extractor for trending posts on leakgallery.com""" subcategory = "trending" - pattern = rf"{BASE_PATTERN}/trending-medias(?:/([\w-]+))?" + pattern = BASE_PATTERN + r"/trending-medias(?:/([\w-]+))?" 
example = "https://leakgallery.com/trending-medias/Week" def items(self): @@ -93,7 +93,7 @@ class LeakgalleryTrendingExtractor(LeakgalleryExtractor): class LeakgalleryMostlikedExtractor(LeakgalleryExtractor): """Extractor for most liked posts on leakgallery.com""" subcategory = "mostliked" - pattern = rf"{BASE_PATTERN}/most-liked" + pattern = BASE_PATTERN + r"/most-liked" example = "https://leakgallery.com/most-liked" def items(self): @@ -104,7 +104,7 @@ class LeakgalleryMostlikedExtractor(LeakgalleryExtractor): class LeakgalleryPostExtractor(LeakgalleryExtractor): """Extractor for individual posts on leakgallery.com""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/(\d+)" + pattern = BASE_PATTERN + r"/([^/?#]+)/(\d+)" example = "https://leakgallery.com/CREATOR/12345" def items(self): diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py index a7b1318..124991a 100644 --- a/gallery_dl/extractor/lensdump.py +++ b/gallery_dl/extractor/lensdump.py @@ -31,7 +31,7 @@ class LensdumpBase(): class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor): subcategory = "album" - pattern = rf"{BASE_PATTERN}/a/(\w+)(?:/?\?([^#]+))?" + pattern = BASE_PATTERN + r"/a/(\w+)(?:/?\?([^#]+))?" example = "https://lensdump.com/a/ID" def __init__(self, match): @@ -76,7 +76,7 @@ class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor): class LensdumpAlbumsExtractor(LensdumpBase, Extractor): """Extractor for album list from lensdump.com""" subcategory = "albums" - pattern = rf"{BASE_PATTERN}/(?![ai]/)([^/?#]+)(?:/?\?([^#]+))?" + pattern = BASE_PATTERN + r"/(?![ai]/)([^/?#]+)(?:/?\?([^#]+))?" 
example = "https://lensdump.com/USER" def items(self): diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py index d17549d..1ddce55 100644 --- a/gallery_dl/extractor/lolisafe.py +++ b/gallery_dl/extractor/lolisafe.py @@ -25,7 +25,7 @@ BASE_PATTERN = LolisafeExtractor.update({ class LolisafeAlbumExtractor(LolisafeExtractor): subcategory = "album" - pattern = rf"{BASE_PATTERN}/a/([^/?#]+)" + pattern = BASE_PATTERN + "/a/([^/?#]+)" example = "https://xbunkr.com/a/ID" def __init__(self, match): diff --git a/gallery_dl/extractor/lynxchan.py b/gallery_dl/extractor/lynxchan.py index 7cf1282..86b317a 100644 --- a/gallery_dl/extractor/lynxchan.py +++ b/gallery_dl/extractor/lynxchan.py @@ -39,7 +39,7 @@ class LynxchanThreadExtractor(LynxchanExtractor): "{threadId} {subject|message[:50]}") filename_fmt = "{postId}{num:?-//} {filename}.{extension}" archive_fmt = "{boardUri}_{postId}_{num}" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/res/(\d+)" + pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)" example = "https://endchan.org/a/res/12345.html" def items(self): @@ -63,7 +63,7 @@ class LynxchanThreadExtractor(LynxchanExtractor): class LynxchanBoardExtractor(LynxchanExtractor): """Extractor for LynxChan boards""" subcategory = "board" - pattern = rf"{BASE_PATTERN}/([^/?#]+)(?:/index|/catalog|/\d+|/?$)" + pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)" example = "https://endchan.org/a/" def items(self): diff --git a/gallery_dl/extractor/madokami.py b/gallery_dl/extractor/madokami.py index e15b90d..0b90e8e 100644 --- a/gallery_dl/extractor/madokami.py +++ b/gallery_dl/extractor/madokami.py @@ -25,7 +25,7 @@ class MadokamiMangaExtractor(MadokamiExtractor): subcategory = "manga" directory_fmt = ("{category}", "{manga}") archive_fmt = "{chapter_id}" - pattern = rf"{BASE_PATTERN}/Manga/(\w/\w{{2}}/\w{{4}}/.+)" + pattern = BASE_PATTERN + r"/Manga/(\w/\w{2}/\w{4}/.+)" example = "https://manga.madokami.al/Manga/A/AB/ABCD/ABCDE_TITLE" def 
items(self): @@ -85,7 +85,7 @@ class MadokamiMangaExtractor(MadokamiExtractor): else: ch["volume"] = ch["chapter"] = ch["chapter_end"] = 0 - url = f"{self.root}{ch['path']}" + url = self.root + ch["path"] text.nameext_from_url(url, ch) yield Message.Directory, "", ch diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 0a1709d..12e5f8d 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -66,7 +66,7 @@ class MangadexExtractor(Extractor): "title" : cattributes["title"], "volume" : text.parse_int(cattributes["volume"]), "chapter" : text.parse_int(chnum), - "chapter_minor": f"{sep}{minor}", + "chapter_minor": sep + minor, "chapter_id": chapter["id"], "date" : self.parse_datetime_iso(cattributes["publishAt"]), "group" : [group["attributes"]["name"] @@ -83,8 +83,8 @@ class MangadexCoversExtractor(MangadexExtractor): directory_fmt = ("{category}", "{manga}", "Covers") filename_fmt = "{volume:>02}_{lang}.{extension}" archive_fmt = "c_{cover_id}" - pattern = (rf"{BASE_PATTERN}/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)" - rf"(?:/[^/?#]+)?\?tab=art") + pattern = (BASE_PATTERN + r"/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)" + r"(?:/[^/?#]+)?\?tab=art") example = ("https://mangadex.org/title" "/01234567-89ab-cdef-0123-456789abcdef?tab=art") @@ -96,7 +96,7 @@ class MangadexCoversExtractor(MangadexExtractor): text.nameext_from_url(name, data) data["cover_id"] = data["filename"] yield Message.Directory, "", data - yield Message.Url, f"{base}{name}", data + yield Message.Url, base + name, data def _transform_cover(self, cover): relationships = defaultdict(list) @@ -117,7 +117,7 @@ class MangadexCoversExtractor(MangadexExtractor): class MangadexChapterExtractor(MangadexExtractor): """Extractor for manga-chapters from mangadex.org""" subcategory = "chapter" - pattern = rf"{BASE_PATTERN}/chapter/([0-9a-f-]+)" + pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)" example = ("https://mangadex.org/chapter" 
"/01234567-89ab-cdef-0123-456789abcdef") @@ -148,15 +148,15 @@ class MangadexChapterExtractor(MangadexExtractor): enum = util.enumerate_reversed if self.config( "page-reverse") else enumerate - for data["page"], page in enum(chapter[key], 1): - text.nameext_from_url(page, data) - yield Message.Url, f"{base}{page}", data + for data["page"], path in enum(chapter[key], 1): + text.nameext_from_url(path, data) + yield Message.Url, base + path, data class MangadexMangaExtractor(MangadexExtractor): """Extractor for manga from mangadex.org""" subcategory = "manga" - pattern = rf"{BASE_PATTERN}/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)" + pattern = BASE_PATTERN + r"/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)" example = ("https://mangadex.org/title" "/01234567-89ab-cdef-0123-456789abcdef") @@ -167,7 +167,7 @@ class MangadexMangaExtractor(MangadexExtractor): class MangadexFeedExtractor(MangadexExtractor): """Extractor for chapters from your Updates Feed""" subcategory = "feed" - pattern = rf"{BASE_PATTERN}/titles?/feed$()" + pattern = BASE_PATTERN + r"/titles?/feed$()" example = "https://mangadex.org/title/feed" def chapters(self): @@ -177,7 +177,7 @@ class MangadexFeedExtractor(MangadexExtractor): class MangadexFollowingExtractor(MangadexExtractor): """Extractor for followed manga from your Library""" subcategory = "following" - pattern = rf"{BASE_PATTERN}/titles?/follows(?:\?([^#]+))?$" + pattern = BASE_PATTERN + r"/titles?/follows(?:\?([^#]+))?$" example = "https://mangadex.org/title/follows" items = MangadexExtractor._items_manga @@ -189,8 +189,8 @@ class MangadexFollowingExtractor(MangadexExtractor): class MangadexListExtractor(MangadexExtractor): """Extractor for mangadex MDLists""" subcategory = "list" - pattern = (rf"{BASE_PATTERN}" - rf"/list/([0-9a-f-]+)(?:/[^/?#]*)?(?:\?tab=(\w+))?") + pattern = (BASE_PATTERN + + r"/list/([0-9a-f-]+)(?:/[^/?#]*)?(?:\?tab=(\w+))?") example = ("https://mangadex.org/list" "/01234567-89ab-cdef-0123-456789abcdef/NAME") @@ -215,7 
+215,7 @@ class MangadexListExtractor(MangadexExtractor): class MangadexAuthorExtractor(MangadexExtractor): """Extractor for mangadex authors""" subcategory = "author" - pattern = rf"{BASE_PATTERN}/author/([0-9a-f-]+)" + pattern = BASE_PATTERN + r"/author/([0-9a-f-]+)" example = ("https://mangadex.org/author" "/01234567-89ab-cdef-0123-456789abcdef/NAME") @@ -253,22 +253,22 @@ class MangadexAPI(): else text.ensure_http_scheme(server).rstrip("/")) def athome_server(self, uuid): - return self._call(f"/at-home/server/{uuid}") + return self._call("/at-home/server/" + uuid) def author(self, uuid, manga=False): params = {"includes[]": ("manga",)} if manga else None - return self._call(f"/author/{uuid}", params)["data"] + return self._call("/author/" + uuid, params)["data"] def chapter(self, uuid): params = {"includes[]": ("scanlation_group",)} - return self._call(f"/chapter/{uuid}", params)["data"] + return self._call("/chapter/" + uuid, params)["data"] def covers_manga(self, uuid): params = {"manga[]": uuid} return self._pagination_covers("/cover", params) def list(self, uuid): - return self._call(f"/list/{uuid}", None, True)["data"] + return self._call("/list/" + uuid, None, True)["data"] def list_feed(self, uuid): return self._pagination_chapters(f"/list/{uuid}/feed", None, True) @@ -276,7 +276,7 @@ class MangadexAPI(): @memcache(keyarg=1) def manga(self, uuid): params = {"includes[]": ("artist", "author")} - return self._call(f"/manga/{uuid}", params)["data"] + return self._call("/manga/" + uuid, params)["data"] def manga_author(self, uuid_author): params = {"authorOrArtist": uuid_author} @@ -339,17 +339,17 @@ class MangadexAPI(): _refresh_token_cache.update( (username, "personal"), data["refresh_token"]) - return f"Bearer {access_token}" + return "Bearer " + access_token @cache(maxage=900, keyarg=1) def _authenticate_impl_legacy(self, username, password): if refresh_token := _refresh_token_cache(username): self.extractor.log.info("Refreshing access token") - url = 
f"{self.root}/auth/refresh" + url = self.root + "/auth/refresh" json = {"token": refresh_token} else: self.extractor.log.info("Logging in as %s", username) - url = f"{self.root}/auth/login" + url = self.root + "/auth/login" json = {"username": username, "password": password} self.extractor.log.debug("Using legacy login method") @@ -360,10 +360,10 @@ class MangadexAPI(): if refresh_token != data["token"]["refresh"]: _refresh_token_cache.update(username, data["token"]["refresh"]) - return f"Bearer {data['token']['session']}" + return "Bearer " + data["token"]["session"] def _call(self, endpoint, params=None, auth=False): - url = f"{self.root}{endpoint}" + url = self.root + endpoint headers = self.headers_auth if auth else self.headers while True: diff --git a/gallery_dl/extractor/mangafire.py b/gallery_dl/extractor/mangafire.py index 5ccb732..8db91b3 100644 --- a/gallery_dl/extractor/mangafire.py +++ b/gallery_dl/extractor/mangafire.py @@ -31,8 +31,8 @@ class MangafireChapterExtractor(MangafireBase, ChapterExtractor): "{page:>03}.{extension}") archive_fmt = ( "{manga_id}_{chapter_id}_{page}") - pattern = (rf"{BASE_PATTERN}/read/([\w-]+\.(\w+))/([\w-]+)" - rf"/((chapter|volume)-\d+(?:\D.*)?)") + pattern = (BASE_PATTERN + r"/read/([\w-]+\.(\w+))/([\w-]+)" + r"/((chapter|volume)-\d+(?:\D.*)?)") example = "https://mangafire.to/read/MANGA.ID/LANG/chapter-123" def metadata(self, _): @@ -64,7 +64,7 @@ class MangafireChapterExtractor(MangafireBase, ChapterExtractor): class MangafireMangaExtractor(MangafireBase, MangaExtractor): """Extractor for mangafire manga""" chapterclass = MangafireChapterExtractor - pattern = rf"{BASE_PATTERN}/manga/([\w-]+)\.(\w+)" + pattern = BASE_PATTERN + r"/manga/([\w-]+)\.(\w+)" example = "https://mangafire.to/manga/MANGA.ID" def chapters(self, page): @@ -75,7 +75,7 @@ class MangafireMangaExtractor(MangafireBase, MangaExtractor): chapters = _manga_chapters(self, (manga_id, "chapter", lang)) return [ - (f"""{self.root}{text.extr(anchor, 'href="', 
'"')}""", { + (self.root + text.extr(anchor, 'href="', '"'), { **manga, **_chapter_info(anchor), }) @@ -160,7 +160,7 @@ def _chapter_info(info): chapter, sep, minor = text.extr(info, 'data-number="', '"').partition(".") return { "chapter" : text.parse_int(chapter), - "chapter_minor" : f"{sep}{minor}", + "chapter_minor" : sep + minor, "chapter_string": chapter_info, "chapter_id" : text.parse_int(text.extr(info, 'data-id="', '"')), "title" : text.unescape(text.extr(info, 'title="', '"')), diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py index 8fa645b..a5f31e3 100644 --- a/gallery_dl/extractor/mangafox.py +++ b/gallery_dl/extractor/mangafox.py @@ -18,8 +18,8 @@ class MangafoxChapterExtractor(ChapterExtractor): """Extractor for manga chapters from fanfox.net""" category = "mangafox" root = "https://m.fanfox.net" - pattern = (rf"{BASE_PATTERN}" - rf"(/manga/[^/?#]+/((?:v([^/?#]+)/)?c(\d+)([^/?#]*)))") + pattern = BASE_PATTERN + \ + r"(/manga/[^/?#]+/((?:v([^/?#]+)/)?c(\d+)([^/?#]*)))" example = "https://fanfox.net/manga/TITLE/v01/c001/1.html" def __init__(self, match): @@ -62,7 +62,7 @@ class MangafoxMangaExtractor(MangaExtractor): category = "mangafox" root = "https://m.fanfox.net" chapterclass = MangafoxChapterExtractor - pattern = rf"{BASE_PATTERN}(/manga/[^/?#]+)/?$" + pattern = BASE_PATTERN + r"(/manga/[^/?#]+)/?$" example = "https://fanfox.net/manga/TITLE" def chapters(self, page): diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py index 9b3a3a1..f0cc928 100644 --- a/gallery_dl/extractor/mangahere.py +++ b/gallery_dl/extractor/mangahere.py @@ -28,7 +28,7 @@ class MangahereChapterExtractor(MangahereBase, ChapterExtractor): def __init__(self, match): self.part, self.volume, self.chapter = match.groups() self.base = f"{self.root_mobile}/manga/{self.part}/" - ChapterExtractor.__init__(self, match, f"{self.base}1.html") + ChapterExtractor.__init__(self, match, self.base + "1.html") def _init(self): 
self.session.headers["Referer"] = self.root_mobile + "/" diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py index 3ecf934..6b68b83 100644 --- a/gallery_dl/extractor/manganelo.py +++ b/gallery_dl/extractor/manganelo.py @@ -9,8 +9,8 @@ """Extractors for https://www.mangakakalot.gg/ and mirror sites""" -from .common import BaseExtractor, ChapterExtractor, MangaExtractor -from .. import text, util +from .common import BaseExtractor, ChapterExtractor, MangaExtractor, Message +from .. import text, util, exception class ManganeloExtractor(BaseExtractor): @@ -39,7 +39,7 @@ BASE_PATTERN = ManganeloExtractor.update({ class ManganeloChapterExtractor(ManganeloExtractor, ChapterExtractor): """Extractor for manganelo manga chapters""" - pattern = rf"{BASE_PATTERN}(/manga/[^/?#]+/chapter-[^/?#]+)" + pattern = BASE_PATTERN + r"(/manga/[^/?#]+/chapter-[^/?#]+)" example = "https://www.mangakakalot.gg/manga/MANGA_NAME/chapter-123" def __init__(self, match): @@ -86,7 +86,7 @@ class ManganeloChapterExtractor(ManganeloExtractor, ChapterExtractor): class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor): """Extractor for manganelo manga""" chapterclass = ManganeloChapterExtractor - pattern = rf"{BASE_PATTERN}(/manga/[^/?#]+)$" + pattern = BASE_PATTERN + r"(/manga/[^/?#]+)$" example = "https://www.mangakakalot.gg/manga/MANGA_NAME" def __init__(self, match): @@ -126,3 +126,33 @@ class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor): "language": "English", })) return results + + +class ManganeloBookmarkExtractor(ManganeloExtractor): + """Extractor for manganelo bookmarks""" + subcategory = "bookmark" + pattern = BASE_PATTERN + r"/bookmark" + example = "https://www.mangakakalot.gg/bookmark" + + def items(self): + data = {"_extractor": ManganeloMangaExtractor} + + url = self.root + "/bookmark" + params = {"page": 1} + + response = self.request(url, params=params) + if response.history: + raise exception.AuthRequired( + "authenticated 
cookies", "your bookmarks") + page = response.text + last = text.parse_int(text.extr(page, ">Last(", ")")) + + while True: + for bookmark in text.extract_iter( + page, 'class="user-bookmark-item ', '</a>'): + yield Message.Queue, text.extr(bookmark, ' href="', '"'), data + + if params["page"] >= last: + break + params["page"] += 1 + page = self.request(url, params=params).text diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py index e2f9166..35fbf19 100644 --- a/gallery_dl/extractor/mangapark.py +++ b/gallery_dl/extractor/mangapark.py @@ -70,8 +70,8 @@ class MangaparkBase(): class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor): """Extractor for manga-chapters from mangapark.net""" - pattern = (rf"{BASE_PATTERN}/" - rf"(?:title/[^/?#]+/|comic/\d+/[^/?#]+/[^/?#]+-i)(\d+)") + pattern = (BASE_PATTERN + + r"/(?:title/[^/?#]+/|comic/\d+/[^/?#]+/[^/?#]+-i)(\d+)") example = "https://mangapark.net/title/MANGA/12345-en-ch.01" def __init__(self, match): @@ -111,7 +111,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor): class MangaparkMangaExtractor(MangaparkBase, Extractor): """Extractor for manga from mangapark.net""" subcategory = "manga" - pattern = rf"{BASE_PATTERN}/(?:title|comic)/(\d+)(?:[/-][^/?#]*)?/?$" + pattern = BASE_PATTERN + r"/(?:title|comic)/(\d+)(?:[/-][^/?#]*)?/?$" example = "https://mangapark.net/title/12345-MANGA" def __init__(self, match): diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py index eb53998..c786c99 100644 --- a/gallery_dl/extractor/mangareader.py +++ b/gallery_dl/extractor/mangareader.py @@ -31,8 +31,8 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor): "{page:>03}.{extension}") archive_fmt = ( "{manga_id}_{chapter_id}_{page}") - pattern = (rf"{BASE_PATTERN}/read/([\w-]+-\d+)/([^/?#]+)" - rf"/(chapter|volume)-(\d+[^/?#]*)") + pattern = (BASE_PATTERN + r"/read/([\w-]+-\d+)/([^/?#]+)" + r"/(chapter|volume)-(\d+[^/?#]*)") 
example = "https://mangareader.to/read/MANGA-123/LANG/chapter-123" def metadata(self, _): @@ -81,7 +81,7 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor): class MangareaderMangaExtractor(MangareaderBase, MangaExtractor): """Extractor for mangareader manga""" chapterclass = MangareaderChapterExtractor - pattern = rf"{BASE_PATTERN}/([\w-]+-\d+)" + pattern = BASE_PATTERN + r"/([\w-]+-\d+)" example = "https://mangareader.to/MANGA-123" def chapters(self, page): @@ -138,9 +138,9 @@ def _manga_info(self, manga_path): current[chap] = { "title" : name.partition(":")[2].strip(), "chapter" : text.parse_int(chapter), - "chapter_minor" : f"{sep}{minor}", + "chapter_minor" : sep + minor, "chapter_string": chap, - "chapter_url" : f"{base}{path}", + "chapter_url" : base + path, "lang" : lang, } @@ -162,7 +162,7 @@ def _manga_info(self, manga_path): "chapter" : 0, "chapter_minor" : "", "chapter_string": voln, - "chapter_url" : f"{base}{path}", + "chapter_url" : base + path, "lang" : lang, } diff --git a/gallery_dl/extractor/mangataro.py b/gallery_dl/extractor/mangataro.py index 029bc2e..7d9c126 100644 --- a/gallery_dl/extractor/mangataro.py +++ b/gallery_dl/extractor/mangataro.py @@ -23,7 +23,7 @@ class MangataroBase(): class MangataroChapterExtractor(MangataroBase, ChapterExtractor): """Extractor for mangataro manga chapters""" - pattern = rf"{BASE_PATTERN}(/read/([^/?#]+)/(?:[^/?#]*-)?(\d+))" + pattern = BASE_PATTERN + r"(/read/([^/?#]+)/(?:[^/?#]*-)?(\d+))" example = "https://mangataro.org/read/MANGA/ch123-12345" def metadata(self, page): @@ -59,7 +59,7 @@ class MangataroChapterExtractor(MangataroBase, ChapterExtractor): class MangataroMangaExtractor(MangataroBase, MangaExtractor): """Extractor for mangataro manga""" chapterclass = MangataroChapterExtractor - pattern = rf"{BASE_PATTERN}(/manga/([^/?#]+))" + pattern = BASE_PATTERN + r"(/manga/([^/?#]+))" example = "https://mangataro.org/manga/MANGA" def chapters(self, page): @@ -75,7 +75,7 @@ class 
MangataroMangaExtractor(MangataroBase, MangaExtractor): results.append((url, { **manga, "chapter" : text.parse_int(chapter), - "chapter_minor": f".{minor}" if sep else "", + "chapter_minor": "." + minor if sep else "", "chapter_id" : text.parse_int(chapter_id), })) return results diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index 165f8b8..cca1530 100644 --- a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -118,7 +118,7 @@ BASE_PATTERN = MastodonExtractor.update({ class MastodonUserExtractor(MastodonExtractor): """Extractor for all images of an account/user""" subcategory = "user" - pattern = rf"{BASE_PATTERN}/(?:@|users/)([^/?#]+)(?:/media)?/?$" + pattern = BASE_PATTERN + r"/(?:@|users/)([^/?#]+)(?:/media)?/?$" example = "https://mastodon.social/@USER" def statuses(self): @@ -138,7 +138,7 @@ class MastodonUserExtractor(MastodonExtractor): class MastodonBookmarkExtractor(MastodonExtractor): """Extractor for mastodon bookmarks""" subcategory = "bookmark" - pattern = rf"{BASE_PATTERN}/bookmarks" + pattern = BASE_PATTERN + r"/bookmarks" example = "https://mastodon.social/bookmarks" def statuses(self): @@ -148,7 +148,7 @@ class MastodonBookmarkExtractor(MastodonExtractor): class MastodonFavoriteExtractor(MastodonExtractor): """Extractor for mastodon favorites""" subcategory = "favorite" - pattern = rf"{BASE_PATTERN}/favourites" + pattern = BASE_PATTERN + r"/favourites" example = "https://mastodon.social/favourites" def statuses(self): @@ -158,7 +158,7 @@ class MastodonFavoriteExtractor(MastodonExtractor): class MastodonListExtractor(MastodonExtractor): """Extractor for mastodon lists""" subcategory = "list" - pattern = rf"{BASE_PATTERN}/lists/(\w+)" + pattern = BASE_PATTERN + r"/lists/(\w+)" example = "https://mastodon.social/lists/12345" def statuses(self): @@ -168,7 +168,7 @@ class MastodonListExtractor(MastodonExtractor): class MastodonHashtagExtractor(MastodonExtractor): """Extractor for mastodon 
hashtags""" subcategory = "hashtag" - pattern = rf"{BASE_PATTERN}/tags/(\w+)" + pattern = BASE_PATTERN + r"/tags/(\w+)" example = "https://mastodon.social/tags/NAME" def statuses(self): @@ -178,7 +178,7 @@ class MastodonHashtagExtractor(MastodonExtractor): class MastodonFollowingExtractor(MastodonExtractor): """Extractor for followed mastodon users""" subcategory = "following" - pattern = rf"{BASE_PATTERN}/(?:@|users/)([^/?#]+)/following" + pattern = BASE_PATTERN + r"/(?:@|users/)([^/?#]+)/following" example = "https://mastodon.social/@USER/following" def items(self): @@ -193,7 +193,7 @@ class MastodonFollowingExtractor(MastodonExtractor): class MastodonStatusExtractor(MastodonExtractor): """Extractor for images from a status""" subcategory = "status" - pattern = (rf"{BASE_PATTERN}/(?:@[^/?#]+|(?:users/[^/?#]+/)?" + pattern = (BASE_PATTERN + r"/(?:@[^/?#]+|(?:users/[^/?#]+/)?" r"(?:statuses|notice|objects()))/(?!following)([^/?#]+)") example = "https://mastodon.social/@USER/12345" diff --git a/gallery_dl/extractor/misskey.py b/gallery_dl/extractor/misskey.py index ca3ae18..f37bcad 100644 --- a/gallery_dl/extractor/misskey.py +++ b/gallery_dl/extractor/misskey.py @@ -102,7 +102,7 @@ BASE_PATTERN = MisskeyExtractor.update({ class MisskeyUserExtractor(Dispatch, MisskeyExtractor): """Extractor for all images of a Misskey user""" subcategory = "user" - pattern = rf"{BASE_PATTERN}/@([^/?#]+)/?$" + pattern = BASE_PATTERN + r"/@([^/?#]+)/?$" example = "https://misskey.io/@USER" def items(self): @@ -118,7 +118,7 @@ class MisskeyUserExtractor(Dispatch, MisskeyExtractor): class MisskeyNotesExtractor(MisskeyExtractor): """Extractor for a Misskey user's notes""" subcategory = "notes" - pattern = rf"{BASE_PATTERN}/@([^/?#]+)/notes" + pattern = BASE_PATTERN + r"/@([^/?#]+)/notes" example = "https://misskey.io/@USER/notes" def notes(self): @@ -129,7 +129,7 @@ class MisskeyNotesExtractor(MisskeyExtractor): class MisskeyInfoExtractor(MisskeyExtractor): """Extractor for a Misskey 
user's profile data""" subcategory = "info" - pattern = rf"{BASE_PATTERN}/@([^/?#]+)/info" + pattern = BASE_PATTERN + r"/@([^/?#]+)/info" example = "https://misskey.io/@USER/info" def items(self): @@ -140,7 +140,7 @@ class MisskeyInfoExtractor(MisskeyExtractor): class MisskeyAvatarExtractor(MisskeyExtractor): """Extractor for a Misskey user's avatar""" subcategory = "avatar" - pattern = rf"{BASE_PATTERN}/@([^/?#]+)/avatar" + pattern = BASE_PATTERN + r"/@([^/?#]+)/avatar" example = "https://misskey.io/@USER/avatar" def notes(self): @@ -152,7 +152,7 @@ class MisskeyAvatarExtractor(MisskeyExtractor): class MisskeyBackgroundExtractor(MisskeyExtractor): """Extractor for a Misskey user's banner image""" subcategory = "background" - pattern = rf"{BASE_PATTERN}/@([^/?#]+)/ba(?:nner|ckground)" + pattern = BASE_PATTERN + r"/@([^/?#]+)/ba(?:nner|ckground)" example = "https://misskey.io/@USER/banner" def notes(self): @@ -164,7 +164,7 @@ class MisskeyBackgroundExtractor(MisskeyExtractor): class MisskeyFollowingExtractor(MisskeyExtractor): """Extractor for followed Misskey users""" subcategory = "following" - pattern = rf"{BASE_PATTERN}/@([^/?#]+)/following" + pattern = BASE_PATTERN + r"/@([^/?#]+)/following" example = "https://misskey.io/@USER/following" def items(self): @@ -181,7 +181,7 @@ class MisskeyFollowingExtractor(MisskeyExtractor): class MisskeyNoteExtractor(MisskeyExtractor): """Extractor for images from a Note""" subcategory = "note" - pattern = rf"{BASE_PATTERN}/notes/(\w+)" + pattern = BASE_PATTERN + r"/notes/(\w+)" example = "https://misskey.io/notes/98765" def notes(self): @@ -191,7 +191,7 @@ class MisskeyNoteExtractor(MisskeyExtractor): class MisskeyFavoriteExtractor(MisskeyExtractor): """Extractor for favorited notes""" subcategory = "favorite" - pattern = rf"{BASE_PATTERN}/(?:my|api/i)/favorites" + pattern = BASE_PATTERN + r"/(?:my|api/i)/favorites" example = "https://misskey.io/my/favorites" def notes(self): diff --git a/gallery_dl/extractor/moebooru.py 
b/gallery_dl/extractor/moebooru.py index 23f8fd9..083c421 100644 --- a/gallery_dl/extractor/moebooru.py +++ b/gallery_dl/extractor/moebooru.py @@ -92,7 +92,7 @@ class MoebooruTagExtractor(MoebooruExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" - pattern = rf"{BASE_PATTERN}/post\?(?:[^&#]*&)*tags=([^&#]*)" + pattern = BASE_PATTERN + r"/post\?(?:[^&#]*&)*tags=([^&#]*)" example = "https://yande.re/post?tags=TAG" def __init__(self, match): @@ -104,14 +104,14 @@ class MoebooruTagExtractor(MoebooruExtractor): def posts(self): params = {"tags": self.tags} - return self._pagination(f"{self.root}/post.json", params) + return self._pagination(self.root + "/post.json", params) class MoebooruPoolExtractor(MoebooruExtractor): subcategory = "pool" directory_fmt = ("{category}", "pool", "{pool}") archive_fmt = "p_{pool}_{id}" - pattern = rf"{BASE_PATTERN}/pool/show/(\d+)" + pattern = BASE_PATTERN + r"/pool/show/(\d+)" example = "https://yande.re/pool/show/12345" def __init__(self, match): @@ -129,26 +129,26 @@ class MoebooruPoolExtractor(MoebooruExtractor): def posts(self): params = {"tags": "pool:" + self.pool_id} - return self._pagination(f"{self.root}/post.json", params) + return self._pagination(self.root + "/post.json", params) class MoebooruPostExtractor(MoebooruExtractor): subcategory = "post" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/post/show/(\d+)" + pattern = BASE_PATTERN + r"/post/show/(\d+)" example = "https://yande.re/post/show/12345" def posts(self): params = {"tags": "id:" + self.groups[-1]} - return self.request_json(f"{self.root}/post.json", params=params) + return self.request_json(self.root + "/post.json", params=params) class MoebooruPopularExtractor(MoebooruExtractor): subcategory = "popular" directory_fmt = ("{category}", "popular", "{scale}", "{date}") archive_fmt = "P_{scale[0]}_{date}_{id}" - pattern = (rf"{BASE_PATTERN}" - 
rf"/post/popular_(by_(?:day|week|month)|recent)(?:\?([^#]*))?") + pattern = BASE_PATTERN + \ + r"/post/popular_(by_(?:day|week|month)|recent)(?:\?([^#]*))?" example = "https://yande.re/post/popular_by_month?year=YYYY&month=MM" def __init__(self, match): diff --git a/gallery_dl/extractor/motherless.py b/gallery_dl/extractor/motherless.py index c20f138..7d5d319 100644 --- a/gallery_dl/extractor/motherless.py +++ b/gallery_dl/extractor/motherless.py @@ -93,8 +93,8 @@ class MotherlessExtractor(Extractor): title = self._extract_group_title(page, gid) return { - f"{category}_id": gid, - f"{category}_title": title, + category + "_id": gid, + category + "_title": title, "uploader": text.remove_html(extr( f'class="{category}-member-username">', "</")), "count": text.parse_int( @@ -152,9 +152,9 @@ class MotherlessExtractor(Extractor): class MotherlessMediaExtractor(MotherlessExtractor): """Extractor for a single image/video from motherless.com""" subcategory = "media" - pattern = (rf"{BASE_PATTERN}/(" - rf"(?:g/[^/?#]+/|G[IV]?[A-Z0-9]+/)?" - rf"(?!G)[A-Z0-9]+)") + pattern = (BASE_PATTERN + + r"/((?:g/[^/?#]+/|G[IV]?[A-Z0-9]+/)?" 
+ r"(?!G)[A-Z0-9]+)") example = "https://motherless.com/ABC123" def items(self): @@ -170,7 +170,7 @@ class MotherlessGalleryExtractor(MotherlessExtractor): directory_fmt = ("{category}", "{uploader}", "{gallery_id} {gallery_title}") archive_fmt = "{gallery_id}_{id}" - pattern = rf"{BASE_PATTERN}/G([IVG])?([A-Z0-9]+)/?$" + pattern = BASE_PATTERN + "/G([IVG])?([A-Z0-9]+)/?$" example = "https://motherless.com/GABC123" def items(self): @@ -206,7 +206,7 @@ class MotherlessGroupExtractor(MotherlessExtractor): directory_fmt = ("{category}", "{uploader}", "{group_id} {group_title}") archive_fmt = "{group_id}_{id}" - pattern = rf"{BASE_PATTERN}/g([iv]?)/?([a-z0-9_]+)/?$" + pattern = BASE_PATTERN + "/g([iv]?)/?([a-z0-9_]+)/?$" example = "https://motherless.com/g/abc123" def items(self): diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py index 936f857..b94a73f 100644 --- a/gallery_dl/extractor/myhentaigallery.py +++ b/gallery_dl/extractor/myhentaigallery.py @@ -20,7 +20,7 @@ class MyhentaigalleryBase(): class MyhentaigalleryGalleryExtractor(MyhentaigalleryBase, GalleryExtractor): """Extractor for image galleries from myhentaigallery.com""" directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}") - pattern = rf"{BASE_PATTERN}/g(?:allery/(?:thumbnails|show))?/(\d+)" + pattern = BASE_PATTERN + r"/g(?:allery/(?:thumbnails|show))?/(\d+)" example = "https://myhentaigallery.com/g/12345" def __init__(self, match): @@ -62,7 +62,7 @@ class MyhentaigalleryGalleryExtractor(MyhentaigalleryBase, GalleryExtractor): class MyhentaigalleryTagExtractor(MyhentaigalleryBase, Extractor): """Extractor for myhentaigallery tag searches""" subcategory = "tag" - pattern = rf"{BASE_PATTERN}(/g/(artist|category|group|parody)/(\d+).*)" + pattern = BASE_PATTERN + r"(/g/(artist|category|group|parody)/(\d+).*)" example = "https://myhentaigallery.com/g/category/123" def items(self): diff --git a/gallery_dl/extractor/naverblog.py 
b/gallery_dl/extractor/naverblog.py index cc96e09..24707f7 100644 --- a/gallery_dl/extractor/naverblog.py +++ b/gallery_dl/extractor/naverblog.py @@ -142,7 +142,7 @@ class NaverBlogBlogExtractor(NaverBlogBase, Extractor): ) # setup params for API calls - url = f"{self.root}/PostViewBottomTitleListAsync.nhn" + url = self.root + "/PostViewBottomTitleListAsync.nhn" params = { "blogId" : self.blog_id, "logNo" : post_num or "0", diff --git a/gallery_dl/extractor/naverwebtoon.py b/gallery_dl/extractor/naverwebtoon.py index 72089d0..3211941 100644 --- a/gallery_dl/extractor/naverwebtoon.py +++ b/gallery_dl/extractor/naverwebtoon.py @@ -27,7 +27,7 @@ class NaverWebtoonEpisodeExtractor(NaverWebtoonBase, GalleryExtractor): directory_fmt = ("{category}", "{comic}") filename_fmt = "{episode:>03}-{num:>02}.{extension}" archive_fmt = "{title_id}_{episode}_{num}" - pattern = rf"{BASE_PATTERN}/detail(?:\.nhn)?\?([^#]+)" + pattern = BASE_PATTERN + r"/detail(?:\.nhn)?\?([^#]+)" example = "https://comic.naver.com/webtoon/detail?titleId=12345&no=1" def __init__(self, match): @@ -66,7 +66,7 @@ class NaverWebtoonEpisodeExtractor(NaverWebtoonBase, GalleryExtractor): class NaverWebtoonComicExtractor(NaverWebtoonBase, Extractor): subcategory = "comic" categorytransfer = True - pattern = rf"{BASE_PATTERN}/list(?:\.nhn)?\?([^#]+)" + pattern = BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)" example = "https://comic.naver.com/webtoon/list?titleId=12345" def __init__(self, match): diff --git a/gallery_dl/extractor/nekohouse.py b/gallery_dl/extractor/nekohouse.py index 728912b..8de2951 100644 --- a/gallery_dl/extractor/nekohouse.py +++ b/gallery_dl/extractor/nekohouse.py @@ -12,7 +12,7 @@ from .common import Extractor, Message from .. 
import text BASE_PATTERN = r"(?:https?://)?nekohouse\.su" -USER_PATTERN = rf"{BASE_PATTERN}/([^/?#]+)/user/([^/?#]+)" +USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)" class NekohouseExtractor(Extractor): @@ -27,7 +27,7 @@ class NekohousePostExtractor(NekohouseExtractor): "{post_id} {date} {title[b:230]}") filename_fmt = "{num:>02} {id|filename}.{extension}" archive_fmt = "{service}_{user_id}_{post_id}_{hash}" - pattern = rf"{USER_PATTERN}/post/([^/?#]+)" + pattern = USER_PATTERN + r"/post/([^/?#]+)" example = "https://nekohouse.su/SERVICE/user/12345/post/12345" def items(self): @@ -98,7 +98,7 @@ class NekohousePostExtractor(NekohouseExtractor): class NekohouseUserExtractor(NekohouseExtractor): subcategory = "user" - pattern = rf"{USER_PATTERN}/?(?:\?([^#]+))?(?:$|\?|#)" + pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|\?|#)" example = "https://nekohouse.su/SERVICE/user/12345" def items(self): diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index f980f4b..f1a07b3 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -412,7 +412,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor): class NewgroundsMediaExtractor(NewgroundsExtractor): """Extractor for a media file from newgrounds.com""" subcategory = "media" - pattern = rf"{BASE_PATTERN}(/(?:portal/view|audio/listen)/\d+)" + pattern = BASE_PATTERN + r"(/(?:portal/view|audio/listen)/\d+)" example = "https://www.newgrounds.com/portal/view/12345" def __init__(self, match): @@ -427,34 +427,34 @@ class NewgroundsMediaExtractor(NewgroundsExtractor): class NewgroundsArtExtractor(NewgroundsExtractor): """Extractor for all images of a newgrounds user""" subcategory = _path = "art" - pattern = rf"{USER_PATTERN}/art(?:(?:/page/|/?\?page=)(\d+))?/?$" + pattern = USER_PATTERN + r"/art(?:(?:/page/|/?\?page=)(\d+))?/?$" example = "https://USER.newgrounds.com/art" class NewgroundsAudioExtractor(NewgroundsExtractor): """Extractor for all audio 
submissions of a newgrounds user""" subcategory = _path = "audio" - pattern = rf"{USER_PATTERN}/audio(?:(?:/page/|/?\?page=)(\d+))?/?$" + pattern = USER_PATTERN + r"/audio(?:(?:/page/|/?\?page=)(\d+))?/?$" example = "https://USER.newgrounds.com/audio" class NewgroundsMoviesExtractor(NewgroundsExtractor): """Extractor for all movies of a newgrounds user""" subcategory = _path = "movies" - pattern = rf"{USER_PATTERN}/movies(?:(?:/page/|/?\?page=)(\d+))?/?$" + pattern = USER_PATTERN + r"/movies(?:(?:/page/|/?\?page=)(\d+))?/?$" example = "https://USER.newgrounds.com/movies" class NewgroundsGamesExtractor(NewgroundsExtractor): """Extractor for a newgrounds user's games""" subcategory = _path = "games" - pattern = rf"{USER_PATTERN}/games(?:(?:/page/|/?\?page=)(\d+))?/?$" + pattern = USER_PATTERN + r"/games(?:(?:/page/|/?\?page=)(\d+))?/?$" example = "https://USER.newgrounds.com/games" class NewgroundsUserExtractor(Dispatch, NewgroundsExtractor): """Extractor for a newgrounds user profile""" - pattern = rf"{USER_PATTERN}/?$" + pattern = USER_PATTERN + r"/?$" example = "https://USER.newgrounds.com" def items(self): @@ -471,7 +471,7 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor): """Extractor for posts favorited by a newgrounds user""" subcategory = "favorite" directory_fmt = ("{category}", "{user}", "Favorites") - pattern = (rf"{USER_PATTERN}/favorites(?!/following)(?:/(art|audio|movies)" + pattern = (USER_PATTERN + r"/favorites(?!/following)(?:/(art|audio|movies)" r"(?:(?:/page/|/?\?page=)(\d+))?)?") example = "https://USER.newgrounds.com/favorites" @@ -510,16 +510,15 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor): def _extract_favorites(self, page): return [ self.root + path - for path in text.extract_iter(page, f'href="{self.root}', '"') + for path in text.extract_iter(page, 'href="' + self.root, '"') ] class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor): """Extractor for a newgrounds user's favorited users""" subcategory = "following" 
- pattern = (rf"{USER_PATTERN}/favorites/(following)" + pattern = (USER_PATTERN + r"/favorites/(following)" r"(?:(?:/page/|/?\?page=)(\d+))?") - example = "https://USER.newgrounds.com/favorites/following" def items(self): @@ -539,7 +538,7 @@ class NewgroundsSearchExtractor(NewgroundsExtractor): """Extractor for newgrounds.com search reesults""" subcategory = "search" directory_fmt = ("{category}", "search", "{search_tags}") - pattern = rf"{BASE_PATTERN}/search/conduct/([^/?#]+)/?\?([^#]+)" + pattern = BASE_PATTERN + r"/search/conduct/([^/?#]+)/?\?([^#]+)" example = "https://www.newgrounds.com/search/conduct/art?terms=QUERY" def __init__(self, match): diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index a6b01c2..181d54d 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -140,7 +140,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor): def _login_impl(self, username, password): self.log.info("Logging in as %s", username) - url = f"{self.root}/login_int.php" + url = self.root + "/login_int.php" data = {"email": username, "password": password, "save": "on"} response = self.request(url, method="POST", data=data) @@ -178,7 +178,7 @@ BASE_PATTERN = NijieExtractor.update({ class NijieUserExtractor(Dispatch, NijieExtractor): """Extractor for nijie user profiles""" - pattern = rf"{BASE_PATTERN}/members\.php\?id=(\d+)" + pattern = BASE_PATTERN + r"/members\.php\?id=(\d+)" example = "https://nijie.info/members.php?id=12345" def items(self): @@ -194,7 +194,7 @@ class NijieUserExtractor(Dispatch, NijieExtractor): class NijieIllustrationExtractor(NijieExtractor): """Extractor for all illustrations of a nijie-user""" subcategory = "illustration" - pattern = rf"{BASE_PATTERN}/members_illust\.php\?id=(\d+)" + pattern = BASE_PATTERN + r"/members_illust\.php\?id=(\d+)" example = "https://nijie.info/members_illust.php?id=12345" def image_ids(self): @@ -204,7 +204,7 @@ class NijieIllustrationExtractor(NijieExtractor): 
class NijieDoujinExtractor(NijieExtractor): """Extractor for doujin entries of a nijie user""" subcategory = "doujin" - pattern = rf"{BASE_PATTERN}/members_dojin\.php\?id=(\d+)" + pattern = BASE_PATTERN + r"/members_dojin\.php\?id=(\d+)" example = "https://nijie.info/members_dojin.php?id=12345" def image_ids(self): @@ -216,7 +216,7 @@ class NijieFavoriteExtractor(NijieExtractor): subcategory = "favorite" directory_fmt = ("{category}", "bookmarks", "{user_id}") archive_fmt = "f_{user_id}_{image_id}_{num}" - pattern = rf"{BASE_PATTERN}/user_like_illust_view\.php\?id=(\d+)" + pattern = BASE_PATTERN + r"/user_like_illust_view\.php\?id=(\d+)" example = "https://nijie.info/user_like_illust_view.php?id=12345" def image_ids(self): @@ -234,7 +234,7 @@ class NijieNuitaExtractor(NijieExtractor): subcategory = "nuita" directory_fmt = ("{category}", "nuita", "{user_id}") archive_fmt = "n_{user_id}_{image_id}_{num}" - pattern = rf"{BASE_PATTERN}/history_nuita\.php\?id=(\d+)" + pattern = BASE_PATTERN + r"/history_nuita\.php\?id=(\d+)" example = "https://nijie.info/history_nuita.php?id=12345" def image_ids(self): @@ -253,7 +253,7 @@ class NijieNuitaExtractor(NijieExtractor): class NijieFeedExtractor(NijieExtractor): """Extractor for nijie liked user feed""" subcategory = "feed" - pattern = rf"{BASE_PATTERN}/like_user_view\.php" + pattern = BASE_PATTERN + r"/like_user_view\.php" example = "https://nijie.info/like_user_view.php" def image_ids(self): @@ -266,7 +266,7 @@ class NijieFeedExtractor(NijieExtractor): class NijieFollowedExtractor(NijieExtractor): """Extractor for followed nijie users""" subcategory = "followed" - pattern = rf"{BASE_PATTERN}/like_my\.php" + pattern = BASE_PATTERN + r"/like_my\.php" example = "https://nijie.info/like_my.php" def items(self): @@ -292,7 +292,7 @@ class NijieFollowedExtractor(NijieExtractor): class NijieImageExtractor(NijieExtractor): """Extractor for a nijie work/image""" subcategory = "image" - pattern = 
rf"{BASE_PATTERN}/view(?:_popup)?\.php\?id=(\d+)" + pattern = BASE_PATTERN + r"/view(?:_popup)?\.php\?id=(\d+)" example = "https://nijie.info/view.php?id=12345" def image_ids(self): diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py index 321883c..cf463b2 100644 --- a/gallery_dl/extractor/nitter.py +++ b/gallery_dl/extractor/nitter.py @@ -229,12 +229,12 @@ class NitterExtractor(BaseExtractor): BASE_PATTERN = NitterExtractor.update({ }) -USER_PATTERN = rf"{BASE_PATTERN}/(i(?:/user/|d:)(\d+)|[^/?#]+)" +USER_PATTERN = BASE_PATTERN + r"/(i(?:/user/|d:)(\d+)|[^/?#]+)" class NitterTweetsExtractor(NitterExtractor): subcategory = "tweets" - pattern = rf"{USER_PATTERN}(?:/tweets)?(?:$|\?|#)" + pattern = USER_PATTERN + r"(?:/tweets)?(?:$|\?|#)" example = "https://nitter.net/USER" def tweets(self): @@ -243,7 +243,7 @@ class NitterTweetsExtractor(NitterExtractor): class NitterRepliesExtractor(NitterExtractor): subcategory = "replies" - pattern = rf"{USER_PATTERN}/with_replies" + pattern = USER_PATTERN + r"/with_replies" example = "https://nitter.net/USER/with_replies" def tweets(self): @@ -252,7 +252,7 @@ class NitterRepliesExtractor(NitterExtractor): class NitterMediaExtractor(NitterExtractor): subcategory = "media" - pattern = rf"{USER_PATTERN}/media" + pattern = USER_PATTERN + r"/media" example = "https://nitter.net/USER/media" def tweets(self): @@ -261,7 +261,7 @@ class NitterMediaExtractor(NitterExtractor): class NitterSearchExtractor(NitterExtractor): subcategory = "search" - pattern = rf"{USER_PATTERN}/search" + pattern = USER_PATTERN + r"/search" example = "https://nitter.net/USER/search" def tweets(self): @@ -274,7 +274,7 @@ class NitterTweetExtractor(NitterExtractor): directory_fmt = ("{category}", "{user[name]}") filename_fmt = "{tweet_id}_{num}.{extension}" archive_fmt = "{tweet_id}_{num}" - pattern = rf"{BASE_PATTERN}/(i/web|[^/?#]+)/status/(\d+())" + pattern = BASE_PATTERN + r"/(i/web|[^/?#]+)/status/(\d+())" example = 
"https://nitter.net/USER/status/12345" def tweets(self): diff --git a/gallery_dl/extractor/nudostar.py b/gallery_dl/extractor/nudostar.py index 2eb4340..d0128c0 100644 --- a/gallery_dl/extractor/nudostar.py +++ b/gallery_dl/extractor/nudostar.py @@ -21,7 +21,7 @@ class NudostarExtractor(GalleryExtractor): class NudostarModelExtractor(NudostarExtractor): """Extractor for NudoStar models""" subcategory = "model" - pattern = rf"{BASE_PATTERN}(/models/([^/?#]+)/?)$" + pattern = BASE_PATTERN + r"(/models/([^/?#]+)/?)$" example = "https://nudostar.tv/models/MODEL/" def metadata(self, page): @@ -53,7 +53,7 @@ class NudostarModelExtractor(NudostarExtractor): class NudostarImageExtractor(NudostarExtractor): """Extractor for NudoStar images""" subcategory = "image" - pattern = rf"{BASE_PATTERN}(/models/([^/?#]+)/(\d+)/)" + pattern = BASE_PATTERN + r"(/models/([^/?#]+)/(\d+)/)" example = "https://nudostar.tv/models/MODEL/123/" def items(self): diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py index d56331f..be22dea 100644 --- a/gallery_dl/extractor/paheal.py +++ b/gallery_dl/extractor/paheal.py @@ -97,7 +97,7 @@ class PahealTagExtractor(PahealExtractor): while True: try: - page = self.request(f"{base}{pnum}").text + page = self.request(base + str(pnum)).text except exception.HttpError as exc: if exc.status == 404: return diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 12dfd48..cdfda0f 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -300,8 +300,8 @@ class PatreonExtractor(Extractor): order = "-published_at" elif order in {"a", "asc", "r", "reverse"}: order = "published_at" - return f"&sort={order}" - return f"&sort={sort}" if sort else "" + return "&sort=" + order + return "&sort=" + sort if sort else "" def _build_file_generators(self, filetypes): if filetypes is None: @@ -382,8 +382,8 @@ class PatreonCollectionExtractor(PatreonExtractor): elif order in {"d", "desc", "r", 
"reverse"}: # "-collection_order" results in a '400 Bad Request' error order = "-published_at" - return f"&sort={order}" - return f"&sort={sort}" if sort else "" + return "&sort=" + order + return "&sort=" + sort if sort else "" class PatreonCreatorExtractor(PatreonExtractor): diff --git a/gallery_dl/extractor/pexels.py b/gallery_dl/extractor/pexels.py index 9e2f40c..5f4ebc7 100644 --- a/gallery_dl/extractor/pexels.py +++ b/gallery_dl/extractor/pexels.py @@ -62,7 +62,7 @@ class PexelsCollectionExtractor(PexelsExtractor): """Extractor for a pexels.com collection""" subcategory = "collection" directory_fmt = ("{category}", "Collections", "{collection}") - pattern = rf"{BASE_PATTERN}/collections/((?:[^/?#]*-)?(\w+))" + pattern = BASE_PATTERN + r"/collections/((?:[^/?#]*-)?(\w+))" example = "https://www.pexels.com/collections/SLUG-a1b2c3/" def metadata(self): @@ -77,7 +77,7 @@ class PexelsSearchExtractor(PexelsExtractor): """Extractor for pexels.com search results""" subcategory = "search" directory_fmt = ("{category}", "Searches", "{search_tags}") - pattern = rf"{BASE_PATTERN}/search/([^/?#]+)" + pattern = BASE_PATTERN + r"/search/([^/?#]+)" example = "https://www.pexels.com/search/QUERY/" def metadata(self): @@ -91,7 +91,7 @@ class PexelsUserExtractor(PexelsExtractor): """Extractor for pexels.com user galleries""" subcategory = "user" directory_fmt = ("{category}", "@{user[slug]}") - pattern = rf"{BASE_PATTERN}/(@(?:(?:[^/?#]*-)?(\d+)|[^/?#]+))" + pattern = BASE_PATTERN + r"/(@(?:(?:[^/?#]*-)?(\d+)|[^/?#]+))" example = "https://www.pexels.com/@USER-12345/" def posts(self): @@ -100,7 +100,7 @@ class PexelsUserExtractor(PexelsExtractor): class PexelsImageExtractor(PexelsExtractor): subcategory = "image" - pattern = rf"{BASE_PATTERN}/photo/((?:[^/?#]*-)?\d+)" + pattern = BASE_PATTERN + r"/photo/((?:[^/?#]*-)?\d+)" example = "https://www.pexels.com/photo/SLUG-12345/" def posts(self): diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py index 
3634c66..2f52b9a 100644 --- a/gallery_dl/extractor/philomena.py +++ b/gallery_dl/extractor/philomena.py @@ -61,7 +61,7 @@ BASE_PATTERN = PhilomenaExtractor.update({ class PhilomenaPostExtractor(PhilomenaExtractor): """Extractor for single posts on a Philomena booru""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/(?:images/)?(\d+)" + pattern = BASE_PATTERN + r"/(?:images/)?(\d+)" example = "https://derpibooru.org/images/12345" def posts(self): @@ -72,7 +72,7 @@ class PhilomenaSearchExtractor(PhilomenaExtractor): """Extractor for Philomena search results""" subcategory = "search" directory_fmt = ("{category}", "{search_tags}") - pattern = rf"{BASE_PATTERN}/(?:search/?\?([^#]+)|tags/([^/?#]+))" + pattern = BASE_PATTERN + r"/(?:search/?\?([^#]+)|tags/([^/?#]+))" example = "https://derpibooru.org/search?q=QUERY" def __init__(self, match): @@ -106,7 +106,7 @@ class PhilomenaGalleryExtractor(PhilomenaExtractor): subcategory = "gallery" directory_fmt = ("{category}", "galleries", "{gallery[id]} {gallery[title]}") - pattern = rf"{BASE_PATTERN}/galleries/(\d+)" + pattern = BASE_PATTERN + r"/galleries/(\d+)" example = "https://derpibooru.org/galleries/12345" def metadata(self): @@ -116,7 +116,7 @@ class PhilomenaGalleryExtractor(PhilomenaExtractor): raise exception.NotFoundError("gallery") def posts(self): - gallery_id = f"gallery_id:{self.groups[-1]}" + gallery_id = "gallery_id:" + self.groups[-1] params = {"sd": "desc", "sf": gallery_id, "q": gallery_id} return self.api.search(params) diff --git a/gallery_dl/extractor/photovogue.py b/gallery_dl/extractor/photovogue.py index cb16b23..8559a10 100644 --- a/gallery_dl/extractor/photovogue.py +++ b/gallery_dl/extractor/photovogue.py @@ -18,7 +18,7 @@ class PhotovogueUserExtractor(Extractor): directory_fmt = ("{category}", "{photographer[id]} {photographer[name]}") filename_fmt = "{id} {title}.{extension}" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/photographers/(\d+)" + pattern = BASE_PATTERN + 
r"/photographers/(\d+)" example = "https://www.vogue.com/photovogue/photographers/12345" def __init__(self, match): diff --git a/gallery_dl/extractor/pictoa.py b/gallery_dl/extractor/pictoa.py index 0dfe304..e358541 100644 --- a/gallery_dl/extractor/pictoa.py +++ b/gallery_dl/extractor/pictoa.py @@ -24,7 +24,7 @@ class PictoaExtractor(Extractor): class PictoaImageExtractor(PictoaExtractor): """Extractor for single images from pictoa.com""" subcategory = "image" - pattern = rf"{BASE_PATTERN}/albums/(?:[\w-]+-)?(\d+)/(\d+)" + pattern = BASE_PATTERN + r"/albums/(?:[\w-]+-)?(\d+)/(\d+)" example = "https://www.pictoa.com/albums/NAME-12345/12345.html" def items(self): @@ -50,7 +50,7 @@ class PictoaImageExtractor(PictoaExtractor): class PictoaAlbumExtractor(PictoaExtractor): """Extractor for image albums from pictoa.com""" subcategory = "album" - pattern = rf"{BASE_PATTERN}/albums/(?:[\w-]+-)?(\d+).html" + pattern = BASE_PATTERN + r"/albums/(?:[\w-]+-)?(\d+).html" example = "https://www.pictoa.com/albums/NAME-12345.html" def items(self): diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py index 6661e7d..ccb6aa6 100644 --- a/gallery_dl/extractor/piczel.py +++ b/gallery_dl/extractor/piczel.py @@ -66,7 +66,7 @@ class PiczelExtractor(Extractor): class PiczelUserExtractor(PiczelExtractor): """Extractor for all images from a user's gallery""" subcategory = "user" - pattern = rf"{BASE_PATTERN}/gallery/([^/?#]+)/?$" + pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/?$" example = "https://piczel.tv/gallery/USER" def posts(self): @@ -79,7 +79,7 @@ class PiczelFolderExtractor(PiczelExtractor): subcategory = "folder" directory_fmt = ("{category}", "{user[username]}", "{folder[name]}") archive_fmt = "f{folder[id]}_{id}_{num}" - pattern = rf"{BASE_PATTERN}/gallery/(?!image/)[^/?#]+/(\d+)" + pattern = BASE_PATTERN + r"/gallery/(?!image/)[^/?#]+/(\d+)" example = "https://piczel.tv/gallery/USER/12345" def posts(self): @@ -90,7 +90,7 @@ class 
PiczelFolderExtractor(PiczelExtractor): class PiczelImageExtractor(PiczelExtractor): """Extractor for individual images""" subcategory = "image" - pattern = rf"{BASE_PATTERN}/gallery/image/(\d+)" + pattern = BASE_PATTERN + r"/gallery/image/(\d+)" example = "https://piczel.tv/gallery/image/12345" def posts(self): diff --git a/gallery_dl/extractor/pillowfort.py b/gallery_dl/extractor/pillowfort.py index 0b750fe..570ed85 100644 --- a/gallery_dl/extractor/pillowfort.py +++ b/gallery_dl/extractor/pillowfort.py @@ -119,7 +119,7 @@ class PillowfortExtractor(Extractor): class PillowfortPostExtractor(PillowfortExtractor): """Extractor for a single pillowfort post""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/posts/(\d+)" + pattern = BASE_PATTERN + r"/posts/(\d+)" example = "https://www.pillowfort.social/posts/12345" def posts(self): @@ -130,7 +130,7 @@ class PillowfortPostExtractor(PillowfortExtractor): class PillowfortUserExtractor(PillowfortExtractor): """Extractor for all posts of a pillowfort user""" subcategory = "user" - pattern = rf"{BASE_PATTERN}/(?!posts/)([^/?#]+(?:/tagged/[^/?#]+)?)" + pattern = BASE_PATTERN + r"/(?!posts/)([^/?#]+(?:/tagged/[^/?#]+)?)" example = "https://www.pillowfort.social/USER" def posts(self): diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 7aa32ec..cd3d077 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -207,7 +207,7 @@ class PinterestExtractor(Extractor): class PinterestUserExtractor(PinterestExtractor): """Extractor for a user's boards""" subcategory = "user" - pattern = rf"{BASE_PATTERN}/(?!pin/)([^/?#]+)(?:/_saved)?/?$" + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)(?:/_saved)?/?$" example = "https://www.pinterest.com/USER/" def __init__(self, match): @@ -225,7 +225,7 @@ class PinterestAllpinsExtractor(PinterestExtractor): """Extractor for a user's 'All Pins' feed""" subcategory = "allpins" directory_fmt = ("{category}", "{user}") - pattern = 
rf"{BASE_PATTERN}/(?!pin/)([^/?#]+)/pins/?$" + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/pins/?$" example = "https://www.pinterest.com/USER/pins/" def __init__(self, match): @@ -243,7 +243,7 @@ class PinterestCreatedExtractor(PinterestExtractor): """Extractor for a user's created pins""" subcategory = "created" directory_fmt = ("{category}", "{user}") - pattern = rf"{BASE_PATTERN}/(?!pin/)([^/?#]+)/_created/?$" + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/_created/?$" example = "https://www.pinterest.com/USER/_created/" def __init__(self, match): @@ -263,7 +263,7 @@ class PinterestSectionExtractor(PinterestExtractor): directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}", "{section[title]}") archive_fmt = "{board[id]}_{id}" - pattern = rf"{BASE_PATTERN}/(?!pin/)([^/?#]+)/([^/?#]+)/([^/?#]+)" + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/([^/?#]+)/([^/?#]+)" example = "https://www.pinterest.com/USER/BOARD/SECTION" def __init__(self, match): @@ -291,7 +291,7 @@ class PinterestSearchExtractor(PinterestExtractor): """Extractor for Pinterest search results""" subcategory = "search" directory_fmt = ("{category}", "Search", "{search}") - pattern = rf"{BASE_PATTERN}/search/pins/?\?q=([^&#]+)" + pattern = BASE_PATTERN + r"/search/pins/?\?q=([^&#]+)" example = "https://www.pinterest.com/search/pins/?q=QUERY" def __init__(self, match): @@ -308,7 +308,7 @@ class PinterestSearchExtractor(PinterestExtractor): class PinterestPinExtractor(PinterestExtractor): """Extractor for images from a single pin from pinterest.com""" subcategory = "pin" - pattern = rf"{BASE_PATTERN}/pin/([^/?#]+)(?!.*#related$)" + pattern = BASE_PATTERN + r"/pin/([^/?#]+)(?!.*#related$)" example = "https://www.pinterest.com/pin/12345/" def __init__(self, match): @@ -329,7 +329,7 @@ class PinterestBoardExtractor(PinterestExtractor): subcategory = "board" directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}") archive_fmt = "{board[id]}_{id}" - pattern = 
(rf"{BASE_PATTERN}/(?!pin/)([^/?#]+)" + pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#]+)" r"/([^/?#]+)/?(?!.*#related$)") example = "https://www.pinterest.com/USER/BOARD/" @@ -361,7 +361,7 @@ class PinterestRelatedPinExtractor(PinterestPinExtractor): """Extractor for related pins of another pin from pinterest.com""" subcategory = "related-pin" directory_fmt = ("{category}", "related {original_pin[id]}") - pattern = rf"{BASE_PATTERN}/pin/([^/?#]+).*#related$" + pattern = BASE_PATTERN + r"/pin/([^/?#]+).*#related$" example = "https://www.pinterest.com/pin/12345/#related" def metadata(self): @@ -376,7 +376,7 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor): subcategory = "related-board" directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}", "related") - pattern = rf"{BASE_PATTERN}/(?!pin/)([^/?#]+)/([^/?#]+)/?#related$" + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/([^/?#]+)/?#related$" example = "https://www.pinterest.com/USER/BOARD/#related" def pins(self): diff --git a/gallery_dl/extractor/pixeldrain.py b/gallery_dl/extractor/pixeldrain.py index 1486976..f299db9 100644 --- a/gallery_dl/extractor/pixeldrain.py +++ b/gallery_dl/extractor/pixeldrain.py @@ -29,7 +29,7 @@ class PixeldrainFileExtractor(PixeldrainExtractor): """Extractor for pixeldrain files""" subcategory = "file" filename_fmt = "{filename[:230]} ({id}).{extension}" - pattern = rf"{BASE_PATTERN}/(?:u|api/file)/(\w+)" + pattern = BASE_PATTERN + r"/(?:u|api/file)/(\w+)" example = "https://pixeldrain.com/u/abcdefgh" def __init__(self, match): @@ -54,7 +54,7 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor): directory_fmt = ("{category}", "{album[date]:%Y-%m-%d} {album[title]} ({album[id]})") filename_fmt = "{num:>03} {filename[:230]} ({id}).{extension}" - pattern = rf"{BASE_PATTERN}/(?:l|api/list)/(\w+)(?:#item=(\d+))?" + pattern = BASE_PATTERN + r"/(?:l|api/list)/(\w+)(?:#item=(\d+))?" 
example = "https://pixeldrain.com/l/abcdefgh" def __init__(self, match): @@ -97,7 +97,7 @@ class PixeldrainFolderExtractor(PixeldrainExtractor): subcategory = "folder" filename_fmt = "{filename[:230]}.{extension}" archive_fmt = "{path}_{num}" - pattern = rf"{BASE_PATTERN}/(?:d|api/filesystem)/([^?]+)" + pattern = BASE_PATTERN + r"/(?:d|api/filesystem)/([^?]+)" example = "https://pixeldrain.com/d/abcdefgh" def metadata(self, data): diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index eb1a7f2..7d6edfa 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -15,7 +15,7 @@ import itertools import hashlib BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?ph?ixiv\.net" -USER_PATTERN = rf"{BASE_PATTERN}/(?:en/)?users/(\d+)" +USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)" class PixivExtractor(Extractor): @@ -92,10 +92,11 @@ class PixivExtractor(Extractor): work["caption"] = self._sanitize_ajax_caption( body["illustComment"]) - if transform_tags: + if transform_tags is not None: transform_tags(work) work["num"] = 0 work["date"] = dt.parse_iso(work["create_date"]) + work["count"] = len(files) work["rating"] = ratings.get(work["x_restrict"]) work["suffix"] = "" work.update(metadata) @@ -393,12 +394,20 @@ class PixivExtractor(Extractor): class PixivUserExtractor(Dispatch, PixivExtractor): """Extractor for a pixiv user profile""" - pattern = (rf"{BASE_PATTERN}/(?:" + pattern = (BASE_PATTERN + r"/(?:" r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id=" r")(\d+)(?:$|[?#])") example = "https://www.pixiv.net/en/users/12345" def items(self): + if (inc := self.config("include")) and ( + "sketch" in inc or inc == "all"): + Extractor.initialize(self) + user = PixivAppAPI(self).user_detail(self.groups[0]) + sketch = "https://sketch.pixiv.net/@" + user["user"]["account"] + else: + sketch = "" + base = f"{self.root}/users/{self.groups[0]}/" return self._dispatch_extractors(( (PixivAvatarExtractor , base + "avatar"), @@ 
-407,6 +416,7 @@ class PixivUserExtractor(Dispatch, PixivExtractor): (PixivFavoriteExtractor , base + "bookmarks/artworks"), (PixivNovelBookmarkExtractor, base + "bookmarks/novels"), (PixivNovelUserExtractor , base + "novels"), + (PixivSketchExtractor , sketch), ), ("artworks",), ( ("bookmark", "novel-bookmark"), ("user" , "novel-user"), @@ -416,7 +426,7 @@ class PixivUserExtractor(Dispatch, PixivExtractor): class PixivArtworksExtractor(PixivExtractor): """Extractor for artworks of a pixiv user""" subcategory = "artworks" - pattern = (rf"{BASE_PATTERN}/(?:" + pattern = (BASE_PATTERN + r"/(?:" r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)" r"(?:/([^/?#]+))?/?(?:$|[?#])" r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)") @@ -505,7 +515,7 @@ class PixivAvatarExtractor(PixivExtractor): subcategory = "avatar" filename_fmt = "avatar{date:?_//%Y-%m-%d}.{extension}" archive_fmt = "avatar_{user[id]}_{date}" - pattern = rf"{USER_PATTERN}/avatar" + pattern = USER_PATTERN + r"/avatar" example = "https://www.pixiv.net/en/users/12345/avatar" def _init(self): @@ -523,7 +533,7 @@ class PixivBackgroundExtractor(PixivExtractor): subcategory = "background" filename_fmt = "background{date:?_//%Y-%m-%d}.{extension}" archive_fmt = "background_{user[id]}_{date}" - pattern = rf"{USER_PATTERN}/background" + pattern = USER_PATTERN + "/background" example = "https://www.pixiv.net/en/users/12345/background" def _init(self): @@ -585,7 +595,7 @@ class PixivWorkExtractor(PixivExtractor): class PixivUnlistedExtractor(PixivExtractor): """Extractor for a unlisted pixiv illustrations""" subcategory = "unlisted" - pattern = rf"{BASE_PATTERN}/(?:en/)?artworks/unlisted/(\w+)" + pattern = BASE_PATTERN + r"/(?:en/)?artworks/unlisted/(\w+)" example = "https://www.pixiv.net/en/artworks/unlisted/a1b2c3d4e5f6g7h8i9j0" def _extract_files(self, work): @@ -604,7 +614,7 @@ class PixivFavoriteExtractor(PixivExtractor): directory_fmt = ("{category}", "bookmarks", "{user_bookmark[id]} 
{user_bookmark[account]}") archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}" - pattern = (rf"{BASE_PATTERN}/(?:(?:en/)?" + pattern = (BASE_PATTERN + r"/(?:(?:en/)?" r"users/(\d+)/(bookmarks/artworks|following)(?:/([^/?#]+))?" r"|bookmark\.php)(?:\?([^#]*))?") example = "https://www.pixiv.net/en/users/12345/bookmarks/artworks" @@ -657,7 +667,7 @@ class PixivFavoriteExtractor(PixivExtractor): for preview in self.api.user_following(self.user_id, restrict): user = preview["user"] user["_extractor"] = PixivUserExtractor - url = f"https://www.pixiv.net/users/{user['id']}" + url = "https://www.pixiv.net/users/" + str(user["id"]) yield Message.Queue, url, user @@ -667,7 +677,7 @@ class PixivRankingExtractor(PixivExtractor): archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}" directory_fmt = ("{category}", "rankings", "{ranking[mode]}", "{ranking[date]}") - pattern = rf"{BASE_PATTERN}/ranking\.php(?:\?([^#]*))?" + pattern = BASE_PATTERN + r"/ranking\.php(?:\?([^#]*))?" example = "https://www.pixiv.net/ranking.php" def __init__(self, match): @@ -736,7 +746,7 @@ class PixivSearchExtractor(PixivExtractor): subcategory = "search" archive_fmt = "s_{search[word]}_{id}{num}.{extension}" directory_fmt = ("{category}", "search", "{search[word]}") - pattern = (rf"{BASE_PATTERN}/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?" + pattern = (BASE_PATTERN + r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?" 
r"|search\.php)(?:\?([^#]+))?") example = "https://www.pixiv.net/en/tags/TAG" @@ -802,7 +812,7 @@ class PixivFollowExtractor(PixivExtractor): subcategory = "follow" archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}" directory_fmt = ("{category}", "following") - pattern = rf"{BASE_PATTERN}/bookmark_new_illust\.php" + pattern = BASE_PATTERN + r"/bookmark_new_illust\.php" example = "https://www.pixiv.net/bookmark_new_illust.php" def works(self): @@ -851,7 +861,7 @@ class PixivSeriesExtractor(PixivExtractor): directory_fmt = ("{category}", "{user[id]} {user[account]}", "{series[id]} {series[title]}") filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}" - pattern = rf"{BASE_PATTERN}/user/(\d+)/series/(\d+)" + pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)" example = "https://www.pixiv.net/user/12345/series/12345" def __init__(self, match): @@ -1042,7 +1052,7 @@ class PixivNovelExtractor(PixivExtractor): class PixivNovelNovelExtractor(PixivNovelExtractor): """Extractor for pixiv novels""" subcategory = "novel" - pattern = rf"{BASE_PATTERN}/n(?:ovel/show\.php\?id=|/)(\d+)" + pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)" example = "https://www.pixiv.net/novel/show.php?id=12345" def novels(self): @@ -1056,7 +1066,7 @@ class PixivNovelNovelExtractor(PixivNovelExtractor): class PixivNovelUserExtractor(PixivNovelExtractor): """Extractor for pixiv users' novels""" subcategory = "user" - pattern = rf"{USER_PATTERN}/novels" + pattern = USER_PATTERN + r"/novels" example = "https://www.pixiv.net/en/users/12345/novels" def novels(self): @@ -1066,7 +1076,7 @@ class PixivNovelUserExtractor(PixivNovelExtractor): class PixivNovelSeriesExtractor(PixivNovelExtractor): """Extractor for pixiv novel series""" subcategory = "series" - pattern = rf"{BASE_PATTERN}/novel/series/(\d+)" + pattern = BASE_PATTERN + r"/novel/series/(\d+)" example = "https://www.pixiv.net/novel/series/12345" def novels(self): @@ -1076,7 +1086,7 @@ class 
PixivNovelSeriesExtractor(PixivNovelExtractor): class PixivNovelBookmarkExtractor(PixivNovelExtractor): """Extractor for bookmarked pixiv novels""" subcategory = "bookmark" - pattern = (rf"{USER_PATTERN}/bookmarks/novels" + pattern = (USER_PATTERN + r"/bookmarks/novels" r"(?:/([^/?#]+))?(?:/?\?([^#]+))?") example = "https://www.pixiv.net/en/users/12345/bookmarks/novels" @@ -1302,7 +1312,7 @@ class PixivAppAPI(): msg = (f"'{msg}'" if (msg := error.get("user_message")) else f"'{msg}'" if (msg := error.get("message")) else error) - raise exception.AbortExtraction(f"API request failed: {msg}") + raise exception.AbortExtraction("API request failed: " + msg) def _pagination(self, endpoint, params, key_items="illusts", key_data=None, key_user=None): diff --git a/gallery_dl/extractor/pixnet.py b/gallery_dl/extractor/pixnet.py index 2feab95..68f546b 100644 --- a/gallery_dl/extractor/pixnet.py +++ b/gallery_dl/extractor/pixnet.py @@ -65,7 +65,7 @@ class PixnetImageExtractor(PixnetExtractor): subcategory = "image" filename_fmt = "{id}.{extension}" directory_fmt = ("{category}", "{blog}") - pattern = rf"{BASE_PATTERN}/album/photo/(\d+)" + pattern = BASE_PATTERN + r"/album/photo/(\d+)" example = "https://USER.pixnet.net/album/photo/12345" def items(self): @@ -92,7 +92,7 @@ class PixnetSetExtractor(PixnetExtractor): subcategory = "set" directory_fmt = ("{category}", "{blog}", "{folder_id} {folder_title}", "{set_id} {set_title}") - pattern = rf"{BASE_PATTERN}/album/set/(\d+)" + pattern = BASE_PATTERN + r"/album/set/(\d+)" example = "https://USER.pixnet.net/album/set/12345" def items(self): @@ -137,7 +137,7 @@ class PixnetFolderExtractor(PixnetExtractor): """Extractor for all sets in a pixnet folder""" subcategory = "folder" url_fmt = "{}/album/folder/{}" - pattern = rf"{BASE_PATTERN}/album/folder/(\d+)" + pattern = BASE_PATTERN + r"/album/folder/(\d+)" example = "https://USER.pixnet.net/album/folder/12345" @@ -145,5 +145,5 @@ class PixnetUserExtractor(PixnetExtractor): 
"""Extractor for all sets and folders of a pixnet user""" subcategory = "user" url_fmt = "{}{}/album/list" - pattern = rf"{BASE_PATTERN}()(?:/blog|/album(?:/list)?)?/?(?:$|[?#])" + pattern = BASE_PATTERN + r"()(?:/blog|/album(?:/list)?)?/?(?:$|[?#])" example = "https://USER.pixnet.net/" diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py index c3aaaba..23f3b54 100644 --- a/gallery_dl/extractor/poipiku.py +++ b/gallery_dl/extractor/poipiku.py @@ -54,7 +54,7 @@ class PoipikuExtractor(Extractor): for post_url in self.posts(): if post_url[0] == "/": - post_url = f"{self.root}{post_url}" + post_url = self.root + post_url page = self.request(post_url).text extr = text.extract_from(page) parts = post_url.rsplit("/", 2) @@ -148,7 +148,7 @@ class PoipikuExtractor(Extractor): return files def _show_illust_detail(self, post): - url = f"{self.root}/f/ShowIllustDetailF.jsp" + url = self.root + "/f/ShowIllustDetailF.jsp" data = { "ID" : post["user_id"], "TD" : post["post_id"], @@ -160,7 +160,7 @@ class PoipikuExtractor(Extractor): interval=False) def _show_append_file(self, post): - url = f"{self.root}/f/ShowAppendFileF.jsp" + url = self.root + "/f/ShowAppendFileF.jsp" data = { "UID": post["user_id"], "IID": post["post_id"], @@ -176,14 +176,14 @@ class PoipikuExtractor(Extractor): class PoipikuUserExtractor(PoipikuExtractor): """Extractor for posts from a poipiku user""" subcategory = "user" - pattern = (rf"{BASE_PATTERN}/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?" - rf"(\d+)/?(?:$|[?&#])") + pattern = (BASE_PATTERN + r"/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?" 
+ r"(\d+)/?(?:$|[?&#])") example = "https://poipiku.com/12345/" def posts(self): pnum, user_id = self.groups - url = f"{self.root}/IllustListPcV.jsp" + url = self.root + "/IllustListPcV.jsp" params = { "PG" : text.parse_int(pnum, 0), "ID" : user_id, @@ -207,7 +207,7 @@ class PoipikuUserExtractor(PoipikuExtractor): class PoipikuPostExtractor(PoipikuExtractor): """Extractor for a poipiku post""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/(\d+)/(\d+)" + pattern = BASE_PATTERN + r"/(\d+)/(\d+)" example = "https://poipiku.com/12345/12345.html" def posts(self): diff --git a/gallery_dl/extractor/poringa.py b/gallery_dl/extractor/poringa.py index 832bedf..94a493f 100644 --- a/gallery_dl/extractor/poringa.py +++ b/gallery_dl/extractor/poringa.py @@ -104,7 +104,7 @@ class PoringaExtractor(Extractor): class PoringaPostExtractor(PoringaExtractor): """Extractor for posts on poringa.net""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/posts/imagenes/(\d+)" + pattern = BASE_PATTERN + r"/posts/imagenes/(\d+)" example = "http://www.poringa.net/posts/imagenes/12345/TITLE.html" def posts(self): @@ -113,7 +113,7 @@ class PoringaPostExtractor(PoringaExtractor): class PoringaUserExtractor(PoringaExtractor): subcategory = "user" - pattern = rf"{BASE_PATTERN}/(\w+)$" + pattern = BASE_PATTERN + r"/(\w+)$" example = "http://www.poringa.net/USER" def posts(self): @@ -124,7 +124,7 @@ class PoringaUserExtractor(PoringaExtractor): class PoringaSearchExtractor(PoringaExtractor): subcategory = "search" - pattern = rf"{BASE_PATTERN}/buscar/\?&?q=([^&#]+)" + pattern = BASE_PATTERN + r"/buscar/\?&?q=([^&#]+)" example = "http://www.poringa.net/buscar/?q=QUERY" def posts(self): diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py index 5ced315..d4ee3bc 100644 --- a/gallery_dl/extractor/pornhub.py +++ b/gallery_dl/extractor/pornhub.py @@ -54,7 +54,7 @@ class PornhubGalleryExtractor(PornhubExtractor): directory_fmt = ("{category}", "{user}", "{gallery[id]} 
{gallery[title]}") filename_fmt = "{num:>03}_{id}.{extension}" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/album/(\d+)" + pattern = BASE_PATTERN + r"/album/(\d+)" example = "https://www.pornhub.com/album/12345" def __init__(self, match): @@ -134,7 +134,7 @@ class PornhubGifExtractor(PornhubExtractor): directory_fmt = ("{category}", "{user}", "gifs") filename_fmt = "{id} {title}.{extension}" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/gif/(\d+)" + pattern = BASE_PATTERN + r"/gif/(\d+)" example = "https://www.pornhub.com/gif/12345" def __init__(self, match): @@ -163,7 +163,7 @@ class PornhubGifExtractor(PornhubExtractor): class PornhubUserExtractor(Dispatch, PornhubExtractor): """Extractor for a pornhub user""" - pattern = rf"{BASE_PATTERN}/((?:users|model|pornstar)/[^/?#]+)/?$" + pattern = BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)/?$" example = "https://www.pornhub.com/model/USER" def items(self): @@ -177,7 +177,7 @@ class PornhubUserExtractor(Dispatch, PornhubExtractor): class PornhubPhotosExtractor(PornhubExtractor): """Extractor for all galleries of a pornhub user""" subcategory = "photos" - pattern = (rf"{BASE_PATTERN}/((?:users|model|pornstar)/[^/?#]+)" + pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)" "/(photos(?:/[^/?#]+)?)") example = "https://www.pornhub.com/model/USER/photos" @@ -198,7 +198,7 @@ class PornhubPhotosExtractor(PornhubExtractor): class PornhubGifsExtractor(PornhubExtractor): """Extractor for a pornhub user's gifs""" subcategory = "gifs" - pattern = (rf"{BASE_PATTERN}/((?:users|model|pornstar)/[^/?#]+)" + pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)" "/(gifs(?:/[^/?#]+)?)") example = "https://www.pornhub.com/model/USER/gifs" diff --git a/gallery_dl/extractor/pornpics.py b/gallery_dl/extractor/pornpics.py index 9c926e8..0bcd7f4 100644 --- a/gallery_dl/extractor/pornpics.py +++ b/gallery_dl/extractor/pornpics.py @@ -58,7 +58,7 @@ class PornpicsExtractor(Extractor): class 
PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor): """Extractor for pornpics galleries""" - pattern = rf"{BASE_PATTERN}/galleries/((?:[^/?#]+-)?(\d+))" + pattern = BASE_PATTERN + r"/galleries/((?:[^/?#]+-)?(\d+))" example = "https://www.pornpics.com/galleries/TITLE-12345/" def __init__(self, match): @@ -94,7 +94,7 @@ class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor): class PornpicsTagExtractor(PornpicsExtractor): """Extractor for galleries from pornpics tag searches""" subcategory = "tag" - pattern = rf"{BASE_PATTERN}/tags/([^/?#]+)" + pattern = BASE_PATTERN + r"/tags/([^/?#]+)" example = "https://www.pornpics.com/tags/TAGS/" def galleries(self): @@ -105,7 +105,7 @@ class PornpicsTagExtractor(PornpicsExtractor): class PornpicsSearchExtractor(PornpicsExtractor): """Extractor for galleries from pornpics search results""" subcategory = "search" - pattern = rf"{BASE_PATTERN}/(?:\?q=|pornstars/|channels/)([^/&#]+)" + pattern = BASE_PATTERN + r"/(?:\?q=|pornstars/|channels/)([^/&#]+)" example = "https://www.pornpics.com/?q=QUERY" def galleries(self): @@ -125,8 +125,8 @@ class PornpicsListingExtractor(PornpicsExtractor): and use single quotes in HTML, unlike category pages. 
""" subcategory = "listing" - pattern = (rf"{BASE_PATTERN}" - rf"/(popular|recent|rating|likes|views|comments)/?$") + pattern = (BASE_PATTERN + + r"/(popular|recent|rating|likes|views|comments)/?$") example = "https://www.pornpics.com/popular/" def galleries(self): @@ -142,7 +142,7 @@ class PornpicsListingExtractor(PornpicsExtractor): class PornpicsCategoryExtractor(PornpicsExtractor): """Extractor for galleries from pornpics categories""" subcategory = "category" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/?$" + pattern = BASE_PATTERN + r"/([^/?#]+)/?$" example = "https://www.pornpics.com/ass/" def galleries(self): diff --git a/gallery_dl/extractor/postmill.py b/gallery_dl/extractor/postmill.py index e71246a..22d2bde 100644 --- a/gallery_dl/extractor/postmill.py +++ b/gallery_dl/extractor/postmill.py @@ -95,7 +95,7 @@ class PostmillSubmissionsExtractor(PostmillExtractor): groups[-1]).items() if self.acceptable_query(key)} def items(self): - url = self.root + self.base + self.sorting_path + url = f"{self.root}{self.base}{self.sorting_path}" while url: response = self.request(url, params=self.query) @@ -130,14 +130,14 @@ BASE_PATTERN = PostmillExtractor.update({ } }) QUERY_RE = r"(?:\?([^#]+))?$" -SORTING_RE = (rf"(/(?:hot|new|active|top|controversial|most_commented))?" - rf"{QUERY_RE}") +SORTING_RE = (r"(/(?:hot|new|active|top|controversial|most_commented))?" 
+ + QUERY_RE) class PostmillPostExtractor(PostmillExtractor): """Extractor for a single submission URL""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/f/(\w+)/(\d+)" + pattern = BASE_PATTERN + r"/f/(\w+)/(\d+)" example = "https://raddle.me/f/FORUM/123/TITLE" def __init__(self, match): @@ -146,13 +146,13 @@ class PostmillPostExtractor(PostmillExtractor): self.post_id = match[4] def post_urls(self): - return (self.root + "/f/" + self.forum + "/" + self.post_id,) + return (f"{self.root}/f/{self.forum}/{self.post_id}",) class PostmillShortURLExtractor(PostmillExtractor): """Extractor for short submission URLs""" subcategory = "shorturl" - pattern = rf"{BASE_PATTERN}(/\d+)$" + pattern = BASE_PATTERN + r"(/\d+)$" example = "https://raddle.me/123" def items(self): @@ -193,6 +193,6 @@ class PostmillTagExtractor(PostmillSubmissionsExtractor): class PostmillSearchExtractor(PostmillSubmissionsExtractor): """Extractor for search results""" subcategory = "search" - pattern = rf"{BASE_PATTERN}(/search)()\?(q=[^#]+)$" + pattern = BASE_PATTERN + r"(/search)()\?(q=[^#]+)$" example = "https://raddle.me/search?q=QUERY" whitelisted_parameters = ("q",) diff --git a/gallery_dl/extractor/rawkuma.py b/gallery_dl/extractor/rawkuma.py index a4a0c9b..4a11549 100644 --- a/gallery_dl/extractor/rawkuma.py +++ b/gallery_dl/extractor/rawkuma.py @@ -21,7 +21,7 @@ class RawkumaBase(): class RawkumaChapterExtractor(RawkumaBase, ChapterExtractor): """Extractor for manga chapters from rawkuma.net""" archive_fmt = "{chapter_id}_{page}" - pattern = rf"{BASE_PATTERN}(/manga/[^/?#]+/chapter-\d+(?:.\d+)?\.(\d+))" + pattern = BASE_PATTERN + r"(/manga/[^/?#]+/chapter-\d+(?:.\d+)?\.(\d+))" example = "https://rawkuma.net/manga/7TITLE/chapter-123.321" def __init__(self, match): @@ -54,7 +54,7 @@ class RawkumaChapterExtractor(RawkumaBase, ChapterExtractor): class RawkumaMangaExtractor(RawkumaBase, MangaExtractor): """Extractor for manga from rawkuma.net""" chapterclass = RawkumaChapterExtractor - pattern = 
rf"{BASE_PATTERN}/manga/([^/?#]+)" + pattern = BASE_PATTERN + r"/manga/([^/?#]+)" example = "https://rawkuma.net/manga/TITLE/" def __init__(self, match): @@ -65,7 +65,7 @@ class RawkumaMangaExtractor(RawkumaBase, MangaExtractor): manga = text.unescape(text.extr(page, "<title>", " – ")) manga_id = text.parse_int(text.extr(page, "manga_id=", "&")) - url = f"{self.root}/wp-admin/admin-ajax.php" + url = self.root + "/wp-admin/admin-ajax.php" params = { "manga_id": manga_id, "page" : "1", diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py index 8e974d2..8fd29d2 100644 --- a/gallery_dl/extractor/reactor.py +++ b/gallery_dl/extractor/reactor.py @@ -171,7 +171,7 @@ class ReactorTagExtractor(ReactorExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "{search_tags}_{post_id}_{num}" - pattern = rf"{BASE_PATTERN}/tag/([^/?#]+)(?:/[^/?#]+)?" + pattern = BASE_PATTERN + r"/tag/([^/?#]+)(?:/[^/?#]+)?" example = "http://reactor.cc/tag/TAG" def __init__(self, match): @@ -187,7 +187,7 @@ class ReactorSearchExtractor(ReactorExtractor): subcategory = "search" directory_fmt = ("{category}", "search", "{search_tags}") archive_fmt = "s_{search_tags}_{post_id}_{num}" - pattern = rf"{BASE_PATTERN}/search(?:/|\?q=)([^/?#]+)" + pattern = BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)" example = "http://reactor.cc/search?q=QUERY" def __init__(self, match): @@ -202,7 +202,7 @@ class ReactorUserExtractor(ReactorExtractor): """Extractor for all posts of a user on *reactor.cc sites""" subcategory = "user" directory_fmt = ("{category}", "user", "{user}") - pattern = rf"{BASE_PATTERN}/user/([^/?#]+)" + pattern = BASE_PATTERN + r"/user/([^/?#]+)" example = "http://reactor.cc/user/USER" def __init__(self, match): @@ -216,7 +216,7 @@ class ReactorUserExtractor(ReactorExtractor): class ReactorPostExtractor(ReactorExtractor): """Extractor for single posts on *reactor.cc sites""" subcategory = "post" - pattern = 
rf"{BASE_PATTERN}/post/(\d+)" + pattern = BASE_PATTERN + r"/post/(\d+)" example = "http://reactor.cc/post/12345" def __init__(self, match): diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py index dccf91d..24a0171 100644 --- a/gallery_dl/extractor/readcomiconline.py +++ b/gallery_dl/extractor/readcomiconline.py @@ -44,7 +44,7 @@ class ReadcomiconlineBase(): class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): """Extractor for comic-issues from readcomiconline.li""" subcategory = "issue" - pattern = rf"{BASE_PATTERN}(/Comic/[^/?#]+/[^/?#]+\?)([^#]+)" + pattern = BASE_PATTERN + r"(/Comic/[^/?#]+/[^/?#]+\?)([^#]+)" example = "https://readcomiconline.li/Comic/TITLE/Issue-123?id=12345" def _init(self): @@ -98,7 +98,7 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor): """Extractor for comics from readcomiconline.li""" chapterclass = ReadcomiconlineIssueExtractor subcategory = "comic" - pattern = rf"{BASE_PATTERN}(/Comic/[^/?#]+/?)$" + pattern = BASE_PATTERN + r"(/Comic/[^/?#]+/?)$" example = "https://readcomiconline.li/Comic/TITLE" def chapters(self, page): diff --git a/gallery_dl/extractor/realbooru.py b/gallery_dl/extractor/realbooru.py index 7f731f8..3454340 100644 --- a/gallery_dl/extractor/realbooru.py +++ b/gallery_dl/extractor/realbooru.py @@ -49,7 +49,7 @@ class RealbooruExtractor(booru.BooruExtractor): tags.append(tag) tags_categories[tag_type].append(tag) for key, value in tags_categories.items(): - post[f"tags_{key}"] = ", ".join(value) + post["tags_" + key] = ", ".join(value) tags.sort() post["tags"] = ", ".join(tags) @@ -85,7 +85,7 @@ class RealbooruTagExtractor(RealbooruExtractor): directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" per_page = 42 - pattern = rf"{BASE_PATTERN}/index\.php\?page=post&s=list&tags=([^&#]*)" + pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]*)" example = 
"https://realbooru.com/index.php?page=post&s=list&tags=TAG" def metadata(self): @@ -105,7 +105,7 @@ class RealbooruFavoriteExtractor(RealbooruExtractor): directory_fmt = ("{category}", "favorites", "{favorite_id}") archive_fmt = "f_{favorite_id}_{id}" per_page = 50 - pattern = rf"{BASE_PATTERN}/index\.php\?page=favorites&s=view&id=(\d+)" + pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)" example = "https://realbooru.com/index.php?page=favorites&s=view&id=12345" def metadata(self): @@ -123,7 +123,7 @@ class RealbooruPoolExtractor(RealbooruExtractor): subcategory = "pool" directory_fmt = ("{category}", "pool", "{pool} {pool_name}") archive_fmt = "p_{pool}_{id}" - pattern = rf"{BASE_PATTERN}/index\.php\?page=pool&s=show&id=(\d+)" + pattern = BASE_PATTERN + r"/index\.php\?page=pool&s=show&id=(\d+)" example = "https://realbooru.com/index.php?page=pool&s=show&id=12345" def metadata(self): @@ -150,7 +150,7 @@ class RealbooruPoolExtractor(RealbooruExtractor): class RealbooruPostExtractor(RealbooruExtractor): subcategory = "post" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/index\.php\?page=post&s=view&id=(\d+)" + pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)" example = "https://realbooru.com/index.php?page=post&s=view&id=12345" def posts(self): diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index cc73e47..a8bde87 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -223,10 +223,10 @@ class RedditExtractor(Extractor): self.log.debug(src) elif url := data.get("dashUrl"): submission["_ytdl_manifest"] = "dash" - yield f"ytdl:{url}" + yield "ytdl:" + url elif url := data.get("hlsUrl"): submission["_ytdl_manifest"] = "hls" - yield f"ytdl:{url}" + yield "ytdl:" + url def _extract_video_ytdl(self, submission): return "https://www.reddit.com" + submission["permalink"] @@ -506,7 +506,7 @@ class RedditAPI(): return "Bearer " + data["access_token"] def _call(self, endpoint, 
params): - url = f"{self.root}{endpoint}" + url = self.root + endpoint params["raw_json"] = "1" while True: diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py index 164fdf4..de8b82e 100644 --- a/gallery_dl/extractor/redgifs.py +++ b/gallery_dl/extractor/redgifs.py @@ -135,7 +135,7 @@ class RedgifsCollectionsExtractor(RedgifsExtractor): def items(self): base = f"{self.root}/users/{self.key}/collections/" for collection in self.api.collections(self.key): - url = f"{base}{collection['folderId']}" + url = base + collection["folderId"] collection["_extractor"] = RedgifsCollectionExtractor yield Message.Queue, url, collection diff --git a/gallery_dl/extractor/rule34vault.py b/gallery_dl/extractor/rule34vault.py index 9f75f64..74d79d3 100644 --- a/gallery_dl/extractor/rule34vault.py +++ b/gallery_dl/extractor/rule34vault.py @@ -79,7 +79,7 @@ class Rule34vaultExtractor(BooruExtractor): class Rule34vaultPostExtractor(Rule34vaultExtractor): subcategory = "post" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/post/(\d+)" + pattern = BASE_PATTERN + r"/post/(\d+)" example = "https://rule34vault.com/post/12345" def posts(self): @@ -90,7 +90,7 @@ class Rule34vaultPlaylistExtractor(Rule34vaultExtractor): subcategory = "playlist" directory_fmt = ("{category}", "{playlist_id}") archive_fmt = "p_{playlist_id}_{id}" - pattern = rf"{BASE_PATTERN}/playlists/view/(\d+)" + pattern = BASE_PATTERN + r"/playlists/view/(\d+)" example = "https://rule34vault.com/playlists/view/12345" def metadata(self): @@ -105,7 +105,7 @@ class Rule34vaultTagExtractor(Rule34vaultExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" - pattern = rf"{BASE_PATTERN}/(?!p(?:ost|laylists)/)([^/?#]+)" + pattern = BASE_PATTERN + r"/(?!p(?:ost|laylists)/)([^/?#]+)" example = "https://rule34vault.com/TAG" def metadata(self): diff --git a/gallery_dl/extractor/rule34xyz.py b/gallery_dl/extractor/rule34xyz.py index ddd656f..b395f03 
100644 --- a/gallery_dl/extractor/rule34xyz.py +++ b/gallery_dl/extractor/rule34xyz.py @@ -120,13 +120,13 @@ class Rule34xyzExtractor(BooruExtractor): def _login_impl(self, username, password): self.log.info("Logging in as %s", username) - url = f"{self.root}/api/v2/auth/signin" + url = self.root + "/api/v2/auth/signin" data = {"email": username, "password": password} response = self.request_json( url, method="POST", json=data, fatal=False) if jwt := response.get("jwt"): - return f"Bearer {jwt}" + return "Bearer " + jwt raise exception.AuthenticationError( (msg := response.get("message")) and f'"{msg}"') @@ -134,7 +134,7 @@ class Rule34xyzExtractor(BooruExtractor): class Rule34xyzPostExtractor(Rule34xyzExtractor): subcategory = "post" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/post/(\d+)" + pattern = BASE_PATTERN + r"/post/(\d+)" example = "https://rule34.xyz/post/12345" def posts(self): @@ -145,7 +145,7 @@ class Rule34xyzPlaylistExtractor(Rule34xyzExtractor): subcategory = "playlist" directory_fmt = ("{category}", "{playlist_id}") archive_fmt = "p_{playlist_id}_{id}" - pattern = rf"{BASE_PATTERN}/playlists/view/(\d+)" + pattern = BASE_PATTERN + r"/playlists/view/(\d+)" example = "https://rule34.xyz/playlists/view/12345" def metadata(self): @@ -160,7 +160,7 @@ class Rule34xyzTagExtractor(Rule34xyzExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" - pattern = rf"{BASE_PATTERN}/([^/?#]+)$" + pattern = BASE_PATTERN + r"/([^/?#]+)$" example = "https://rule34.xyz/TAG" def metadata(self): diff --git a/gallery_dl/extractor/s3ndpics.py b/gallery_dl/extractor/s3ndpics.py index 9201a3f..7eeeece 100644 --- a/gallery_dl/extractor/s3ndpics.py +++ b/gallery_dl/extractor/s3ndpics.py @@ -18,7 +18,7 @@ class S3ndpicsExtractor(Extractor): """Base class for s3ndpics extractors""" category = "s3ndpics" root = "https://s3nd.pics" - root_api = f"{root}/api" + root_api = root + "/api" directory_fmt = ("{category}", 
"{user[username]}", "{date} {title:?/ /}({id})") filename_fmt = "{num:>02}.{extension}" @@ -41,7 +41,7 @@ class S3ndpicsExtractor(Extractor): post["type"] = file["type"] path = file["url"] text.nameext_from_url(path, post) - yield Message.Url, f"{base}{path}", post + yield Message.Url, base + path, post def _pagination(self, url, params): params["page"] = 1 @@ -59,7 +59,7 @@ class S3ndpicsExtractor(Extractor): class S3ndpicsPostExtractor(S3ndpicsExtractor): subcategory = "post" - pattern = rf"{BASE_PATTERN}/post/([0-9a-f]+)" + pattern = BASE_PATTERN + r"/post/([0-9a-f]+)" example = "https://s3nd.pics/post/0123456789abcdef01234567" def posts(self): @@ -69,14 +69,14 @@ class S3ndpicsPostExtractor(S3ndpicsExtractor): class S3ndpicsUserExtractor(S3ndpicsExtractor): subcategory = "user" - pattern = rf"{BASE_PATTERN}/user/(\w+)" + pattern = BASE_PATTERN + r"/user/(\w+)" example = "https://s3nd.pics/user/USER" def posts(self): url = f"{self.root_api}/users/username/{self.groups[0]}" self.kwdict["user"] = user = self.request_json(url)["user"] - url = f"{self.root_api}/posts" + url = self.root_api + "/posts" params = { "userId": user["_id"], "limit" : "12", @@ -87,11 +87,11 @@ class S3ndpicsUserExtractor(S3ndpicsExtractor): class S3ndpicsSearchExtractor(S3ndpicsExtractor): subcategory = "search" - pattern = rf"{BASE_PATTERN}/search/?\?([^#]+)" + pattern = BASE_PATTERN + r"/search/?\?([^#]+)" example = "https://s3nd.pics/search?QUERY" def posts(self): - url = f"{self.root_api}/posts" + url = self.root_api + "/posts" params = text.parse_query(self.groups[0]) params.setdefault("limit", "20") self.kwdict["search_tags"] = \ diff --git a/gallery_dl/extractor/saint.py b/gallery_dl/extractor/saint.py index e15c628..9618b7d 100644 --- a/gallery_dl/extractor/saint.py +++ b/gallery_dl/extractor/saint.py @@ -18,7 +18,7 @@ class SaintAlbumExtractor(LolisafeAlbumExtractor): """Extractor for saint albums""" category = "saint" root = "https://saint2.su" - pattern = 
rf"{BASE_PATTERN}/a/([^/?#]+)" + pattern = BASE_PATTERN + r"/a/([^/?#]+)" example = "https://saint2.su/a/ID" def fetch_album(self, album_id): @@ -58,7 +58,7 @@ class SaintMediaExtractor(SaintAlbumExtractor): """Extractor for saint media links""" subcategory = "media" directory_fmt = ("{category}",) - pattern = rf"{BASE_PATTERN}(/(embe)?d/([^/?#]+))" + pattern = BASE_PATTERN + r"(/(embe)?d/([^/?#]+))" example = "https://saint2.su/embed/ID" def fetch_album(self, album_id): diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index 690b515..6cb9f79 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -119,7 +119,7 @@ class SankakuTagExtractor(SankakuExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" - pattern = rf"{BASE_PATTERN}(?:/posts)?/?\?([^#]*)" + pattern = BASE_PATTERN + r"(?:/posts)?/?\?([^#]*)" example = "https://sankaku.app/?tags=TAG" def __init__(self, match): @@ -149,7 +149,7 @@ class SankakuPoolExtractor(SankakuExtractor): subcategory = "pool" directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name_en]}") archive_fmt = "p_{pool}_{id}" - pattern = rf"{BASE_PATTERN}/(?:books|pools?/show)/(\w+)" + pattern = BASE_PATTERN + r"/(?:books|pools?/show)/(\w+)" example = "https://sankaku.app/books/12345" def metadata(self): @@ -171,7 +171,7 @@ class SankakuPostExtractor(SankakuExtractor): """Extractor for single posts from sankaku.app""" subcategory = "post" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/posts?(?:/show)?/(\w+)" + pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)" example = "https://sankaku.app/post/show/12345" def posts(self): @@ -181,7 +181,7 @@ class SankakuPostExtractor(SankakuExtractor): class SankakuBooksExtractor(SankakuExtractor): """Extractor for books by tag search on sankaku.app""" subcategory = "books" - pattern = rf"{BASE_PATTERN}/books/?\?([^#]*)" + pattern = BASE_PATTERN + r"/books/?\?([^#]*)" example 
= "https://sankaku.app/books?tags=TAG" def __init__(self, match): @@ -193,7 +193,7 @@ class SankakuBooksExtractor(SankakuExtractor): params = {"tags": self.tags, "pool_type": "0"} for pool in self.api.pools_keyset(params): pool["_extractor"] = SankakuPoolExtractor - url = f"https://sankaku.app/books/{pool['id']}" + url = "https://sankaku.app/books/" + pool["id"] yield Message.Queue, url, pool @@ -356,6 +356,7 @@ def _authenticate_impl(extr, username, password): extr.log.info("Logging in as %s", username) api = extr.api + api.headers["Authorization"] = None url = api.ROOT + "/auth/token" data = {"login": username, "password": password} diff --git a/gallery_dl/extractor/schalenetwork.py b/gallery_dl/extractor/schalenetwork.py index bbbb9da..30e57bc 100644 --- a/gallery_dl/extractor/schalenetwork.py +++ b/gallery_dl/extractor/schalenetwork.py @@ -64,7 +64,7 @@ class SchalenetworkExtractor(Extractor): def _token(self, required=True): if token := self.config("token"): - return f"Bearer {token.rpartition(' ')[2]}" + return "Bearer " + token.rpartition(' ')[2] if required: raise exception.AuthRequired("'token'", "your favorites") @@ -98,7 +98,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor): directory_fmt = ("{category}", "{id} {title}") archive_fmt = "{id}_{num}" request_interval = 0.0 - pattern = rf"{BASE_PATTERN}/(?:g|reader)/(\d+)/(\w+)" + pattern = BASE_PATTERN + r"/(?:g|reader)/(\d+)/(\w+)" example = "https://niyaniya.moe/g/12345/67890abcde/" TAG_TYPES = { @@ -172,7 +172,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor): if self.config("cbz", False): headers["Authorization"] = self._token() dl = self.request_json( - f"{url}&action=dl", method="POST", headers=headers) + url + "&action=dl", method="POST", headers=headers) # 'crt' parameter here is necessary for 'hdoujin' downloads url = f"{dl['base']}?crt={self._crt()}" info = text.nameext_from_url(url) @@ -227,7 +227,7 @@ class 
SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor): class SchalenetworkSearchExtractor(SchalenetworkExtractor): """Extractor for schale.network search results""" subcategory = "search" - pattern = rf"{BASE_PATTERN}/(?:tag/([^/?#]+)|browse)?(?:/?\?([^#]*))?$" + pattern = BASE_PATTERN + r"/(?:tag/([^/?#]+)|browse)?(?:/?\?([^#]*))?$" example = "https://niyaniya.moe/browse?s=QUERY" def items(self): @@ -252,14 +252,14 @@ class SchalenetworkSearchExtractor(SchalenetworkExtractor): class SchalenetworkFavoriteExtractor(SchalenetworkExtractor): """Extractor for schale.network favorites""" subcategory = "favorite" - pattern = rf"{BASE_PATTERN}/favorites(?:\?([^#]*))?" + pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?" example = "https://niyaniya.moe/favorites" def items(self): params = text.parse_query(self.groups[1]) params["page"] = text.parse_int(params.get("page"), 1) self.headers["Authorization"] = self._token() - return self._pagination(f"/books/favorites?crt={self._crt()}", params) + return self._pagination("/books/favorites?crt=" + self._crt(), params) SchalenetworkExtractor.extr_class = SchalenetworkGalleryExtractor diff --git a/gallery_dl/extractor/scrolller.py b/gallery_dl/extractor/scrolller.py index b853f53..3fce2cf 100644 --- a/gallery_dl/extractor/scrolller.py +++ b/gallery_dl/extractor/scrolller.py @@ -136,7 +136,7 @@ class ScrolllerExtractor(Extractor): class ScrolllerSubredditExtractor(ScrolllerExtractor): """Extractor for media from a scrolller subreddit""" subcategory = "subreddit" - pattern = rf"{BASE_PATTERN}(/r/[^/?#]+)(?:/?\?([^#]+))?" + pattern = BASE_PATTERN + r"(/r/[^/?#]+)(?:/?\?([^#]+))?" 
example = "https://scrolller.com/r/SUBREDDIT" def posts(self): @@ -173,7 +173,7 @@ class ScrolllerSubredditExtractor(ScrolllerExtractor): class ScrolllerFollowingExtractor(ScrolllerExtractor): """Extractor for followed scrolller subreddits""" subcategory = "following" - pattern = rf"{BASE_PATTERN}/following" + pattern = BASE_PATTERN + r"/following" example = "https://scrolller.com/following" def items(self): @@ -199,7 +199,7 @@ class ScrolllerFollowingExtractor(ScrolllerExtractor): class ScrolllerPostExtractor(ScrolllerExtractor): """Extractor for media from a single scrolller post""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/(?!r/|following$)([^/?#]+)" + pattern = BASE_PATTERN + r"/(?!r/|following$)([^/?#]+)" example = "https://scrolller.com/TITLE-SLUG-a1b2c3d4f5" def posts(self): diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py index b599f70..417538c 100644 --- a/gallery_dl/extractor/sexcom.py +++ b/gallery_dl/extractor/sexcom.py @@ -194,8 +194,8 @@ class SexcomPinExtractor(SexcomExtractor): """Extractor for a pinned image or video on www.sex.com""" subcategory = "pin" directory_fmt = ("{category}",) - pattern = (rf"{BASE_PATTERN}" - rf"(/(?:\w\w/(?:pic|gif|video)s|pin)/\d+/?)(?!.*#related$)") + pattern = (BASE_PATTERN + + r"(/(?:\w\w/(?:pic|gif|video)s|pin)/\d+/?)(?!.*#related$)") example = "https://www.sex.com/pin/12345-TITLE/" def pins(self): @@ -206,7 +206,7 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor): """Extractor for related pins on www.sex.com""" subcategory = "related-pin" directory_fmt = ("{category}", "related {original_pin[pin_id]}") - pattern = rf"{BASE_PATTERN}(/pin/(\d+)/?).*#related$" + pattern = BASE_PATTERN + r"(/pin/(\d+)/?).*#related$" example = "https://www.sex.com/pin/12345#related" def metadata(self): @@ -223,7 +223,7 @@ class SexcomPinsExtractor(SexcomExtractor): """Extractor for a user's pins on www.sex.com""" subcategory = "pins" directory_fmt = ("{category}", "{user}") - pattern = 
rf"{BASE_PATTERN}/user/([^/?#]+)/pins/" + pattern = BASE_PATTERN + r"/user/([^/?#]+)/pins/" example = "https://www.sex.com/user/USER/pins/" def metadata(self): @@ -238,7 +238,7 @@ class SexcomLikesExtractor(SexcomExtractor): """Extractor for a user's liked pins on www.sex.com""" subcategory = "likes" directory_fmt = ("{category}", "{user}", "Likes") - pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/likes/" + pattern = BASE_PATTERN + r"/user/([^/?#]+)/likes/" example = "https://www.sex.com/user/USER/likes/" def metadata(self): @@ -253,8 +253,8 @@ class SexcomBoardExtractor(SexcomExtractor): """Extractor for pins from a board on www.sex.com""" subcategory = "board" directory_fmt = ("{category}", "{user}", "{board}") - pattern = (rf"{BASE_PATTERN}/user" - rf"/([^/?#]+)/(?!(?:following|pins|repins|likes)/)([^/?#]+)") + pattern = (BASE_PATTERN + r"/user" + r"/([^/?#]+)/(?!(?:following|pins|repins|likes)/)([^/?#]+)") example = "https://www.sex.com/user/USER/BOARD/" def metadata(self): @@ -273,7 +273,7 @@ class SexcomFeedExtractor(SexcomExtractor): """Extractor for pins from your account's main feed on www.sex.com""" subcategory = "feed" directory_fmt = ("{category}", "feed") - pattern = rf"{BASE_PATTERN}/feed" + pattern = BASE_PATTERN + r"/feed" example = "https://www.sex.com/feed/" def metadata(self): @@ -282,7 +282,7 @@ class SexcomFeedExtractor(SexcomExtractor): def pins(self): if not self.cookies_check(("sess_sex",)): self.log.warning("no 'sess_sex' cookie set") - url = f"{self.root}/feed/" + url = self.root + "/feed/" return self._pagination(url) @@ -290,10 +290,10 @@ class SexcomSearchExtractor(SexcomExtractor): """Extractor for search results on www.sex.com""" subcategory = "search" directory_fmt = ("{category}", "search", "{search[search]}") - pattern = (rf"{BASE_PATTERN}/(?:" - rf"(pic|gif|video)s(?:\?(search=[^#]+)$|/([^/?#]*))" - rf"|search/(pic|gif|video)s" - rf")/?(?:\?([^#]+))?") + pattern = (BASE_PATTERN + r"/(?:" + 
r"(pic|gif|video)s(?:\?(search=[^#]+)$|/([^/?#]*))" + r"|search/(pic|gif|video)s" + r")/?(?:\?([^#]+))?") example = "https://www.sex.com/search/pics?query=QUERY" def _init(self): @@ -341,10 +341,10 @@ class SexcomSearchExtractor(SexcomExtractor): pin["type"] = "gif" if gifs and pin["extension"] == "webp": pin["extension"] = "gif" - pin["_fallback"] = (f"{root}{path}",) - path = f"{path[:-4]}gif" + pin["_fallback"] = (root + path,) + path = path[:-4] + "gif" - pin["url"] = f"{root}{path}" + pin["url"] = root + path yield Message.Directory, "", pin yield Message.Url, pin["url"], pin diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py index 5572b4d..e2e1c6f 100644 --- a/gallery_dl/extractor/shimmie2.py +++ b/gallery_dl/extractor/shimmie2.py @@ -100,7 +100,7 @@ class Shimmie2TagExtractor(Shimmie2Extractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") file_url_fmt = "{}/_images/{}/{}%20-%20{}.{}" - pattern = rf"{BASE_PATTERN}post/list/([^/?#]+)(?:/(\d+))?" + pattern = BASE_PATTERN + r"post/list/([^/?#]+)(?:/(\d+))?" 
example = "https://vidya.pics/post/list/TAG/1" def metadata(self): @@ -164,7 +164,7 @@ class Shimmie2TagExtractor(Shimmie2Extractor): class Shimmie2PostExtractor(Shimmie2Extractor): """Extractor for single shimmie2 posts""" subcategory = "post" - pattern = rf"{BASE_PATTERN}post/view/(\d+)" + pattern = BASE_PATTERN + r"post/view/(\d+)" example = "https://vidya.pics/post/view/12345" def posts(self): diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py index ad38562..c5e5c20 100644 --- a/gallery_dl/extractor/shopify.py +++ b/gallery_dl/extractor/shopify.py @@ -90,7 +90,7 @@ class ShopifyCollectionExtractor(ShopifyExtractor): """Base class for collection extractors for Shopify based sites""" subcategory = "collection" directory_fmt = ("{category}", "{collection[title]}") - pattern = rf"{BASE_PATTERN}(/collections/[\w-]+)/?(?:$|[?#])" + pattern = BASE_PATTERN + r"(/collections/[\w-]+)/?(?:$|[?#])" example = "https://www.fashionnova.com/collections/TITLE" def metadata(self): @@ -113,7 +113,7 @@ class ShopifyProductExtractor(ShopifyExtractor): """Base class for product extractors for Shopify based sites""" subcategory = "product" directory_fmt = ("{category}", "Products") - pattern = rf"{BASE_PATTERN}((?:/collections/[\w-]+)?/products/[\w-]+)" + pattern = BASE_PATTERN + r"((?:/collections/[\w-]+)?/products/[\w-]+)" example = "https://www.fashionnova.com/collections/TITLE/products/NAME" def products(self): diff --git a/gallery_dl/extractor/sizebooru.py b/gallery_dl/extractor/sizebooru.py index 00002b8..3b61747 100644 --- a/gallery_dl/extractor/sizebooru.py +++ b/gallery_dl/extractor/sizebooru.py @@ -98,7 +98,7 @@ class SizebooruExtractor(BooruExtractor): class SizebooruPostExtractor(SizebooruExtractor): """Extractor for sizebooru posts""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/Details/(\d+)" + pattern = BASE_PATTERN + r"/Details/(\d+)" example = "https://sizebooru.com/Details/12345" def posts(self): @@ -109,7 +109,7 @@ class 
SizebooruTagExtractor(SizebooruExtractor): """Extractor for sizebooru tag searches""" subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") - pattern = rf"{BASE_PATTERN}/Search/([^/?#]+)" + pattern = BASE_PATTERN + r"/Search/([^/?#]+)" example = "https://sizebooru.com/Search/TAG" def posts(self): @@ -122,7 +122,7 @@ class SizebooruGalleryExtractor(SizebooruExtractor): """Extractor for sizebooru galleries""" subcategory = "gallery" directory_fmt = ("{category}", "{gallery_name} ({gallery_id})") - pattern = rf"{BASE_PATTERN}/Galleries/List/(\d+)" + pattern = BASE_PATTERN + r"/Galleries/List/(\d+)" example = "https://sizebooru.com/Galleries/List/123" def posts(self): @@ -140,7 +140,7 @@ class SizebooruUserExtractor(SizebooruExtractor): """Extractor for a sizebooru user's uploads""" subcategory = "user" directory_fmt = ("{category}", "Uploads {user}") - pattern = rf"{BASE_PATTERN}/Profile/Uploads/([^/?#]+)" + pattern = BASE_PATTERN + r"/Profile/Uploads/([^/?#]+)" example = "https://sizebooru.com/Profile/Uploads/USER" def posts(self): @@ -153,7 +153,7 @@ class SizebooruFavoriteExtractor(SizebooruExtractor): """Extractor for a sizebooru user's favorites""" subcategory = "favorite" directory_fmt = ("{category}", "Favorites {user}") - pattern = rf"{BASE_PATTERN}/Profile/Favorites/([^/?#]+)" + pattern = BASE_PATTERN + r"/Profile/Favorites/([^/?#]+)" example = "https://sizebooru.com/Profile/Favorites/USER" def posts(self): diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py index 43e518e..3c0ae55 100644 --- a/gallery_dl/extractor/skeb.py +++ b/gallery_dl/extractor/skeb.py @@ -10,7 +10,7 @@ from .common import Extractor, Message, Dispatch from .. 
import text BASE_PATTERN = r"(?:https?://)?skeb\.jp" -USER_PATTERN = rf"{BASE_PATTERN}/@([^/?#]+)" +USER_PATTERN = BASE_PATTERN + r"/@([^/?#]+)" class SkebExtractor(Extractor): @@ -194,7 +194,7 @@ class SkebExtractor(Extractor): class SkebPostExtractor(SkebExtractor): """Extractor for a single skeb post""" subcategory = "post" - pattern = rf"{USER_PATTERN}/works/(\d+)" + pattern = USER_PATTERN + r"/works/(\d+)" example = "https://skeb.jp/@USER/works/123" def posts(self): @@ -204,7 +204,7 @@ class SkebPostExtractor(SkebExtractor): class SkebWorksExtractor(SkebExtractor): """Extractor for a skeb user's works""" subcategory = "works" - pattern = rf"{USER_PATTERN}/works" + pattern = USER_PATTERN + r"/works" example = "https://skeb.jp/@USER/works" def posts(self): @@ -216,7 +216,7 @@ class SkebWorksExtractor(SkebExtractor): class SkebSentrequestsExtractor(SkebExtractor): """Extractor for a skeb user's sent requests""" subcategory = "sentrequests" - pattern = rf"{USER_PATTERN}/sent[ _-]?requests" + pattern = USER_PATTERN + r"/sent[ _-]?requests" example = "https://skeb.jp/@USER/sentrequests" def posts(self): @@ -227,7 +227,7 @@ class SkebSentrequestsExtractor(SkebExtractor): class SkebUserExtractor(Dispatch, SkebExtractor): """Extractor for a skeb user profile""" - pattern = rf"{USER_PATTERN}/?$" + pattern = USER_PATTERN + r"/?$" example = "https://skeb.jp/@USER" def items(self): @@ -246,7 +246,7 @@ class SkebUserExtractor(Dispatch, SkebExtractor): class SkebSearchExtractor(SkebExtractor): """Extractor for skeb search results""" subcategory = "search" - pattern = rf"{BASE_PATTERN}/search\?q=([^&#]+)" + pattern = BASE_PATTERN + r"/search\?q=([^&#]+)" example = "https://skeb.jp/search?q=QUERY" def metadata(self): @@ -298,7 +298,7 @@ class SkebSearchExtractor(SkebExtractor): class SkebFollowingExtractor(SkebExtractor): """Extractor for all creators followed by a skeb user""" subcategory = "following" - pattern = rf"{USER_PATTERN}/following_creators" + pattern = USER_PATTERN 
+ r"/following_creators" example = "https://skeb.jp/@USER/following_creators" items = SkebExtractor.items_users @@ -312,7 +312,7 @@ class SkebFollowingExtractor(SkebExtractor): class SkebFollowingUsersExtractor(SkebExtractor): """Extractor for your followed users""" subcategory = "following-users" - pattern = rf"{BASE_PATTERN}/following_users" + pattern = BASE_PATTERN + r"/following_users" example = "https://skeb.jp/following_users" items = SkebExtractor.items_users diff --git a/gallery_dl/extractor/slickpic.py b/gallery_dl/extractor/slickpic.py index 6f723c8..ef78da3 100644 --- a/gallery_dl/extractor/slickpic.py +++ b/gallery_dl/extractor/slickpic.py @@ -32,7 +32,7 @@ class SlickpicAlbumExtractor(SlickpicExtractor): "{album[id]} {album[title]}") filename_fmt = "{num:>03}_{id}{title:?_//}.{extension}" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/albums/([^/?#]+)" + pattern = BASE_PATTERN + r"/albums/([^/?#]+)" example = "https://USER.slickpic.com/albums/TITLE/" def __init__(self, match): @@ -110,7 +110,7 @@ class SlickpicAlbumExtractor(SlickpicExtractor): class SlickpicUserExtractor(SlickpicExtractor): subcategory = "user" - pattern = rf"{BASE_PATTERN}(?:/gallery)?/?(?:$|[?#])" + pattern = BASE_PATTERN + r"(?:/gallery)?/?(?:$|[?#])" example = "https://USER.slickpic.com/" def items(self): diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py index 902044c..23a4ee7 100644 --- a/gallery_dl/extractor/smugmug.py +++ b/gallery_dl/extractor/smugmug.py @@ -93,7 +93,7 @@ class SmugmugImageExtractor(SmugmugExtractor): """Extractor for individual smugmug images""" subcategory = "image" archive_fmt = "{Image[ImageKey]}" - pattern = rf"{BASE_PATTERN}(?:/[^/?#]+)+/i-([^/?#-]+)" + pattern = BASE_PATTERN + r"(?:/[^/?#]+)+/i-([^/?#-]+)" example = "https://USER.smugmug.com/PATH/i-ID" def __init__(self, match): @@ -114,7 +114,7 @@ class SmugmugImageExtractor(SmugmugExtractor): class SmugmugPathExtractor(SmugmugExtractor): """Extractor for smugmug albums 
from URL paths and users""" subcategory = "path" - pattern = rf"{BASE_PATTERN}((?:/[^/?#a-fh-mo-z][^/?#]*)*)/?$" + pattern = BASE_PATTERN + r"((?:/[^/?#a-fh-mo-z][^/?#]*)*)/?$" example = "https://USER.smugmug.com/PATH" def __init__(self, match): diff --git a/gallery_dl/extractor/soundgasm.py b/gallery_dl/extractor/soundgasm.py index a4617dd..1885614 100644 --- a/gallery_dl/extractor/soundgasm.py +++ b/gallery_dl/extractor/soundgasm.py @@ -50,7 +50,7 @@ class SoundgasmExtractor(Extractor): class SoundgasmAudioExtractor(SoundgasmExtractor): """Extractor for audio clips from soundgasm.net""" subcategory = "audio" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/([^/?#]+)" + pattern = BASE_PATTERN + r"/([^/?#]+)/([^/?#]+)" example = "https://soundgasm.net/u/USER/TITLE" def __init__(self, match): @@ -64,7 +64,7 @@ class SoundgasmAudioExtractor(SoundgasmExtractor): class SoundgasmUserExtractor(SoundgasmExtractor): """Extractor for all sounds from a soundgasm user""" subcategory = "user" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/?$" + pattern = BASE_PATTERN + r"/([^/?#]+)/?$" example = "https://soundgasm.net/u/USER" def __init__(self, match): diff --git a/gallery_dl/extractor/steamgriddb.py b/gallery_dl/extractor/steamgriddb.py index c3af7fd..371f4e2 100644 --- a/gallery_dl/extractor/steamgriddb.py +++ b/gallery_dl/extractor/steamgriddb.py @@ -157,7 +157,7 @@ class SteamgriddbAssetsExtractor(SteamgriddbExtractor): class SteamgriddbAssetExtractor(SteamgriddbExtractor): """Extractor for a single asset""" subcategory = "asset" - pattern = rf"{BASE_PATTERN}/(grid|hero|logo|icon)/(\d+)" + pattern = BASE_PATTERN + r"/(grid|hero|logo|icon)/(\d+)" example = "https://www.steamgriddb.com/grid/1234" def __init__(self, match): @@ -177,7 +177,7 @@ class SteamgriddbAssetExtractor(SteamgriddbExtractor): class SteamgriddbGridsExtractor(SteamgriddbAssetsExtractor): subcategory = "grids" asset_type = "grid" - pattern = rf"{BASE_PATTERN}/(game|collection)/(\d+)/grids(?:/(\d+))?" 
+ pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/grids(?:/(\d+))?" example = "https://www.steamgriddb.com/game/1234/grids" valid_dimensions = ("460x215", "920x430", "600x900", "342x482", "660x930", "512x512", "1024x1024") @@ -189,7 +189,7 @@ class SteamgriddbGridsExtractor(SteamgriddbAssetsExtractor): class SteamgriddbHeroesExtractor(SteamgriddbAssetsExtractor): subcategory = "heroes" asset_type = "hero" - pattern = rf"{BASE_PATTERN}/(game|collection)/(\d+)/heroes(?:/(\d+))?" + pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/heroes(?:/(\d+))?" example = "https://www.steamgriddb.com/game/1234/heroes" valid_dimensions = ("1920x620", "3840x1240", "1600x650") valid_styles = ("alternate", "blurred", "material") @@ -199,7 +199,7 @@ class SteamgriddbHeroesExtractor(SteamgriddbAssetsExtractor): class SteamgriddbLogosExtractor(SteamgriddbAssetsExtractor): subcategory = "logos" asset_type = "logo" - pattern = rf"{BASE_PATTERN}/(game|collection)/(\d+)/logos(?:/(\d+))?" + pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/logos(?:/(\d+))?" example = "https://www.steamgriddb.com/game/1234/logos" valid_dimensions = None valid_styles = ("official", "white", "black", "custom") @@ -209,7 +209,7 @@ class SteamgriddbLogosExtractor(SteamgriddbAssetsExtractor): class SteamgriddbIconsExtractor(SteamgriddbAssetsExtractor): subcategory = "icons" asset_type = "icon" - pattern = rf"{BASE_PATTERN}/(game|collection)/(\d+)/icons(?:/(\d+))?" + pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/icons(?:/(\d+))?" 
example = "https://www.steamgriddb.com/game/1234/icons" valid_dimensions = [f"{i}x{i}" for i in (8, 10, 14, 16, 20, 24, 28, 32, 35, 40, 48, 54, 56, 57, 60, 64, 72, 76, 80, 90, diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py index 280c8d7..5df7152 100644 --- a/gallery_dl/extractor/subscribestar.py +++ b/gallery_dl/extractor/subscribestar.py @@ -27,12 +27,12 @@ class SubscribestarExtractor(Extractor): _warning = True def __init__(self, match): - tld, self.item = match.groups() - if tld == "adult": + if match[1] == "adult": self.root = "https://subscribestar.adult" self.cookies_domain = ".subscribestar.adult" self.subcategory += "-adult" Extractor.__init__(self, match) + self.item = match[2] def items(self): self.login() @@ -58,7 +58,7 @@ class SubscribestarExtractor(Extractor): text.nameext_from_url(url, item) if url[0] == "/": - url = f"{self.root}{url}" + url = self.root + url yield Message.Url, url, item def posts(self): @@ -72,7 +72,7 @@ class SubscribestarExtractor(Extractor): "/verify_subscriber" in response.url or "/age_confirmation_warning" in response.url): raise exception.AbortExtraction( - f"HTTP redirect to {response.url}") + "HTTP redirect to " + response.url) content = response.content if len(content) < 250 and b">redirected<" in content: @@ -148,6 +148,21 @@ class SubscribestarExtractor(Extractor): for cookie in response.cookies } + def _pagination(self, url, params=None): + needle_next_page = 'data-role="infinite_scroll-next_page" href="' + page = self.request(url, params=params).text + + while True: + posts = page.split('<div class="post ')[1:] + if not posts: + return + yield from posts + + url = text.extr(posts[-1], needle_next_page, '"') + if not url: + return + page = self.request_json(self.root + text.unescape(url))["html"] + def _media_from_post(self, html): media = [] @@ -221,29 +236,27 @@ class SubscribestarExtractor(Extractor): class SubscribestarUserExtractor(SubscribestarExtractor): """Extractor 
for media from a subscribestar user""" subcategory = "user" - pattern = rf"{BASE_PATTERN}/(?!posts/)([^/?#]+)" + pattern = BASE_PATTERN + r"/(?!posts/)([^/?#]+)(?:\?([^#]+))?" example = "https://www.subscribestar.com/USER" def posts(self): - needle_next_page = 'data-role="infinite_scroll-next_page" href="' - page = self.request(f"{self.root}/{self.item}").text + _, user, qs = self.groups + url = f"{self.root}/{user}" - while True: - posts = page.split('<div class="post ')[1:] - if not posts: - return - yield from posts + if qs is None: + params = None + else: + params = text.parse_query(qs) + if "tag" in params: + self.kwdict["search_tags"] = params["tag"] - url = text.extr(posts[-1], needle_next_page, '"') - if not url: - return - page = self.request_json(self.root + text.unescape(url))["html"] + return self._pagination(url, params) class SubscribestarPostExtractor(SubscribestarExtractor): """Extractor for media from a single subscribestar post""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/posts/(\d+)" + pattern = BASE_PATTERN + r"/posts/(\d+)" example = "https://www.subscribestar.com/posts/12345" def posts(self): diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py index 59477cc..2bf4a61 100644 --- a/gallery_dl/extractor/szurubooru.py +++ b/gallery_dl/extractor/szurubooru.py @@ -93,7 +93,7 @@ class SzurubooruTagExtractor(SzurubooruExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}_{version}" - pattern = rf"{BASE_PATTERN}/posts(?:/query=([^/?#]*))?" + pattern = BASE_PATTERN + r"/posts(?:/query=([^/?#]*))?" 
example = "https://booru.bcbnsfw.space/posts/query=TAG" def __init__(self, match): @@ -116,7 +116,7 @@ class SzurubooruTagExtractor(SzurubooruExtractor): class SzurubooruPostExtractor(SzurubooruExtractor): subcategory = "post" archive_fmt = "{id}_{version}" - pattern = rf"{BASE_PATTERN}/post/(\d+)" + pattern = BASE_PATTERN + r"/post/(\d+)" example = "https://booru.bcbnsfw.space/post/12345" def posts(self): diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py index 5f8cb67..3db5484 100644 --- a/gallery_dl/extractor/tapas.py +++ b/gallery_dl/extractor/tapas.py @@ -72,7 +72,7 @@ class TapasExtractor(Extractor): class TapasEpisodeExtractor(TapasExtractor): subcategory = "episode" - pattern = rf"{BASE_PATTERN}/episode/(\d+)" + pattern = BASE_PATTERN + r"/episode/(\d+)" example = "https://tapas.io/episode/12345" def items(self): @@ -102,6 +102,7 @@ class TapasEpisodeExtractor(TapasExtractor): else: # comic for episode["num"], url in enumerate(text.extract_iter( html, 'data-src="', '"'), 1): + url = text.unescape(url) yield Message.Url, url, text.nameext_from_url(url, episode) def _extract_series(self, html): @@ -116,7 +117,7 @@ class TapasEpisodeExtractor(TapasExtractor): class TapasSeriesExtractor(TapasExtractor): subcategory = "series" - pattern = rf"{BASE_PATTERN}/series/([^/?#]+)" + pattern = BASE_PATTERN + r"/series/([^/?#]+)" example = "https://tapas.io/series/TITLE" def items(self): @@ -150,7 +151,7 @@ class TapasSeriesExtractor(TapasExtractor): class TapasCreatorExtractor(TapasExtractor): subcategory = "creator" - pattern = rf"{BASE_PATTERN}/(?!series|episode)([^/?#]+)" + pattern = BASE_PATTERN + r"/(?!series|episode)([^/?#]+)" example = "https://tapas.io/CREATOR" def items(self): diff --git a/gallery_dl/extractor/tcbscans.py b/gallery_dl/extractor/tcbscans.py index e27ef0d..6dcb153 100644 --- a/gallery_dl/extractor/tcbscans.py +++ b/gallery_dl/extractor/tcbscans.py @@ -15,7 +15,7 @@ BASE_PATTERN = 
(r"(?:https?://)?(?:tcb(?:-backup\.bihar-mirchi|scans)" class TcbscansChapterExtractor(ChapterExtractor): category = "tcbscans" - pattern = rf"{BASE_PATTERN}(/chapters/\d+/[^/?#]+)" + pattern = BASE_PATTERN + r"(/chapters/\d+/[^/?#]+)" example = "https://tcbscans.me/chapters/12345/MANGA-chapter-123" def __init__(self, match): @@ -44,7 +44,7 @@ class TcbscansChapterExtractor(ChapterExtractor): class TcbscansMangaExtractor(MangaExtractor): category = "tcbscans" chapterclass = TcbscansChapterExtractor - pattern = rf"{BASE_PATTERN}(/mangas/\d+/[^/?#]+)" + pattern = BASE_PATTERN + r"(/mangas/\d+/[^/?#]+)" example = "https://tcbscans.me/mangas/123/MANGA" def __init__(self, match): diff --git a/gallery_dl/extractor/tenor.py b/gallery_dl/extractor/tenor.py index 3e4bab0..1edc361 100644 --- a/gallery_dl/extractor/tenor.py +++ b/gallery_dl/extractor/tenor.py @@ -111,7 +111,7 @@ class TenorExtractor(Extractor): class TenorImageExtractor(TenorExtractor): subcategory = "image" - pattern = rf"{BASE_PATTERN}view/(?:[^/?#]*-)?(\d+)" + pattern = BASE_PATTERN + r"view/(?:[^/?#]*-)?(\d+)" example = "https://tenor.com/view/SLUG-1234567890" def gifs(self): @@ -125,7 +125,7 @@ class TenorImageExtractor(TenorExtractor): class TenorSearchExtractor(TenorExtractor): subcategory = "search" directory_fmt = ("{category}", "{search_tags}") - pattern = rf"{BASE_PATTERN}search/([^/?#]+)" + pattern = BASE_PATTERN + r"search/([^/?#]+)" example = "https://tenor.com/search/QUERY" def gifs(self): @@ -141,7 +141,7 @@ class TenorSearchExtractor(TenorExtractor): class TenorUserExtractor(TenorExtractor): subcategory = "user" directory_fmt = ("{category}", "@{user[username]}") - pattern = rf"{BASE_PATTERN}(?:users|official)/([^/?#]+)" + pattern = BASE_PATTERN + r"(?:users|official)/([^/?#]+)" example = "https://tenor.com/users/USER" def gifs(self): diff --git a/gallery_dl/extractor/thehentaiworld.py b/gallery_dl/extractor/thehentaiworld.py index 773f300..f1b4ee3 100644 --- 
a/gallery_dl/extractor/thehentaiworld.py +++ b/gallery_dl/extractor/thehentaiworld.py @@ -90,12 +90,12 @@ class ThehentaiworldExtractor(Extractor): post["tags"] = tags_list = [] for key, value in tags.items(): tags_list.extend(value) - post[f"tags_{key}" if key else "tags_general"] = value + post["tags_" + key if key else "tags_general"] = value return post def _pagination(self, endpoint): - base = f"{self.root}{endpoint}" + base = self.root + endpoint pnum = self.page_start while True: @@ -116,7 +116,7 @@ class ThehentaiworldTagExtractor(ThehentaiworldExtractor): page_start = 1 post_start = 0 directory_fmt = ("{category}", "{search_tags}") - pattern = rf"{BASE_PATTERN}/tag/([^/?#]+)" + pattern = BASE_PATTERN + r"/tag/([^/?#]+)" example = "https://thehentaiworld.com/tag/TAG/" def posts(self): @@ -132,8 +132,8 @@ class ThehentaiworldTagExtractor(ThehentaiworldExtractor): class ThehentaiworldPostExtractor(ThehentaiworldExtractor): subcategory = "post" - pattern = (rf"{BASE_PATTERN}(" - rf"/(?:video|(?:[\w-]+-)?hentai-image)s/([^/?#]+))") + pattern = (BASE_PATTERN + + r"(/(?:video|(?:[\w-]+-)?hentai-image)s/([^/?#]+))") example = "https://thehentaiworld.com/hentai-images/SLUG/" def posts(self): diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index a4c7171..daf2b69 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -6,10 +6,15 @@ """Extractors for https://www.tiktok.com/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, util, ytdl, exception +import functools +import itertools +import random +import time BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktokv?\.com" +USER_PATTERN = BASE_PATTERN + r"/@([\w_.-]+)" class TiktokExtractor(Extractor): @@ -17,18 +22,24 @@ class TiktokExtractor(Extractor): category = "tiktok" directory_fmt = ("{category}", "{user}") filename_fmt = ( - "{id}{num:?_//>02} {title[b:150]}{img_id|audio_id:? 
[/]/}.{extension}") - archive_fmt = "{id}_{num}_{img_id}" + "{id}{num:?_//>02} {title[b:150]}{file_id:? [/]/}.{extension}") + archive_fmt = "{id}_{num}_{file_id}" root = "https://www.tiktok.com" cookies_domain = ".tiktok.com" + rehydration_data_cache = {} + rehydration_data_app_context_cache = {} def _init(self): + self.photo = self.config("photos", True) self.audio = self.config("audio", True) self.video = self.config("videos", True) self.cover = self.config("covers", False) + self.range = self.config("tiktok-range") or "" + self.range_predicate = util.RangePredicate(self.range) + def items(self): - for tiktok_url in self.urls(): + for tiktok_url in self.posts(): tiktok_url = self._sanitize_url(tiktok_url) data = self._extract_rehydration_data(tiktok_url) if "webapp.video-detail" not in data: @@ -39,7 +50,7 @@ class TiktokExtractor(Extractor): data = self._extract_rehydration_data(tiktok_url) video_detail = data["webapp.video-detail"] - if not self._check_status_code(video_detail, tiktok_url): + if not self._check_status_code(video_detail, tiktok_url, "post"): continue post = video_detail["itemInfo"]["itemStruct"] @@ -51,22 +62,23 @@ class TiktokExtractor(Extractor): ytdl_media = False if "imagePost" in post: - if not original_title: - title = f"TikTok photo #{post['id']}" - img_list = post["imagePost"]["images"] - for i, img in enumerate(img_list, 1): - url = img["imageURL"]["urlList"][0] - text.nameext_from_url(url, post) - post.update({ - "type" : "image", - "image" : img, - "title" : title, - "num" : i, - "img_id": post["filename"].partition("~")[0], - "width" : img["imageWidth"], - "height": img["imageHeight"], - }) - yield Message.Url, url, post + if self.photo: + if not original_title: + title = f"TikTok photo #{post['id']}" + img_list = post["imagePost"]["images"] + for i, img in enumerate(img_list, 1): + url = img["imageURL"]["urlList"][0] + text.nameext_from_url(url, post) + post.update({ + "type" : "image", + "image" : img, + "title" : title, + "num" : 
i, + "file_id": post["filename"].partition("~")[0], + "width" : img["imageWidth"], + "height": img["imageHeight"], + }) + yield Message.Url, url, post if self.audio and "music" in post: if self.audio == "ytdl": @@ -75,8 +87,10 @@ class TiktokExtractor(Extractor): yield Message.Url, url, post elif "video" in post: - if self.video: + if self.video == "ytdl": ytdl_media = "video" + elif self.video and (url := self._extract_video(post)): + yield Message.Url, url, post if self.cover and (url := self._extract_cover(post, "video")): yield Message.Url, url, post @@ -93,7 +107,7 @@ class TiktokExtractor(Extractor): "extension" : "mp3" if ytdl_media == "audio" else "mp4", "title" : title, "num" : 0, - "img_id" : "", + "file_id" : "", "width" : 0, "height" : 0, }) @@ -102,7 +116,8 @@ class TiktokExtractor(Extractor): def _sanitize_url(self, url): return text.ensure_http_scheme(url.replace("/photo/", "/video/", 1)) - def _extract_rehydration_data(self, url): + def _extract_rehydration_data(self, url, additional_keys=[], *, + has_keys=[]): tries = 0 while True: try: @@ -115,8 +130,14 @@ class TiktokExtractor(Extractor): data = text.extr( html, '<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" ' 'type="application/json">', '</script>') - return util.json_loads(data)["__DEFAULT_SCOPE__"] - except ValueError: + data = util.json_loads(data)["__DEFAULT_SCOPE__"] + for key in additional_keys: + data = data[key] + for assert_key in has_keys: + if assert_key not in data: + raise KeyError(assert_key) + return data + except (ValueError, KeyError): # We failed to retrieve rehydration data. This happens # relatively frequently when making many requests, so # retry. 
@@ -128,6 +149,88 @@ class TiktokExtractor(Extractor): self._retries) self.sleep(self._timeout, "retry") + def _extract_rehydration_data_user(self, profile_url, additional_keys=()): + if profile_url in self.rehydration_data_cache: + data = self.rehydration_data_cache[profile_url] + else: + data = self._extract_rehydration_data( + profile_url, + has_keys=["webapp.user-detail", "webapp.app-context"] + ) + self.rehydration_data_cache[profile_url] = \ + data["webapp.user-detail"] + self.rehydration_data_app_context_cache = \ + data["webapp.app-context"] + data = data["webapp.user-detail"] + if not self._check_status_code(data, profile_url, "profile"): + raise exception.ExtractionError( + "%s: could not extract rehydration data", profile_url) + try: + for key in additional_keys: + data = data[key] + except KeyError as exc: + self.log.traceback(exc) + raise exception.ExtractionError( + "%s: could not extract rehydration data (%s)", + profile_url, ", ".join(additional_keys)) + return data + + def _ensure_rehydration_data_app_context_cache_is_populated(self): + if not self.rehydration_data_app_context_cache: + self.rehydration_data_app_context_cache = \ + self._extract_rehydration_data_user( + "https://www.tiktok.com/", ["webapp.app-context"]) + + def _extract_sec_uid(self, profile_url, user_name): + sec_uid = self._extract_id( + profile_url, user_name, r"MS4wLjABAAAA[\w-]{64}", "secUid") + if sec_uid is None: + raise exception.AbortExtraction( + f"{user_name}: unable to extract secondary user ID") + return sec_uid + + def _extract_author_id(self, profile_url, user_name): + author_id = self._extract_id( + profile_url, user_name, r"[0-9]+", "id") + if author_id is None: + raise exception.AbortExtraction( + f"{user_name}: unable to extract user ID") + return author_id + + def _extract_id(self, profile_url, user_name, regex, id_key): + match = text.re(regex).fullmatch + + if match(user_name) is not None: + # If it was provided in the URL, then we can skip extracting it + # 
from the rehydration data. + return user_name + + id = self._extract_rehydration_data_user( + profile_url, ("userInfo", "user", id_key)) + return None if match(id) is None else id + + def _extract_video(self, post): + video = post["video"] + try: + url = video["playAddr"] + except KeyError: + raise exception.ExtractionError("Failed to extract video URL, you " + "may need cookies to continue") + text.nameext_from_url(url, post) + post.update({ + "type" : "video", + "image" : None, + "title" : post["desc"] or f"TikTok video #{post['id']}", + "duration" : video.get("duration"), + "num" : 0, + "file_id" : video.get("id"), + "width" : video.get("width"), + "height" : video.get("height"), + }) + if not post["extension"]: + post["extension"] = video.get("format", "mp4") + return url + def _extract_audio(self, post): audio = post["music"] url = audio["playUrl"] @@ -138,8 +241,7 @@ class TiktokExtractor(Extractor): "title" : post["desc"] or f"TikTok audio #{post['id']}", "duration" : audio.get("duration"), "num" : 0, - "img_id" : "", - "audio_id" : audio.get("id"), + "file_id" : audio.get("id"), "width" : 0, "height" : 0, }) @@ -164,22 +266,38 @@ class TiktokExtractor(Extractor): "title" : post["desc"] or f"TikTok {type} cover #{post['id']}", "duration" : media.get("duration"), "num" : 0, - "img_id" : "", - "cover_id" : cover_id, + "file_id" : cover_id, "width" : 0, "height" : 0, }) return url - def _check_status_code(self, detail, url): + def _check_status_code(self, detail, url, type_of_url): status = detail.get("statusCode") if not status: return True if status == 10222: - self.log.error("%s: Login required to access this post", url) + # Video count workaround ported from yt-dlp: sometimes TikTok + # reports a profile as private even though we have the cookies to + # access it. We know that we can access it if we can see the + # videos stats. If we can't, we assume that we don't have access + # to the profile. 
+ # We only care about this workaround for webapp.user-detail + # objects, so always fail the workaround for e.g. + # webapp.video-detail objects. + video_count = self._extract_video_count_from_user_detail(detail) + if video_count is None: + self.log.error("%s: Login required to access this %s", url, + type_of_url) + elif video_count > 0: + return True + else: + self.log.error("%s: Login required to access this %s, or this " + "profile has no videos posted", url, + type_of_url) elif status == 10204: - self.log.error("%s: Requested post not available", url) + self.log.error("%s: Requested %s not available", url, type_of_url) elif status == 10231: self.log.error("%s: Region locked - Try downloading with a " "VPN/proxy connection", url) @@ -189,14 +307,26 @@ class TiktokExtractor(Extractor): url, status, detail.get("statusMsg") or "") return False + def _extract_video_count_from_user_detail(self, detail): + user_info = detail.get("userInfo") + if not user_info: + return None + stats = user_info.get("stats") or user_info.get("statsV2") + try: + # stats.videoCount is an int, but statsV2.videoCount is a + # string, so we must explicitly convert the attribute. 
+ return int(stats["videoCount"]) + except (KeyError, ValueError): + return None + class TiktokPostExtractor(TiktokExtractor): """Extract a single video or photo TikTok link""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/(?:@([\w_.-]*)|share)/(?:phot|vide)o/(\d+)" + pattern = BASE_PATTERN + r"/(?:@([\w_.-]*)|share)/(?:phot|vide)o/(\d+)" example = "https://www.tiktok.com/@USER/photo/1234567890" - def urls(self): + def posts(self): user, post_id = self.groups url = f"{self.root}/@{user or ''}/video/{post_id}" return (url,) @@ -223,31 +353,92 @@ class TiktokVmpostExtractor(TiktokExtractor): yield Message.Queue, url.partition("?")[0], data -class TiktokUserExtractor(TiktokExtractor): - """Extract a TikTok user's profile""" - subcategory = "user" - pattern = rf"{BASE_PATTERN}/@([\w_.-]+)/?(?:$|\?|#)" +class TiktokUserExtractor(Dispatch, TiktokExtractor): + """Extractor for a TikTok user profile""" + pattern = USER_PATTERN + r"/?(?:$|\?|#)" example = "https://www.tiktok.com/@USER" - def _init(self): - self.avatar = self.config("avatar", True) + def items(self): + base = f"{self.root}/@{self.groups[0]}/" + return self._dispatch_extractors(( + (TiktokAvatarExtractor , base + "avatar"), + (TiktokPostsExtractor , base + "posts"), + (TiktokRepostsExtractor, base + "reposts"), + (TiktokStoriesExtractor, base + "stories"), + (TiktokLikesExtractor , base + "likes"), + (TiktokSavedExtractor , base + "saved"), + ), ("avatar", "posts")) + + +class TiktokAvatarExtractor(TiktokExtractor): + subcategory = "avatar" + pattern = USER_PATTERN + r"/avatar" + example = "https://www.tiktok.com/@USER/avatar" def items(self): - """Attempt to use yt-dlp/youtube-dl to extract links from a - user's page""" + user_name = self.groups[0] + profile_url = f"{self.root}/@{user_name}" + + data = self._extract_rehydration_data_user( + profile_url, ("userInfo", "user")) + data["user"] = data.get("uniqueId", user_name) + avatar_url = data.get("avatarLarger") or data.get("avatarMedium") \ + or 
data["avatarThumb"] + avatar = text.nameext_from_url(avatar_url, data.copy()) + avatar.update({ + "type" : "avatar", + "title" : "@" + data["user"], + "id" : data["id"], + "file_id": avatar["filename"].partition("~")[0], + "num" : 0, + }) + yield Message.Directory, "", avatar + yield Message.Url, avatar_url, avatar + + +class TiktokPostsExtractor(TiktokExtractor): + subcategory = "posts" + pattern = USER_PATTERN + r"/posts" + example = "https://www.tiktok.com/@USER/posts" + + def posts(self): + user_name = self.groups[0] + profile_url = f"{self.root}/@{user_name}" + self.user_provided_cookies = bool(self.cookies) + + # If set to "ytdl", we shall first go via yt-dlp. If that fails, + # we shall attempt to extract directly. + if self.config("ytdl", False): + if posts := self._extract_posts_ytdl(profile_url): + return posts + ytdl = True + self.log.warning("Could not extract TikTok user " + f"{user_name} via yt-dlp or youtube-dl, " + "attempting the extraction directly") + else: + ytdl = False + + if posts := self._extract_posts_api(profile_url, user_name): + return posts + + message = "Could not extract any posts from TikTok user " \ + f"{user_name}" + if not ytdl: + message += ", try extracting post information using " \ + "yt-dlp with the -o " \ + "tiktok-user-extractor=ytdl argument" + self.log.warning(message) + return () + + def _extract_posts_ytdl(self, profile_url): try: module = ytdl.import_module(self.config("module")) except (ImportError, SyntaxError) as exc: self.log.error("Cannot import module '%s'", getattr(exc, "name", "")) self.log.traceback(exc) - raise exception.ExtractionError("yt-dlp or youtube-dl is required " - "for this feature!") - - ytdl_range = self.config("tiktok-range") - if ytdl_range is None or not ytdl_range and ytdl_range != 0: - ytdl_range = "" + return [] extr_opts = { "extract_flat" : True, @@ -257,7 +448,7 @@ class TiktokUserExtractor(TiktokExtractor): "retries" : self._retries, "socket_timeout" : self._timeout, "nocheckcertificate" 
: not self._verify, - "playlist_items" : str(ytdl_range), + "playlist_items" : str(self.range), } if self._proxies: user_opts["proxy"] = self._proxies.get("http") @@ -271,39 +462,889 @@ class TiktokUserExtractor(TiktokExtractor): for cookie in self.cookies: set_cookie(cookie) - user_name = self.groups[0] - profile_url = f"{self.root}/@{user_name}" - if self.avatar: - try: - avatar_url, avatar = self._generate_avatar( - user_name, profile_url) - except Exception as exc: - self.log.warning("Unable to extract 'avatar' URL (%s: %s)", - exc.__class__.__name__, exc) - else: - yield Message.Directory, "", avatar - yield Message.Url, avatar_url, avatar - with ytdl_instance as ydl: info_dict = ydl._YoutubeDL__extract_info( profile_url, ydl.get_info_extractor("TikTokUser"), False, {}, True) - # This should include video and photo posts in /video/ URL form. - for video in info_dict["entries"]: - data = {"_extractor": TiktokPostExtractor} - yield Message.Queue, video["url"].partition("?")[0], data - - def _generate_avatar(self, user_name, profile_url): - data = self._extract_rehydration_data(profile_url) - data = data["webapp.user-detail"]["userInfo"]["user"] - data["user"] = user_name - avatar_url = data["avatarLarger"] - avatar = text.nameext_from_url(avatar_url, data.copy()) - avatar.update({ - "type" : "avatar", - "title" : "@" + user_name, - "id" : data["id"], - "img_id": avatar["filename"].partition("~")[0], - "num" : 0, - }) - return (avatar_url, avatar) + # This should be a list of video and photo post URLs in /video/ + # format. 
+ return [video["url"].partition("?")[0] + for video in info_dict["entries"]] + + def _extract_posts_api(self, profile_url, user_name): + self.post_order = self.config("order-posts") or "desc" + if self.post_order not in ["desc", "asc", "reverse", "popular"]: + self.post_order = "desc" + + sec_uid = self._extract_sec_uid(profile_url, user_name) + if not self.user_provided_cookies: + if self.post_order != "desc": + self.log.warning( + "%s: no cookies have been provided so the order-posts " + "option will not take effect. You must provide cookies in " + "order to extract a profile's posts in non-descending " + "order", + profile_url + ) + return self._extract_posts_api_legacy( + profile_url, sec_uid, self.range_predicate) + try: + return self._extract_posts_api_order( + profile_url, sec_uid, self.range_predicate) + except Exception as exc: + self.log.error( + "%s: failed to extract user posts using post/item_list (make " + "sure you provide valid cookies). Attempting with legacy " + "creator/item_list endpoint that does not support post " + "ordering", + profile_url + ) + self.log.traceback(exc) + return self._extract_posts_api_legacy( + profile_url, sec_uid, self.range_predicate) + + def _extract_posts_api_order(self, profile_url, sec_uid, range_predicate): + post_item_list_request_type = "0" + if self.post_order in ["asc", "reverse"]: + post_item_list_request_type = "2" + elif self.post_order in ["popular"]: + post_item_list_request_type = "1" + query_parameters = { + "secUid": sec_uid, + "post_item_list_request_type": post_item_list_request_type, + "count": "15", + "needPinnedItemIds": "false", + } + request = TiktokPostItemListRequest(range_predicate) + request.execute(self, profile_url, query_parameters) + return request.generate_urls(profile_url, self.video, self.photo, + self.audio) + + def _extract_posts_api_legacy(self, profile_url, sec_uid, range_predicate): + query_parameters = { + "secUid": sec_uid, + "type": "1", + "count": "15", + } + request = 
TiktokCreatorItemListRequest(range_predicate) + request.execute(self, profile_url, query_parameters) + return request.generate_urls(profile_url, self.video, self.photo, + self.audio) + + +class TiktokRepostsExtractor(TiktokExtractor): + subcategory = "reposts" + pattern = USER_PATTERN + r"/reposts" + example = "https://www.tiktok.com/@USER/reposts" + + def posts(self): + user_name = self.groups[0] + profile_url = f"{self.root}/@{user_name}" + + query_parameters = { + "secUid": self._extract_sec_uid(profile_url, user_name), + "post_item_list_request_type": "0", + "needPinnedItemIds": "false", + "count": "15", + } + request = TiktokRepostItemListRequest(self.range_predicate) + request.execute(self, profile_url, query_parameters) + return request.generate_urls(profile_url, self.video, self.photo, + self.audio) + + +class TiktokStoriesExtractor(TiktokExtractor): + subcategory = "stories" + pattern = USER_PATTERN + r"/stories" + example = "https://www.tiktok.com/@USER/stories" + + def posts(self): + user_name = self.groups[0] + profile_url = f"{self.root}/@{user_name}" + + query_parameters = { + "authorId": self._extract_author_id(profile_url, user_name), + "loadBackward": "false", + "count": "5", + } + request = TiktokStoryItemListRequest() + request.execute(self, profile_url, query_parameters) + return request.generate_urls(profile_url, self.video, self.photo, + self.audio) + + +class TiktokLikesExtractor(TiktokExtractor): + subcategory = "likes" + pattern = USER_PATTERN + r"/like[sd]" + example = "https://www.tiktok.com/@USER/liked" + + def posts(self): + user_name = self.groups[0] + profile_url = f"{self.root}/@{user_name}" + + query_parameters = { + "secUid": self._extract_sec_uid(profile_url, user_name), + "post_item_list_request_type": "0", + "needPinnedItemIds": "false", + "count": "15", + } + request = TiktokFavoriteItemListRequest(self.range_predicate) + request.execute(self, profile_url, query_parameters) + return request.generate_urls(profile_url, 
self.video, self.photo, + self.audio) + + +class TiktokSavedExtractor(TiktokExtractor): + subcategory = "saved" + pattern = USER_PATTERN + r"/saved" + example = "https://www.tiktok.com/@USER/saved" + + def posts(self): + user_name = self.groups[0] + profile_url = f"{self.root}/@{user_name}" + + query_parameters = { + "secUid": self._extract_sec_uid(profile_url, user_name), + "post_item_list_request_type": "0", + "needPinnedItemIds": "false", + "count": "15", + } + request = TiktokSavedPostItemListRequest(self.range_predicate) + request.execute(self, profile_url, query_parameters) + return request.generate_urls(profile_url, self.video, self.photo, + self.audio) + + +class TiktokFollowingExtractor(TiktokUserExtractor): + """Extract all of the stories of all of the users you follow""" + subcategory = "following" + pattern = rf"{BASE_PATTERN}/following" + example = "https://www.tiktok.com/following" + + def items(self): + """Attempt to extract all of the stories of all of the accounts + the user follows""" + + query_parameters = { + "storyFeedScene": "3", + "count": "15", + } + request = TiktokStoryUserListRequest() + if not request.execute(self, self.url, query_parameters): + self.log.error("%s: could not extract follower list, make sure " + "you are using logged-in cookies", self.url) + users = request.generate_urls() + if len(users) == 0: + self.log.warning("%s: No followers with stories could be " + "extracted", self.url) + + entries = [] + # Batch all of the users up into groups of at most ten and use the + # batch endpoint to improve performance. The response to the story user + # list request may also include the user themselves, so skip them if + # they ever turn up. + for b in range((len(users) - 1) // 10 + 1): + batch_number = b + 1 + user_batch = users[b*10:batch_number*10] + + # Handle edge case where final batch is composed of a single user + # and that user is the one we need to skip. 
If we don't handle this + # here (or when we generate the author ID list later), we will + # trigger an AssertionError for an empty author ID list. + if len(user_batch) == 1: + if self._is_current_user(user_batch[0][0]): + continue + + self.log.info("TikTok user stories, batch %d: %s", batch_number, + ", ".join([profile_url for user_id, profile_url in + user_batch if not self._is_current_user( + user_id)])) + + # Since we've already extracted all of the author IDs, we should be + # able to avoid having to request rehydration data (except for one + # time, since it's required to make _is_current_user() work), but + # we should keep this mechanism in place for safety. + author_ids = [self._extract_author_id(profile_url, user_id) + for user_id, profile_url in user_batch + if not self._is_current_user(user_id)] + query_parameters = { + "authorIds": ",".join(author_ids), + "storyCallScene": "2", + } + request = TiktokStoryBatchItemListRequest() + request.execute(self, f"Batch {batch_number}", query_parameters) + # We technically don't need to have the correct user name in the + # URL and it's easier to just ignore it here. + entries += request.generate_urls("https://www.tiktok.com/@_", + self.video, self.photo, + self.audio) + + for video in entries: + data = {"_extractor": TiktokPostExtractor} + yield Message.Queue, video, data + + def _is_current_user(self, user_id): + self._ensure_rehydration_data_app_context_cache_is_populated() + if "user" not in self.rehydration_data_app_context_cache: + return False + if "uid" not in self.rehydration_data_app_context_cache["user"]: + return False + return self.rehydration_data_app_context_cache["user"]["uid"] == \ + user_id + + +class TiktokPaginationCursor: + def current_page(self): + """Must return the page the cursor is currently pointing to. + + Returns + ------- + int + The current value of the cursor. + """ + + return 0 + + def next_page(self, data, query_parameters): + """Must progress the cursor to the next page. 
+ + Parameters + ---------- + data : dict + The response of the most recent request. + query_parameters : dict + All of the query parameters used for the most recent + request. + + Returns + ------- + bool + True if the cursor detects that we've reached the end, False + otherwise. + """ + + return True + + +class TiktokTimeCursor(TiktokPaginationCursor): + def __init__(self, *, reverse=True): + super().__init__() + self.cursor = 0 + # If we expect the cursor to go up or down as we go to the next page. + # True for down, False for up. + self.reverse = reverse + + def current_page(self): + return self.cursor + + def next_page(self, data, query_parameters): + skip_fallback_logic = self.cursor == 0 + new_cursor = int(data.get("cursor", 0)) + no_cursor = not new_cursor + if not skip_fallback_logic: + # If the new cursor doesn't go in the direction we expect, use the + # fallback logic instead. + if self.reverse and (new_cursor > self.cursor or no_cursor): + new_cursor = self.fallback_cursor(data) + elif not self.reverse and (new_cursor < self.cursor or no_cursor): + new_cursor = self.fallback_cursor(data) + elif no_cursor: + raise exception.ExtractionError("Could not extract next cursor") + self.cursor = new_cursor + return not data.get("hasMore", False) + + def fallback_cursor(self, data): + try: + return int(data["itemList"][-1]["createTime"]) * 1000 + except Exception: + return 7 * 86_400_000 * (-1 if self.reverse else 1) + + +class TiktokForwardTimeCursor(TiktokTimeCursor): + def __init__(self): + super().__init__(reverse=False) + + +class TiktokBackwardTimeCursor(TiktokTimeCursor): + def __init__(self): + super().__init__(reverse=True) + + +class TiktokPopularTimeCursor(TiktokTimeCursor): + def __init__(self): + super().__init__(reverse=True) + + def fallback_cursor(self, data): + # Don't really know what to do here, all I know is that the cursor + # for the popular item feed goes down and it does not appear to be + # based on item list timestamps at all. 
+ return -50_000 + + +class TiktokLegacyTimeCursor(TiktokPaginationCursor): + def __init__(self): + super().__init__() + self.cursor = int(time.time()) * 1000 + + def current_page(self): + return self.cursor + + def next_page(self, data, query_parameters): + old_cursor = self.cursor + try: + self.cursor = int(data["itemList"][-1]["createTime"]) * 1000 + except Exception: + self.cursor = 0 + if not self.cursor or old_cursor == self.cursor: + # User may not have posted within this ~1 week look back, + # so manually adjust the cursor. + self.cursor = old_cursor - 7 * 86_400_000 + # In case 'hasMorePrevious' is wrong, break if we have + # gone back before TikTok existed. + has_more_previous = data.get("hasMorePrevious") + return self.cursor < 1472706000000 or not has_more_previous + + +class TiktokItemCursor(TiktokPaginationCursor): + def __init__(self, list_key: str = "itemList"): + super().__init__() + self.cursor = 0 + self.list_key = list_key + + def current_page(self): + return self.cursor + + def next_page(self, data, query_parameters): + # We should offset the cursor by the number of items in the response. + # Sometimes less items are returned than what was requested in the + # count parameter! We could fall back onto the count query parameter + # but we could miss out on some posts, and truth is if the expected + # item list isn't in the response, the extraction was going to fail + # anyway. + self.cursor += len(data[self.list_key]) + return not data.get("hasMore", False) + + +class TiktokPaginationRequest: + def __init__(self, endpoint): + self.endpoint = endpoint + self._regenerate_device_id() + self.items = {} + + def execute(self, extractor, url, query_parameters): + """Performs requests until all pages have been retrieved. + + The items retrieved from this request are stored in self.items. + Each call to execute() will clear the previous value of + self.items. + + Usually extractors want a simple list of URLs. 
For this, each + request subtype is to implement generate_urls(). + + Parameters + ---------- + extractor : TiktokExtractor + The TikTok extractor performing the request. + url : str + The URL associated with this request for logging purposes. + query_parameters : dict[str, str] + The query parameters to apply to this request. + + Returns + ------- + bool + True if the request was performed successfully and all items + were retrieved, False if no items or only some items could + be retrieved. + """ + + self.validate_query_parameters(query_parameters) + self.items = {} + cursor_type = self.cursor_type(query_parameters) + cursor = cursor_type() if cursor_type else None + for page in itertools.count(start=1): + extractor.log.info("%s: retrieving %s page %d", url, self.endpoint, + page) + tries = 0 + while True: + try: + data, final_parameters = self._request_data( + extractor, + cursor, + query_parameters + ) + incoming_items = self.extract_items(data) + self._detect_duplicate_pages(extractor, url, + set(self.items.keys()), + set(incoming_items.keys())) + self.items.update(incoming_items) + if cursor: + final_page_reached = cursor.next_page(data, + final_parameters) + exit_early = self.exit_early(extractor, url) + if exit_early or final_page_reached: + return True + # Continue to next page and reset tries counter. + break + else: + # This request has no cursor: return immediately. + return True + except Exception as exc: + if tries >= extractor._retries: + extractor.log.error("%s: failed to retrieve %s page " + "%d", url, self.endpoint, page) + extractor.log.traceback(exc) + return False + tries += 1 + extractor.log.warning("%s: failed to retrieve %s page %d", + url, self.endpoint, page) + extractor.sleep(extractor._timeout, "retry") + + def validate_query_parameters(self, query_parameters): + """Used to validate the given parameters for this type of + pagination request. + + For developer purposes only. 
You should call + super().validate_query_parameters() for most requests as they + will usually have a count parameter. + + Parameters + ---------- + query_parameters : dict[str, str] + The query parameters to validate. + + Raises + ------- + AssertionError + If mandatory query parameters are not given, or they are + given in the wrong format. + """ + + assert "count" in query_parameters + assert type(query_parameters["count"]) is str + assert query_parameters["count"].isdigit() + assert query_parameters["count"] != "0" + + def cursor_type(self, query_parameters): + """Used to determine which type of cursor to use for this + request, if any. + + Parameters + ---------- + query_parameters : dict[str, str] + The query parameters given to the execute() call. + + Returns + ------- + Type[TiktokPaginationCursor] | None + The type of cursor to use, if any. + """ + + return None + + def extract_items(self, data): + """Used to extract data from the response of a request. + + Parameters + ---------- + data : dict + The data given by TikTok. + + Returns + ------- + dict + Each item from the response data, keyed on a unique ID. + + Raises + ------ + Exception + If items could not be extracted. + """ + + return {} + + def exit_early(self, extractor, url): + """Used to determine if we should exit early from the request. + + You have access to the items extracted so far (self.items). + + Parameters + ---------- + extractor : TiktokExtractor + The extractor making the requests. + url : str + The URL associated with the executing request for logging + purposes. + + Returns + ------- + bool + True if we should exit early, False otherwise. + """ + + return False + + def generate_urls(self): + """Used to convert the items retrieved from the request into a + list of URLs. + + Returns + ------- + list + Ideally one URL for each item, although subclasses are + permitted to return a list of any format they wish. 
+ """ + + return [] + + def _regenerate_device_id(self): + self.device_id = str(random.randint( + 7_250_000_000_000_000_000, 7_325_099_899_999_994_577)) + + def _request_data(self, extractor, cursor, query_parameters): + # Implement simple 1 retry mechanism without delays that handles the + # flaky post/item_list endpoint. + retries = 0 + while True: + try: + url, final_parameters = self._build_api_request_url( + cursor, + query_parameters + ) + response = extractor.request(url) + return (util.json_loads(response.text), final_parameters) + except ValueError: + if retries == 1: + raise + extractor.log.warning( + "Could not decode response for this page, trying again" + ) + retries += 1 + + def _build_api_request_url(self, cursor, extra_parameters): + query_parameters = { + "aid": "1988", + "app_language": "en", + "app_name": "tiktok_web", + "browser_language": "en-US", + "browser_name": "Mozilla", + "browser_online": "true", + "browser_platform": "Win32", + "browser_version": "5.0 (Windows)", + "channel": "tiktok_web", + "cookie_enabled": "true", + "device_id": self.device_id, + "device_platform": "web_pc", + "focus_state": "true", + "from_page": "user", + "history_len": "2", + "is_fullscreen": "false", + "is_page_visible": "true", + "language": "en", + "os": "windows", + "priority_region": "", + "referer": "", + "region": "US", + "screen_height": "1080", + "screen_width": "1920", + "tz_name": "UTC", + "verifyFp": "verify_" + "".join(random.choices( + "0123456789abcdef", k=7)), + "webcast_language": "en", + } + if cursor: + # We must not write this as a floating-point number: + query_parameters["cursor"] = str(int(cursor.current_page())) + for key, value in extra_parameters.items(): + query_parameters[key] = f"{value}" + query_str = text.build_query(query_parameters) + return (f"https://www.tiktok.com/api/{self.endpoint}/?{query_str}", + query_parameters) + + def _detect_duplicate_pages(self, extractor, url, seen_ids, incoming_ids): + if incoming_ids and 
incoming_ids == seen_ids: + # TikTok API keeps sending the same page, likely due to + # a bad device ID. Generate a new one and try again. + self._regenerate_device_id() + extractor.log.warning("%s: TikTok API keeps sending the same " + "page. Taking measures to avoid an infinite " + "loop", url) + raise exception.ExtractionError( + "TikTok API keeps sending the same page") + + +class TiktokItemListRequest(TiktokPaginationRequest): + def __init__(self, endpoint, type_of_items, range_predicate): + super().__init__(endpoint) + self.type_of_items = type_of_items + self.range_predicate = range_predicate + self.exit_early_due_to_no_items = False + + def extract_items(self, data): + if "itemList" not in data: + self.exit_early_due_to_no_items = True + return {} + return {item["id"]: item for item in data["itemList"]} + + def exit_early(self, extractor, url): + if self.exit_early_due_to_no_items: + extractor.log.warning("%s: could not extract any %s for this user", + url, self.type_of_items) + return True + if not self.range_predicate: + # No range predicate given. + return False + if len(self.range_predicate.ranges) == 0: + # No range predicates given in the predicate object. + return False + # If our current selection of items can't satisfy the upper bound of + # the predicate, we must continue extracting them until we can. + return len(self.items) > self.range_predicate.upper + + def generate_urls(self, profile_url, video, photo, audio): + urls = [] + for index, id in enumerate(self.items.keys()): + if not self._matches_filters(self.items.get(id), index + 1, video, + photo, audio): + continue + # Try to grab the author's unique ID, but don't cause the + # extraction to fail if we can't, it's not imperative that the + # URLs include the actual poster's unique ID. + try: + url = f"https://www.tiktok.com/@" \ + f"{self.items[id]['author']['uniqueId']}/video/{id}" + except KeyError: + # Use the given profile URL as a back up. 
+ url = f"{profile_url}/video/{id}" + urls.append(url) + return urls + + def _matches_filters(self, item, index, video, photo, audio): + # First, check if this index falls within any of our configured ranges. + # If it doesn't, we filter it out. + if self.range_predicate: + range_match = len(self.range_predicate.ranges) == 0 + for range in self.range_predicate.ranges: + if index in range: + range_match = True + break + if not range_match: + return False + + # Then, we apply basic video/photo filtering. + if not item: + return True + is_image_post = "imagePost" in item + if not photo and not audio and is_image_post: + return False + if not video and not is_image_post: + return False + return True + + +class TiktokCreatorItemListRequest(TiktokItemListRequest): + """A less flaky version of the post/item_list endpoint that doesn't + support latest/popular/oldest ordering.""" + + def __init__(self, range_predicate): + super().__init__("creator/item_list", "posts", range_predicate) + + def validate_query_parameters(self, query_parameters): + super().validate_query_parameters(query_parameters) + assert "secUid" in query_parameters + assert "type" in query_parameters + # Pagination type: 0 == oldest-to-newest, 1 == newest-to-oldest. + # NOTE: ^ this type parameter doesn't seem to do what yt-dlp thinks it + # does. post/item_list is the only way to get an ordered feed + # based on latest/popular/oldest. + assert query_parameters["type"] == "0" or \ + query_parameters["type"] == "1" + + def cursor_type(self, query_parameters): + return TiktokLegacyTimeCursor + + +class TiktokPostItemListRequest(TiktokItemListRequest): + """Retrieves posts in latest/popular/oldest ordering. + + Very often, this request will just return an empty response, making + it quite flaky, but the next attempt to make the request usually + does return a response. For this reason creator/item_list was kept + as a backup, though it doesn't seem to support ordering. 
+ + It also doesn't work without cookies. + """ + + def __init__(self, range_predicate): + super().__init__("post/item_list", "posts", range_predicate) + + def validate_query_parameters(self, query_parameters): + super().validate_query_parameters(query_parameters) + assert "secUid" in query_parameters + assert "post_item_list_request_type" in query_parameters + # Pagination type: + # 0 == newest-to-oldest. + # 1 == popular. + # 2 == oldest-to-newest. + assert query_parameters["post_item_list_request_type"] in \ + ["0", "1", "2"] + assert "needPinnedItemIds" in query_parameters + # If this value is set to "true", and "post_item_list_request_type" is + # set to "0", pinned posts will always show up first in the resulting + # itemList. It keeps our logic simpler if we avoid this behavior by + # setting this parameter to "false" (especially if we were to use a + # really small "count" value like "1" or "2"). + assert query_parameters["needPinnedItemIds"] in ["false"] + + def cursor_type(self, query_parameters): + request_type = query_parameters["post_item_list_request_type"] + if request_type == "2": + return TiktokForwardTimeCursor + elif request_type == "1": + return TiktokPopularTimeCursor + else: + return TiktokBackwardTimeCursor + + +class TiktokFavoriteItemListRequest(TiktokItemListRequest): + """Retrieves a user's liked posts. + + Appears to only support descending order, but it can work without + cookies. 
+ """ + + def __init__(self, range_predicate): + super().__init__("favorite/item_list", "liked posts", range_predicate) + + def validate_query_parameters(self, query_parameters): + super().validate_query_parameters(query_parameters) + assert "secUid" in query_parameters + assert "post_item_list_request_type" in query_parameters + assert query_parameters["post_item_list_request_type"] == "0" + assert "needPinnedItemIds" in query_parameters + assert query_parameters["needPinnedItemIds"] in ["false"] + + def cursor_type(self, query_parameters): + return TiktokPopularTimeCursor + + +class TiktokRepostItemListRequest(TiktokItemListRequest): + """Retrieves a user's reposts. + + Appears to only support descending order, but it can work without + cookies. + """ + + def __init__(self, range_predicate): + super().__init__("repost/item_list", "reposts", range_predicate) + + def validate_query_parameters(self, query_parameters): + super().validate_query_parameters(query_parameters) + assert "secUid" in query_parameters + assert "post_item_list_request_type" in query_parameters + assert query_parameters["post_item_list_request_type"] == "0" + assert "needPinnedItemIds" in query_parameters + assert query_parameters["needPinnedItemIds"] in ["false"] + + def cursor_type(self, query_parameters): + return TiktokItemCursor + + +class TiktokSavedPostItemListRequest(TiktokItemListRequest): + """Retrieves a user's saved posts. + + Appears to only support descending order, but it can work without + cookies. 
+ """ + + def __init__(self, range_predicate): + super().__init__("user/collect/item_list", "saved posts", + range_predicate) + + def validate_query_parameters(self, query_parameters): + super().validate_query_parameters(query_parameters) + assert "secUid" in query_parameters + assert "post_item_list_request_type" in query_parameters + assert query_parameters["post_item_list_request_type"] == "0" + assert "needPinnedItemIds" in query_parameters + assert query_parameters["needPinnedItemIds"] in ["false"] + + def cursor_type(self, query_parameters): + return TiktokPopularTimeCursor + + +class TiktokStoryItemListRequest(TiktokItemListRequest): + def __init__(self): + super().__init__("story/item_list", "stories", None) + + def validate_query_parameters(self, query_parameters): + super().validate_query_parameters(query_parameters) + assert "authorId" in query_parameters + assert "loadBackward" in query_parameters + assert query_parameters["loadBackward"] in ["true", "false"] + + def cursor_type(self, query_parameters): + return TiktokItemCursor + + +class TiktokStoryBatchItemListRequest(TiktokItemListRequest): + def __init__(self): + super().__init__("story/batch/item_list", "stories", None) + + def validate_query_parameters(self, query_parameters): + # This request type does not need a count parameter so don't invoke + # super().validate_query_parameters(). + assert "authorIds" in query_parameters + # I'd recommend between 1-10 users at a time, as that's what I see in + # the webapp. + author_count = query_parameters["authorIds"].count(",") + 1 + assert author_count >= 1 and author_count <= 10 + # Not sure what this parameter does. + assert "storyCallScene" in query_parameters + assert query_parameters["storyCallScene"] == "2" + + def extract_items(self, data): + # We need to extract each itemList within the response and combine each + # of them into a single list of items. 
If even one of the users doesn't + # have an item list, "exit early," but continue to gather the rest + # (this request doesn't use a cursor anyway so there is no concept of + # exiting early). + items = {} + if type(data.get("batchStoryItemLists")) is not list: + self.exit_early_due_to_no_items = True + return items + for userStories in data["batchStoryItemLists"]: + items.update(super().extract_items(userStories)) + return items + + +class TiktokStoryUserListRequest(TiktokPaginationRequest): + def __init__(self): + super().__init__("story/user_list") + self.exit_early_due_to_no_cookies = False + + def validate_query_parameters(self, query_parameters): + super().validate_query_parameters(query_parameters) + # Not sure what this parameter does. + assert "storyFeedScene" in query_parameters + assert query_parameters["storyFeedScene"] == "3" + + def cursor_type(self, query_parameters): + return functools.partial(TiktokItemCursor, "storyUsers") + + def extract_items(self, data): + if "storyUsers" not in data: + self.exit_early_due_to_no_cookies = True + return {} + return {item["user"]["id"]: item["user"]["uniqueId"] + for item in data["storyUsers"]} + + def exit_early(self, extractor, url): + if self.exit_early_due_to_no_cookies: + extractor.log.error("You must provide cookies to extract the " + "stories of your following list") + return self.exit_early_due_to_no_cookies + + def generate_urls(self): + return [(id, f"https://www.tiktok.com/@{name}") + for id, name in self.items.items()] diff --git a/gallery_dl/extractor/tmohentai.py b/gallery_dl/extractor/tmohentai.py index 873cce8..ef441d3 100644 --- a/gallery_dl/extractor/tmohentai.py +++ b/gallery_dl/extractor/tmohentai.py @@ -16,7 +16,7 @@ class TmohentaiGalleryExtractor(GalleryExtractor): category = "tmohentai" root = "http://tmohentai.com" directory_fmt = ("{category}", "{title} ({gallery_id})") - pattern = rf"{BASE_PATTERN}/(?:contents|reader)/(\w+)" + pattern = BASE_PATTERN + r"/(?:contents|reader)/(\w+)" 
example = "https://tmohentai.com/contents/12345a67b89c0" def __init__(self, match): diff --git a/gallery_dl/extractor/toyhouse.py b/gallery_dl/extractor/toyhouse.py index cc29b11..acf1749 100644 --- a/gallery_dl/extractor/toyhouse.py +++ b/gallery_dl/extractor/toyhouse.py @@ -104,7 +104,7 @@ class ToyhouseExtractor(Extractor): class ToyhouseArtExtractor(ToyhouseExtractor): """Extractor for artworks of a toyhouse user""" subcategory = "art" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/art" + pattern = BASE_PATTERN + r"/([^/?#]+)/art" example = "https://www.toyhou.se/USER/art" def posts(self): diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py index 1ccdafb..2ac7e42 100644 --- a/gallery_dl/extractor/tsumino.py +++ b/gallery_dl/extractor/tsumino.py @@ -30,7 +30,7 @@ class TsuminoBase(): @cache(maxage=14*86400, keyarg=1) def _login_impl(self, username, password): self.log.info("Logging in as %s", username) - url = f"{self.root}/Account/Login" + url = self.root + "/Account/Login" headers = {"Referer": url} data = {"Username": username, "Password": password} @@ -119,9 +119,9 @@ class TsuminoSearchExtractor(TsuminoBase, Extractor): def galleries(self): """Return all gallery results matching 'self.query'""" - url = f"{self.root}/Search/Operate?type=Book" + url = self.root + "/Search/Operate?type=Book" headers = { - "Referer": f"{self.root}/", + "Referer": self.root + "/", "X-Requested-With": "XMLHttpRequest", } data = { diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 5bb5a40..bd59780 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -32,7 +32,7 @@ class TumblrExtractor(Extractor): def _init(self): if name := self.groups[1]: - self.blog = f"{name}.tumblr.com" + self.blog = name + ".tumblr.com" else: self.blog = self.groups[0] or self.groups[2] @@ -270,7 +270,7 @@ class TumblrExtractor(Extractor): class TumblrUserExtractor(TumblrExtractor): """Extractor for a Tumblr user's posts""" 
subcategory = "user" - pattern = rf"{BASE_PATTERN}(?:/page/\d+|/archive)?/?$" + pattern = BASE_PATTERN + r"(?:/page/\d+|/archive)?/?$" example = "https://www.tumblr.com/BLOG" def posts(self): @@ -280,7 +280,7 @@ class TumblrUserExtractor(TumblrExtractor): class TumblrPostExtractor(TumblrExtractor): """Extractor for a single Tumblr post""" subcategory = "post" - pattern = rf"{BASE_PATTERN}/(?:post/|image/)?(\d+)" + pattern = BASE_PATTERN + r"/(?:post/|image/)?(\d+)" example = "https://www.tumblr.com/BLOG/12345" def posts(self): @@ -295,7 +295,7 @@ class TumblrPostExtractor(TumblrExtractor): class TumblrTagExtractor(TumblrExtractor): """Extractor for Tumblr user's posts by tag""" subcategory = "tag" - pattern = rf"{BASE_PATTERN}(?:/archive)?/tagged/([^/?#]+)" + pattern = BASE_PATTERN + r"(?:/archive)?/tagged/([^/?#]+)" example = "https://www.tumblr.com/BLOG/tagged/TAG" def posts(self): @@ -307,7 +307,7 @@ class TumblrTagExtractor(TumblrExtractor): class TumblrDayExtractor(TumblrExtractor): """Extractor for Tumblr user's posts by day""" subcategory = "day" - pattern = rf"{BASE_PATTERN}/day/(\d\d\d\d/\d\d/\d\d)" + pattern = BASE_PATTERN + r"/day/(\d\d\d\d/\d\d/\d\d)" example = "https://www.tumblr.com/BLOG/day/1970/01/01" def posts(self): @@ -325,7 +325,7 @@ class TumblrLikesExtractor(TumblrExtractor): subcategory = "likes" directory_fmt = ("{category}", "{blog_name}", "likes") archive_fmt = "f_{blog[name]}_{id}_{num}" - pattern = rf"{BASE_PATTERN}/likes" + pattern = BASE_PATTERN + r"/likes" example = "https://www.tumblr.com/BLOG/likes" def posts(self): @@ -335,7 +335,7 @@ class TumblrLikesExtractor(TumblrExtractor): class TumblrFollowingExtractor(TumblrExtractor): """Extractor for a Tumblr user's followed blogs""" subcategory = "following" - pattern = rf"{BASE_PATTERN}/following" + pattern = BASE_PATTERN + r"/following" example = "https://www.tumblr.com/BLOG/following" items = TumblrExtractor.items_blogs @@ -347,7 +347,7 @@ class 
TumblrFollowingExtractor(TumblrExtractor): class TumblrFollowersExtractor(TumblrExtractor): """Extractor for a Tumblr user's followers""" subcategory = "followers" - pattern = rf"{BASE_PATTERN}/followers" + pattern = BASE_PATTERN + r"/followers" example = "https://www.tumblr.com/BLOG/followers" items = TumblrExtractor.items_blogs @@ -531,12 +531,16 @@ class TumblrAPI(oauth.OAuth1API): if self.api_key: params["api_key"] = self.api_key - strategy = self.extractor.config("pagination") - if not strategy: - if params.get("before"): - strategy = "before" - elif "offset" not in params: + if strategy := self.extractor.config("pagination"): + if strategy not in {"api", "before"} and "offset" not in params: + self.log.warning('Unable to use "pagination": "%s". ' + 'Falling back to "api".', strategy) strategy = "api" + elif params.get("before"): + strategy = "before" + elif "offset" not in params: + strategy = "api" + self.log.debug("Pagination strategy '%s'", strategy or "offset") while True: data = self._call(endpoint, params) diff --git a/gallery_dl/extractor/tumblrgallery.py b/gallery_dl/extractor/tumblrgallery.py index 68c9ec7..26868ec 100644 --- a/gallery_dl/extractor/tumblrgallery.py +++ b/gallery_dl/extractor/tumblrgallery.py @@ -36,7 +36,7 @@ class TumblrgalleryExtractor(GalleryExtractor): class TumblrgalleryTumblrblogExtractor(TumblrgalleryExtractor): """Extractor for Tumblrblog on tumblrgallery.xyz""" subcategory = "tumblrblog" - pattern = rf"{BASE_PATTERN}(/tumblrblog/gallery/(\d+)\.html)" + pattern = BASE_PATTERN + r"(/tumblrblog/gallery/(\d+)\.html)" example = "https://tumblrgallery.xyz/tumblrblog/gallery/12345.html" def __init__(self, match): @@ -68,7 +68,7 @@ class TumblrgalleryTumblrblogExtractor(TumblrgalleryExtractor): class TumblrgalleryPostExtractor(TumblrgalleryExtractor): """Extractor for Posts on tumblrgallery.xyz""" subcategory = "post" - pattern = rf"{BASE_PATTERN}(/post/(\d+)\.html)" + pattern = BASE_PATTERN + r"(/post/(\d+)\.html)" example = 
"https://tumblrgallery.xyz/post/12345.html" def __init__(self, match): @@ -93,7 +93,7 @@ class TumblrgallerySearchExtractor(TumblrgalleryExtractor): subcategory = "search" filename_fmt = "{category}_{num:>03}_{gallery_id}_{id}_{title}.{extension}" directory_fmt = ("{category}", "{search_term}") - pattern = rf"{BASE_PATTERN}(/s\.php\?q=([^&#]+))" + pattern = BASE_PATTERN + r"(/s\.php\?q=([^&#]+))" example = "https://tumblrgallery.xyz/s.php?q=QUERY" def __init__(self, match): diff --git a/gallery_dl/extractor/tungsten.py b/gallery_dl/extractor/tungsten.py index 67c0b50..8f2b55d 100644 --- a/gallery_dl/extractor/tungsten.py +++ b/gallery_dl/extractor/tungsten.py @@ -52,7 +52,7 @@ class TungstenExtractor(Extractor): class TungstenPostExtractor(TungstenExtractor): subcategory = "post" - pattern = rf"{BASE_PATTERN}/post/(\w+)" + pattern = BASE_PATTERN + r"/post/(\w+)" example = "https://tungsten.run/post/AbCdEfGhIjKlMnOp" def posts(self): @@ -64,7 +64,7 @@ class TungstenPostExtractor(TungstenExtractor): class TungstenModelExtractor(TungstenExtractor): subcategory = "model" - pattern = rf"{BASE_PATTERN}/model/(\w+)(?:/?\?model_version=(\w+))?" + pattern = BASE_PATTERN + r"/model/(\w+)(?:/?\?model_version=(\w+))?" example = "https://tungsten.run/model/AbCdEfGhIjKlM" def posts(self): @@ -87,7 +87,7 @@ class TungstenModelExtractor(TungstenExtractor): class TungstenUserExtractor(TungstenExtractor): subcategory = "user" - pattern = rf"{BASE_PATTERN}/user/([^/?#]+)(?:/posts)?/?(?:\?([^#]+))?" + pattern = BASE_PATTERN + r"/user/([^/?#]+)(?:/posts)?/?(?:\?([^#]+))?" 
example = "https://tungsten.run/user/USER" def posts(self): diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py index e21ef2a..4558e21 100644 --- a/gallery_dl/extractor/twibooru.py +++ b/gallery_dl/extractor/twibooru.py @@ -48,7 +48,7 @@ class TwibooruPostExtractor(TwibooruExtractor): """Extractor for single twibooru posts""" subcategory = "post" request_interval = (0.5, 1.5) - pattern = rf"{BASE_PATTERN}/(\d+)" + pattern = BASE_PATTERN + r"/(\d+)" example = "https://twibooru.org/12345" def __init__(self, match): @@ -63,7 +63,7 @@ class TwibooruSearchExtractor(TwibooruExtractor): """Extractor for twibooru search results""" subcategory = "search" directory_fmt = ("{category}", "{search_tags}") - pattern = rf"{BASE_PATTERN}/(?:search/?\?([^#]+)|tags/([^/?#]+))" + pattern = BASE_PATTERN + r"/(?:search/?\?([^#]+)|tags/([^/?#]+))" example = "https://twibooru.org/search?q=TAG" def __init__(self, match): @@ -97,7 +97,7 @@ class TwibooruGalleryExtractor(TwibooruExtractor): subcategory = "gallery" directory_fmt = ("{category}", "galleries", "{gallery[id]} {gallery[title]}") - pattern = rf"{BASE_PATTERN}/galleries/(\d+)" + pattern = BASE_PATTERN + r"/galleries/(\d+)" example = "https://twibooru.org/galleries/12345" def __init__(self, match): diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 546e8e1..cc3812e 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -16,7 +16,7 @@ import random BASE_PATTERN = (r"(?:https?://)?(?:www\.|mobile\.)?" 
r"(?:(?:[fv]x)?twitter|(?:fix(?:up|v))?x)\.com") -USER_PATTERN = rf"{BASE_PATTERN}/([^/?#]+)" +USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)" class TwitterExtractor(Extractor): @@ -521,10 +521,13 @@ class TwitterExtractor(Extractor): except KeyError: pass - core = user.get("core") or user - legacy = user.get("legacy") or user - lget = legacy.get + if "core" in user: + core = user["core"] + legacy = user["legacy"] + else: + core = legacy = user + lget = legacy.get if lget("withheld_scope"): self.log.warning("'%s'", lget("description")) @@ -533,14 +536,9 @@ class TwitterExtractor(Extractor): "id" : text.parse_int(uid), "name" : core.get("screen_name"), "nick" : core.get("name"), - "location" : user["location"].get("location"), "date" : self.parse_datetime( core["created_at"], "%a %b %d %H:%M:%S %z %Y"), - "verified" : user["verification"]["verified"], - "protected" : user["privacy"]["protected"], "profile_banner" : lget("profile_banner_url", ""), - "profile_image" : user["avatar"].get("image_url", "").replace( - "_normal.", "."), "favourites_count": lget("favourites_count"), "followers_count" : lget("followers_count"), "friends_count" : lget("friends_count"), @@ -549,6 +547,19 @@ class TwitterExtractor(Extractor): "statuses_count" : lget("statuses_count"), } + if "core" in user: + udata["location"] = user["location"].get("location") + udata["verified"] = user["verification"]["verified"] + udata["protected"] = user["privacy"]["protected"] + udata["profile_image"] = user["avatar"].get( + "image_url", "").replace("_normal.", ".") + else: + udata["location"] = user["location"] + udata["verified"] = user["verified"] + udata["protected"] = user["protected"] + udata["profile_image"] = user["profile_image_url_https"].replace( + "_normal.", ".") + descr = legacy["description"] if urls := entities["description"].get("urls"): for url in urls: @@ -667,8 +678,7 @@ class TwitterExtractor(Extractor): class TwitterHomeExtractor(TwitterExtractor): """Extractor for Twitter home 
timelines""" subcategory = "home" - pattern = (rf"{BASE_PATTERN}/" - rf"(?:home(?:/fo(?:llowing|r[-_ ]?you()))?|i/timeline)/?$") + pattern = BASE_PATTERN + r"/home(?:/fo(?:llowing|r[-_ ]?you()))?/?$" example = "https://x.com/home" def tweets(self): @@ -677,10 +687,20 @@ class TwitterHomeExtractor(TwitterExtractor): return self.api.home_timeline() +class TwitterNotificationsExtractor(TwitterExtractor): + """Extractor for Twitter notifications timelines""" + subcategory = "notifications" + pattern = BASE_PATTERN + r"/(?:notifications|i/timeline())" + example = "https://x.com/notifications" + + def tweets(self): + return self.api.notifications_devicefollow() + + class TwitterSearchExtractor(TwitterExtractor): """Extractor for Twitter search results""" subcategory = "search" - pattern = rf"{BASE_PATTERN}/search/?\?(?:[^&#]+&)*q=([^&#]+)" + pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)" example = "https://x.com/search?q=QUERY" def metadata(self): @@ -711,7 +731,7 @@ class TwitterSearchExtractor(TwitterExtractor): class TwitterHashtagExtractor(TwitterExtractor): """Extractor for Twitter hashtags""" subcategory = "hashtag" - pattern = rf"{BASE_PATTERN}/hashtag/([^/?#]+)" + pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)" example = "https://x.com/hashtag/NAME" def items(self): @@ -722,7 +742,7 @@ class TwitterHashtagExtractor(TwitterExtractor): class TwitterUserExtractor(Dispatch, TwitterExtractor): """Extractor for a Twitter user""" - pattern = (rf"{BASE_PATTERN}/(?:" + pattern = (BASE_PATTERN + r"/(?:" r"([^/?#]+)/?(?:$|\?|#)" r"|i(?:/user/|ntent/user\?user_id=)(\d+))") example = "https://x.com/USER" @@ -730,26 +750,26 @@ class TwitterUserExtractor(Dispatch, TwitterExtractor): def items(self): user, user_id = self.groups if user_id is not None: - user = f"id:{user_id}" + user = "id:" + user_id base = f"{self.root}/{user}/" return self._dispatch_extractors(( - (TwitterInfoExtractor , f"{base}info"), - (TwitterAvatarExtractor , f"{base}photo"), - 
(TwitterBackgroundExtractor, f"{base}header_photo"), - (TwitterTimelineExtractor , f"{base}timeline"), - (TwitterTweetsExtractor , f"{base}tweets"), - (TwitterMediaExtractor , f"{base}media"), - (TwitterRepliesExtractor , f"{base}with_replies"), - (TwitterHighlightsExtractor, f"{base}highlights"), - (TwitterLikesExtractor , f"{base}likes"), + (TwitterInfoExtractor , base + "info"), + (TwitterAvatarExtractor , base + "photo"), + (TwitterBackgroundExtractor, base + "header_photo"), + (TwitterTimelineExtractor , base + "timeline"), + (TwitterTweetsExtractor , base + "tweets"), + (TwitterMediaExtractor , base + "media"), + (TwitterRepliesExtractor , base + "with_replies"), + (TwitterHighlightsExtractor, base + "highlights"), + (TwitterLikesExtractor , base + "likes"), ), ("timeline",)) class TwitterTimelineExtractor(TwitterExtractor): """Extractor for a Twitter user timeline""" subcategory = "timeline" - pattern = rf"{USER_PATTERN}/timeline(?!\w)" + pattern = USER_PATTERN + r"/timeline(?!\w)" example = "https://x.com/USER/timeline" def _init_cursor(self): @@ -846,7 +866,7 @@ class TwitterTimelineExtractor(TwitterExtractor): class TwitterTweetsExtractor(TwitterExtractor): """Extractor for Tweets from a user's Tweets timeline""" subcategory = "tweets" - pattern = rf"{USER_PATTERN}/tweets(?!\w)" + pattern = USER_PATTERN + r"/tweets(?!\w)" example = "https://x.com/USER/tweets" def tweets(self): @@ -856,7 +876,7 @@ class TwitterTweetsExtractor(TwitterExtractor): class TwitterRepliesExtractor(TwitterExtractor): """Extractor for Tweets from a user's timeline including replies""" subcategory = "replies" - pattern = rf"{USER_PATTERN}/with_replies(?!\w)" + pattern = USER_PATTERN + r"/with_replies(?!\w)" example = "https://x.com/USER/with_replies" def tweets(self): @@ -866,7 +886,7 @@ class TwitterRepliesExtractor(TwitterExtractor): class TwitterHighlightsExtractor(TwitterExtractor): """Extractor for Tweets from a user's highlights timeline""" subcategory = "highlights" - pattern 
= rf"{USER_PATTERN}/highlights(?!\w)" + pattern = USER_PATTERN + r"/highlights(?!\w)" example = "https://x.com/USER/highlights" def tweets(self): @@ -876,7 +896,7 @@ class TwitterHighlightsExtractor(TwitterExtractor): class TwitterMediaExtractor(TwitterExtractor): """Extractor for Tweets from a user's Media timeline""" subcategory = "media" - pattern = rf"{USER_PATTERN}/media(?!\w)" + pattern = USER_PATTERN + r"/media(?!\w)" example = "https://x.com/USER/media" def tweets(self): @@ -886,7 +906,7 @@ class TwitterMediaExtractor(TwitterExtractor): class TwitterLikesExtractor(TwitterExtractor): """Extractor for liked tweets""" subcategory = "likes" - pattern = rf"{USER_PATTERN}/likes(?!\w)" + pattern = USER_PATTERN + r"/likes(?!\w)" example = "https://x.com/USER/likes" def metadata(self): @@ -899,7 +919,7 @@ class TwitterLikesExtractor(TwitterExtractor): class TwitterBookmarkExtractor(TwitterExtractor): """Extractor for bookmarked tweets""" subcategory = "bookmark" - pattern = rf"{BASE_PATTERN}/i/bookmarks()" + pattern = BASE_PATTERN + r"/i/bookmarks()" example = "https://x.com/i/bookmarks" def tweets(self): @@ -915,7 +935,7 @@ class TwitterBookmarkExtractor(TwitterExtractor): class TwitterListExtractor(TwitterExtractor): """Extractor for Twitter lists""" subcategory = "list" - pattern = rf"{BASE_PATTERN}/i/lists/(\d+)/?$" + pattern = BASE_PATTERN + r"/i/lists/(\d+)/?$" example = "https://x.com/i/lists/12345" def tweets(self): @@ -925,7 +945,7 @@ class TwitterListExtractor(TwitterExtractor): class TwitterListMembersExtractor(TwitterExtractor): """Extractor for members of a Twitter list""" subcategory = "list-members" - pattern = rf"{BASE_PATTERN}/i/lists/(\d+)/members" + pattern = BASE_PATTERN + r"/i/lists/(\d+)/members" example = "https://x.com/i/lists/12345/members" def items(self): @@ -936,7 +956,7 @@ class TwitterListMembersExtractor(TwitterExtractor): class TwitterFollowingExtractor(TwitterExtractor): """Extractor for followed users""" subcategory = "following" - 
pattern = rf"{USER_PATTERN}/following(?!\w)" + pattern = USER_PATTERN + r"/following(?!\w)" example = "https://x.com/USER/following" def items(self): @@ -947,7 +967,7 @@ class TwitterFollowingExtractor(TwitterExtractor): class TwitterFollowersExtractor(TwitterExtractor): """Extractor for a user's followers""" subcategory = "followers" - pattern = rf"{USER_PATTERN}/followers(?!\w)" + pattern = USER_PATTERN + r"/followers(?!\w)" example = "https://x.com/USER/followers" def items(self): @@ -961,7 +981,7 @@ class TwitterCommunityExtractor(TwitterExtractor): directory_fmt = ("{category}", "Communities", "{community[name]} ({community[id]})") archive_fmt = "C_{community[id]}_{tweet_id}_{num}" - pattern = rf"{BASE_PATTERN}/i/communities/(\d+)" + pattern = BASE_PATTERN + r"/i/communities/(\d+)" example = "https://x.com/i/communities/12345" def tweets(self): @@ -975,7 +995,7 @@ class TwitterCommunitiesExtractor(TwitterExtractor): subcategory = "communities" directory_fmt = TwitterCommunityExtractor.directory_fmt archive_fmt = TwitterCommunityExtractor.archive_fmt - pattern = rf"{BASE_PATTERN}/([^/?#]+)/communities/?$" + pattern = BASE_PATTERN + r"/([^/?#]+)/communities/?$" example = "https://x.com/i/communities" def tweets(self): @@ -987,7 +1007,7 @@ class TwitterEventExtractor(TwitterExtractor): subcategory = "event" directory_fmt = ("{category}", "Events", "{event[id]} {event[short_title]}") - pattern = rf"{BASE_PATTERN}/i/events/(\d+)" + pattern = BASE_PATTERN + r"/i/events/(\d+)" example = "https://x.com/i/events/12345" def metadata(self): @@ -1000,7 +1020,7 @@ class TwitterEventExtractor(TwitterExtractor): class TwitterTweetExtractor(TwitterExtractor): """Extractor for individual tweets""" subcategory = "tweet" - pattern = (rf"{BASE_PATTERN}/([^/?#]+|i/web)/status/(\d+)" + pattern = (BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)" r"/?(?:$|\?|#|photo/|video/)") example = "https://x.com/USER/status/12345" @@ -1081,7 +1101,7 @@ class 
TwitterTweetExtractor(TwitterExtractor): class TwitterQuotesExtractor(TwitterExtractor): """Extractor for quotes of a Tweet""" subcategory = "quotes" - pattern = rf"{BASE_PATTERN}/(?:[^/?#]+|i/web)/status/(\d+)/quotes" + pattern = BASE_PATTERN + r"/(?:[^/?#]+|i/web)/status/(\d+)/quotes" example = "https://x.com/USER/status/12345/quotes" def items(self): @@ -1093,7 +1113,7 @@ class TwitterQuotesExtractor(TwitterExtractor): class TwitterInfoExtractor(TwitterExtractor): """Extractor for a user's profile data""" subcategory = "info" - pattern = rf"{USER_PATTERN}/info" + pattern = USER_PATTERN + r"/info" example = "https://x.com/USER/info" def items(self): @@ -1112,7 +1132,7 @@ class TwitterAvatarExtractor(TwitterExtractor): subcategory = "avatar" filename_fmt = "avatar {date}.{extension}" archive_fmt = "AV_{user[id]}_{date}" - pattern = rf"{USER_PATTERN}/photo" + pattern = USER_PATTERN + r"/photo" example = "https://x.com/USER/photo" def tweets(self): @@ -1134,7 +1154,7 @@ class TwitterBackgroundExtractor(TwitterExtractor): subcategory = "background" filename_fmt = "background {date}.{extension}" archive_fmt = "BG_{user[id]}_{date}" - pattern = rf"{USER_PATTERN}/header_photo" + pattern = USER_PATTERN + r"/header_photo" example = "https://x.com/USER/header_photo" def tweets(self): @@ -1226,18 +1246,17 @@ class TwitterAPI(): "include_mute_edge": "1", "include_can_dm": "1", "include_can_media_tag": "1", - "include_ext_has_nft_avatar": "1", "include_ext_is_blue_verified": "1", "include_ext_verified_type": "1", + "include_ext_profile_image_shape": "1", "skip_status": "1", "cards_platform": "Web-12", "include_cards": "1", "include_ext_alt_text": "true", - "include_ext_limited_action_results": "false", + "include_ext_limited_action_results": "true", "include_quote_count": "true", "include_reply_count": "1", "tweet_mode": "extended", - "include_ext_collab_control": "true", "include_ext_views": "true", "include_entities": "true", "include_user_entities": "true", @@ -1247,16 
+1266,11 @@ class TwitterAPI(): "include_ext_trusted_friends_metadata": "true", "send_error_codes": "true", "simple_quoted_tweet": "true", - "q": None, - "count": "100", - "query_source": None, "cursor": None, - "pc": None, - "spelling_corrections": None, - "include_ext_edit_control": "true", - "ext": "mediaStats,highlightedLabel,hasNftAvatar,voiceInfo," - "enrichments,superFollowMetadata,unmentionInfo,editControl," - "collab_control,vibe", + "count": "20", + "ext": "mediaStats,highlightedLabel,parodyCommentaryFanLabel," + "voiceInfo,birdwatchPivot,superFollowMetadata," + "unmentionInfo,editControl,article", } self.features = { "hidden_profile_subscriptions_enabled": True, @@ -1576,7 +1590,7 @@ class TwitterAPI(): params["timeline_id"] = "recap" params["urt"] = "true" params["get_annotations"] = "true" - return self._pagination_legacy(endpoint, params) + return self._pagination_rest(endpoint, params) def live_event(self, event_id): endpoint = f"/1.1/live_event/1/{event_id}/timeline.json" @@ -1604,6 +1618,12 @@ class TwitterAPI(): return self._pagination_users( endpoint, variables, ("list", "members_timeline", "timeline")) + def notifications_devicefollow(self): + endpoint = "/2/notifications/device_follow.json" + params = self.params.copy() + params["count"] = self.extractor.config("limit", 50) + return self._pagination_rest(endpoint, params) + def user_followers(self, screen_name): endpoint = "/graphql/i6PPdIMm1MO7CpAqjau7sw/Followers" variables = { @@ -1797,7 +1817,7 @@ class TwitterAPI(): raise exception.AbortExtraction( f"{response.status_code} {response.reason} ({errors})") - def _pagination_legacy(self, endpoint, params): + def _pagination_rest(self, endpoint, params): extr = self.extractor if cursor := extr._init_cursor(): params["cursor"] = cursor @@ -1990,10 +2010,10 @@ class TwitterAPI(): extr.log.info("Retrying API request as guest") continue raise exception.AuthorizationError( - f"{user['screen_name']} blocked your account") + user["screen_name"] + " 
blocked your account") elif user.get("protected"): raise exception.AuthorizationError( - f"{user['screen_name']}'s Tweets are protected") + user["screen_name"] + "'s Tweets are protected") raise exception.AbortExtraction( "Unable to retrieve Tweets from this timeline") @@ -2042,7 +2062,7 @@ class TwitterAPI(): pinned = None elif pinned := extr._user_obj["legacy"].get( "pinned_tweet_ids_str"): - pinned = f"-tweet-{pinned[0]}" + pinned = "-tweet-" + pinned[0] for idx, entry in enumerate(tweets): if entry["entryId"].endswith(pinned): # mark as pinned / set 'pinned = True' @@ -2248,7 +2268,7 @@ class TwitterAPI(): def _update_variables_search(self, variables, cursor, tweet): try: tweet_id = tweet.get("id_str") or tweet["legacy"]["id_str"] - max_id = f"max_id:{int(tweet_id)-1}" + max_id = "max_id:" + str(int(tweet_id)-1) query, n = text.re(r"\bmax_id:\d+").subn( max_id, variables["rawQuery"]) diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py index b77be95..e2023f3 100644 --- a/gallery_dl/extractor/unsplash.py +++ b/gallery_dl/extractor/unsplash.py @@ -74,7 +74,7 @@ class UnsplashExtractor(Extractor): class UnsplashImageExtractor(UnsplashExtractor): """Extractor for a single unsplash photo""" subcategory = "image" - pattern = rf"{BASE_PATTERN}/photos/([^/?#]+)" + pattern = BASE_PATTERN + r"/photos/([^/?#]+)" example = "https://unsplash.com/photos/ID" def photos(self): @@ -85,7 +85,7 @@ class UnsplashImageExtractor(UnsplashExtractor): class UnsplashUserExtractor(UnsplashExtractor): """Extractor for all photos of an unsplash user""" subcategory = "user" - pattern = rf"{BASE_PATTERN}/@(\w+)/?$" + pattern = BASE_PATTERN + r"/@(\w+)/?$" example = "https://unsplash.com/@USER" def photos(self): @@ -97,7 +97,7 @@ class UnsplashUserExtractor(UnsplashExtractor): class UnsplashFavoriteExtractor(UnsplashExtractor): """Extractor for all likes of an unsplash user""" subcategory = "favorite" - pattern = rf"{BASE_PATTERN}/@(\w+)/likes" + pattern = 
BASE_PATTERN + r"/@(\w+)/likes" example = "https://unsplash.com/@USER/likes" def photos(self): @@ -109,7 +109,7 @@ class UnsplashFavoriteExtractor(UnsplashExtractor): class UnsplashCollectionExtractor(UnsplashExtractor): """Extractor for an unsplash collection""" subcategory = "collection" - pattern = rf"{BASE_PATTERN}/collections/([^/?#]+)(?:/([^/?#]+))?" + pattern = BASE_PATTERN + r"/collections/([^/?#]+)(?:/([^/?#]+))?" example = "https://unsplash.com/collections/12345/TITLE" def __init__(self, match): @@ -128,7 +128,7 @@ class UnsplashCollectionExtractor(UnsplashExtractor): class UnsplashSearchExtractor(UnsplashExtractor): """Extractor for unsplash search results""" subcategory = "search" - pattern = rf"{BASE_PATTERN}/s/photos/([^/?#]+)(?:\?([^#]+))?" + pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^#]+))?" example = "https://unsplash.com/s/photos/QUERY" def __init__(self, match): diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py index 0056d1b..7a9269d 100644 --- a/gallery_dl/extractor/urlshortener.py +++ b/gallery_dl/extractor/urlshortener.py @@ -32,7 +32,7 @@ BASE_PATTERN = UrlshortenerExtractor.update({ class UrlshortenerLinkExtractor(UrlshortenerExtractor): """Extractor for general-purpose URL shorteners""" subcategory = "link" - pattern = rf"{BASE_PATTERN}(/[^/?#]+)" + pattern = BASE_PATTERN + r"(/[^/?#]+)" example = "https://bit.ly/abcde" def items(self): diff --git a/gallery_dl/extractor/vichan.py b/gallery_dl/extractor/vichan.py index 86758f3..4a1c867 100644 --- a/gallery_dl/extractor/vichan.py +++ b/gallery_dl/extractor/vichan.py @@ -39,7 +39,7 @@ class VichanThreadExtractor(VichanExtractor): directory_fmt = ("{category}", "{board}", "{thread} {title}") filename_fmt = "{time}{num:?-//} {filename}.{extension}" archive_fmt = "{board}_{thread}_{tim}" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/res/(\d+)" + pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)" example = "https://8kun.top/a/res/12345.html" def 
items(self): @@ -93,7 +93,7 @@ class VichanThreadExtractor(VichanExtractor): class VichanBoardExtractor(VichanExtractor): """Extractor for vichan boards""" subcategory = "board" - pattern = rf"{BASE_PATTERN}/([^/?#]+)(?:/index|/catalog|/\d+|/?$)" + pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)" example = "https://8kun.top/a/" def items(self): diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py index 8f6368b..3981763 100644 --- a/gallery_dl/extractor/vipergirls.py +++ b/gallery_dl/extractor/vipergirls.py @@ -94,7 +94,7 @@ class VipergirlsExtractor(Extractor): def _login_impl(self, username, password): self.log.info("Logging in as %s", username) - url = f"{self.root}/login.php?do=login" + url = self.root + "/login.php?do=login" data = { "vb_login_username": username, "vb_login_password": password, @@ -124,8 +124,8 @@ class VipergirlsExtractor(Extractor): class VipergirlsThreadExtractor(VipergirlsExtractor): """Extractor for vipergirls threads""" subcategory = "thread" - pattern = (rf"{BASE_PATTERN}" - rf"/threads/(\d+)(?:-[^/?#]+)?(/page\d+)?(?:$|#|\?(?!p=))") + pattern = (BASE_PATTERN + + r"/threads/(\d+)(?:-[^/?#]+)?(/page\d+)?(?:$|#|\?(?!p=))") example = "https://vipergirls.to/threads/12345-TITLE" def __init__(self, match): @@ -140,8 +140,8 @@ class VipergirlsThreadExtractor(VipergirlsExtractor): class VipergirlsPostExtractor(VipergirlsExtractor): """Extractor for vipergirls posts""" subcategory = "post" - pattern = (rf"{BASE_PATTERN}" - rf"/threads/(\d+)(?:-[^/?#]+)?\?p=\d+[^#]*#post(\d+)") + pattern = (BASE_PATTERN + + r"/threads/(\d+)(?:-[^/?#]+)?\?p=\d+[^#]*#post(\d+)") example = "https://vipergirls.to/threads/12345-TITLE?p=23456#post23456" def __init__(self, match): diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py index e7453fc..ea42265 100644 --- a/gallery_dl/extractor/vk.py +++ b/gallery_dl/extractor/vk.py @@ -101,7 +101,7 @@ class VkExtractor(Extractor): url, method="POST", 
headers=headers, data=data) if response.history and "/challenge.html" in response.url: raise exception.AbortExtraction( - f"HTTP redirect to 'challenge' page:\n{response.url}") + "HTTP redirect to 'challenge' page:\n" + response.url) payload = response.json()["payload"][1] if len(payload) < 4: @@ -134,7 +134,7 @@ class VkExtractor(Extractor): class VkPhotosExtractor(VkExtractor): """Extractor for photos from a vk user""" subcategory = "photos" - pattern = (rf"{BASE_PATTERN}/(?:" + pattern = (BASE_PATTERN + r"/(?:" r"(?:albums|photos|id)(-?\d+)" r"|(?!(?:album|tag|wall)-?\d+_?)([^/?#]+))") example = "https://vk.com/id12345" @@ -184,7 +184,7 @@ class VkAlbumExtractor(VkExtractor): """Extractor for a vk album""" subcategory = "album" directory_fmt = ("{category}", "{user[id]}", "{album[id]}") - pattern = rf"{BASE_PATTERN}/album(-?\d+)_(\d+)$" + pattern = BASE_PATTERN + r"/album(-?\d+)_(\d+)$" example = "https://vk.com/album12345_00" def photos(self): @@ -228,7 +228,7 @@ class VkTaggedExtractor(VkExtractor): """Extractor for a vk tagged photos""" subcategory = "tagged" directory_fmt = ("{category}", "{user[id]}", "tags") - pattern = rf"{BASE_PATTERN}/tag(-?\d+)$" + pattern = BASE_PATTERN + r"/tag(-?\d+)$" example = "https://vk.com/tag12345" def __init__(self, match): @@ -236,7 +236,7 @@ class VkTaggedExtractor(VkExtractor): self.user_id = match[1] def photos(self): - return self._pagination(f"tag{self.user_id}") + return self._pagination("tag" + self.user_id) def metadata(self): return {"user": {"id": self.user_id}} @@ -247,7 +247,7 @@ class VkWallPostExtractor(VkExtractor): subcategory = "wall-post" directory_fmt = ("{category}", "{user[id]}", "wall") filename_fmt = "{wall[id]}_{num}.{extension}" - pattern = rf"{BASE_PATTERN}/wall(-?\d+)_(\d+)" + pattern = BASE_PATTERN + r"/wall(-?\d+)_(\d+)" example = "https://vk.com/wall12345_123" def photos(self): diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py index b8da813..6046a78 100644 --- 
a/gallery_dl/extractor/vsco.py +++ b/gallery_dl/extractor/vsco.py @@ -12,7 +12,7 @@ from .common import Extractor, Message, Dispatch from .. import text, util BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co" -USER_PATTERN = rf"{BASE_PATTERN}/([^/?#]+)" +USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)" class VscoExtractor(Extractor): @@ -133,7 +133,7 @@ class VscoExtractor(Extractor): class VscoUserExtractor(Dispatch, VscoExtractor): """Extractor for a vsco user profile""" - pattern = rf"{USER_PATTERN}/?$" + pattern = USER_PATTERN + r"/?$" example = "https://vsco.co/USER" def items(self): @@ -149,7 +149,7 @@ class VscoUserExtractor(Dispatch, VscoExtractor): class VscoGalleryExtractor(VscoExtractor): """Extractor for a vsco user's gallery""" subcategory = "gallery" - pattern = rf"{USER_PATTERN}/(?:gallery|images)" + pattern = USER_PATTERN + r"/(?:gallery|images)" example = "https://vsco.co/USER/gallery" def images(self): @@ -158,7 +158,7 @@ class VscoGalleryExtractor(VscoExtractor): tkn = data["users"]["currentUser"]["tkn"] sid = str(data["sites"]["siteByUsername"][self.user]["site"]["id"]) - url = f"{self.root}/api/3.0/medias/profile" + url = self.root + "/api/3.0/medias/profile" params = { "site_id" : sid, "limit" : "14", @@ -173,7 +173,7 @@ class VscoCollectionExtractor(VscoExtractor): subcategory = "collection" directory_fmt = ("{category}", "{user}", "collection") archive_fmt = "c_{user}_{id}" - pattern = rf"{USER_PATTERN}/collection" + pattern = USER_PATTERN + r"/collection" example = "https://vsco.co/USER/collection/1" def images(self): @@ -198,7 +198,7 @@ class VscoSpaceExtractor(VscoExtractor): subcategory = "space" directory_fmt = ("{category}", "space", "{user}") archive_fmt = "s_{user}_{id}" - pattern = rf"{BASE_PATTERN}/spaces/([^/?#]+)" + pattern = BASE_PATTERN + r"/spaces/([^/?#]+)" example = "https://vsco.co/spaces/a1b2c3d4e5f" def images(self): @@ -245,7 +245,7 @@ class VscoSpaceExtractor(VscoExtractor): class VscoSpacesExtractor(VscoExtractor): 
"""Extractor for a vsco.co user's spaces""" subcategory = "spaces" - pattern = rf"{USER_PATTERN}/spaces" + pattern = USER_PATTERN + r"/spaces" example = "https://vsco.co/USER/spaces" def items(self): @@ -275,7 +275,7 @@ class VscoSpacesExtractor(VscoExtractor): class VscoAvatarExtractor(VscoExtractor): """Extractor for vsco.co user avatars""" subcategory = "avatar" - pattern = rf"{USER_PATTERN}/avatar" + pattern = USER_PATTERN + r"/avatar" example = "https://vsco.co/USER/avatar" def images(self): @@ -303,7 +303,7 @@ class VscoAvatarExtractor(VscoExtractor): class VscoImageExtractor(VscoExtractor): """Extractor for individual images on vsco.co""" subcategory = "image" - pattern = rf"{USER_PATTERN}/media/([0-9a-fA-F]+)" + pattern = USER_PATTERN + r"/media/([0-9a-fA-F]+)" example = "https://vsco.co/USER/media/0123456789abcdef" def images(self): @@ -316,7 +316,7 @@ class VscoImageExtractor(VscoExtractor): class VscoVideoExtractor(VscoExtractor): """Extractor for vsco.co videos links""" subcategory = "video" - pattern = rf"{USER_PATTERN}/video/([^/?#]+)" + pattern = USER_PATTERN + r"/video/([^/?#]+)" example = "https://vsco.co/USER/video/012345678-9abc-def0" def images(self): diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py index 9ea3c36..3025c56 100644 --- a/gallery_dl/extractor/wallhaven.py +++ b/gallery_dl/extractor/wallhaven.py @@ -113,7 +113,7 @@ class WallhavenCollectionsExtractor(WallhavenExtractor): base = f"{self.root}/user/{self.username}/favorites/" for collection in self.api.collections(self.username): collection["_extractor"] = WallhavenCollectionExtractor - url = f"{base}{collection['id']}" + url = base + str(collection["id"]) yield Message.Queue, url, collection diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py index e718e51..bc1bc71 100644 --- a/gallery_dl/extractor/weasyl.py +++ b/gallery_dl/extractor/weasyl.py @@ -70,7 +70,7 @@ class WeasylExtractor(Extractor): class 
WeasylSubmissionExtractor(WeasylExtractor): subcategory = "submission" - pattern = rf"{BASE_PATTERN}(?:~[\w~-]+/submissions|submission|view)/(\d+)" + pattern = BASE_PATTERN + r"(?:~[\w~-]+/submissions|submission|view)/(\d+)" example = "https://www.weasyl.com/~USER/submissions/12345/TITLE" def __init__(self, match): @@ -86,7 +86,7 @@ class WeasylSubmissionExtractor(WeasylExtractor): class WeasylSubmissionsExtractor(WeasylExtractor): subcategory = "submissions" - pattern = rf"{BASE_PATTERN}(?:~|submissions/)([\w~-]+)/?$" + pattern = BASE_PATTERN + r"(?:~|submissions/)([\w~-]+)/?$" example = "https://www.weasyl.com/submissions/USER" def __init__(self, match): @@ -101,7 +101,7 @@ class WeasylSubmissionsExtractor(WeasylExtractor): class WeasylFolderExtractor(WeasylExtractor): subcategory = "folder" directory_fmt = ("{category}", "{owner_login}", "{folder_name}") - pattern = rf"{BASE_PATTERN}submissions/([\w~-]+)\?folderid=(\d+)" + pattern = BASE_PATTERN + r"submissions/([\w~-]+)\?folderid=(\d+)" example = "https://www.weasyl.com/submissions/USER?folderid=12345" def __init__(self, match): @@ -122,7 +122,7 @@ class WeasylJournalExtractor(WeasylExtractor): subcategory = "journal" filename_fmt = "{journalid} {title}.{extension}" archive_fmt = "{journalid}" - pattern = rf"{BASE_PATTERN}journal/(\d+)" + pattern = BASE_PATTERN + r"journal/(\d+)" example = "https://www.weasyl.com/journal/12345" def __init__(self, match): @@ -139,7 +139,7 @@ class WeasylJournalsExtractor(WeasylExtractor): subcategory = "journals" filename_fmt = "{journalid} {title}.{extension}" archive_fmt = "{journalid}" - pattern = rf"{BASE_PATTERN}journals/([\w~-]+)" + pattern = BASE_PATTERN + r"journals/([\w~-]+)" example = "https://www.weasyl.com/journals/USER" def __init__(self, match): @@ -159,7 +159,7 @@ class WeasylJournalsExtractor(WeasylExtractor): class WeasylFavoriteExtractor(WeasylExtractor): subcategory = "favorite" directory_fmt = ("{category}", "{user}", "Favorites") - pattern = 
rf"{BASE_PATTERN}favorites(?:\?userid=(\d+)|/([^/?#]+))" + pattern = BASE_PATTERN + r"favorites(?:\?userid=(\d+)|/([^/?#]+))" example = "https://www.weasyl.com/favorites?userid=12345" def items(self): diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py index bed251b..92cf6b1 100644 --- a/gallery_dl/extractor/webtoons.py +++ b/gallery_dl/extractor/webtoons.py @@ -48,10 +48,11 @@ class WebtoonsBase(): class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): """Extractor for an episode on webtoons.com""" subcategory = "episode" - pattern = (rf"{LANG_PATTERN}/([^/?#]+)/([^/?#]+)/[^/?#]+)" + pattern = (LANG_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)" r"/viewer\?([^#'\"]+)") example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer" "?title_no=123&episode_no=12345") + images_urls = [] def _init(self): self.setup_agegate_cookies() @@ -61,6 +62,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): self.title_no = params.get("title_no") self.episode_no = params.get("episode_no") self.page_url = f"{self.root}/{base}/viewer?{query}" + self.bgm = self.config("bgm", True) def metadata(self, page): extr = text.extract_from(page) @@ -114,12 +116,21 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): elif not isinstance(quality, dict): quality = None + if self.bgm: + num = 0 + self.paths = paths = {} + else: + num = None + results = [] for url in text.extract_iter( page, 'class="_images" data-url="', '"'): + path, _, query = url.rpartition("?") + if num is not None: + num += 1 + paths[path[path.find("/", 8):]] = num if quality is not None: - path, _, query = url.rpartition("?") type = quality.get(path.rpartition(".")[2].lower()) if type is False: url = path @@ -130,10 +141,60 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): return results def assets(self, page): + assets = [] + if self.config("thumbnails", False): active = text.extr(page, 'class="on', '</a>') url = _url(text.extr(active, 
'data-url="', '"')) - return ({"url": url, "type": "thumbnail"},) + assets.append({"url": url, "type": "thumbnail"}) + + if self.bgm: + if bgm := text.extr(page, "episodeBgmList:", ",\n"): + self._asset_bgm(assets, util.json_loads(bgm)) + + return assets + + def _asset_bgm(self, assets, bgm_list): + import binascii + params = { + # "quality" : "MIDDLE", + "quality" : "HIGH", # no difference to 'MIDDLE' + "acceptCodecs": "AAC,MP3", + } + headers = { + "Accept" : "application/json", + "Content-Type" : "application/json", + "Origin" : self.root, + "Referer" : self.root + "/", + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "cross-site", + } + paths = self.paths + + for bgm in bgm_list: + url = (f"https://apis.naver.com/audiocweb/audiocplayogwweb/play" + f"/audio/{bgm['audioId']}/hls/token") + data = self.request_json( + url, params=params, headers=headers, interval=False) + token = data["result"]["playToken"] + data = util.json_loads(binascii.a2b_base64(token).decode()) + audio = data["audioInfo"] + play = bgm.get("playImageUrl", "") + stop = bgm.get("stopImageUrl", "") + + assets.append({ + **bgm, + **audio, + "num_play": paths.get(play) or 0, + "num_stop": paths.get(stop) or 0, + "filename_play": play[play.rfind("/")+1:play.rfind(".")], + "filename_stop": stop[stop.rfind("/")+1:stop.rfind(".")], + "type": "bgm", + "url" : "ytdl:" + audio["url"], + "_ytdl_manifest": audio["type"].lower(), + "extension": "mp3", + }) class WebtoonsComicExtractor(WebtoonsBase, Extractor): @@ -142,7 +203,7 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor): categorytransfer = True filename_fmt = "{type}.{extension}" archive_fmt = "{title_no}_{type}" - pattern = rf"{LANG_PATTERN}/([^/?#]+)/([^/?#]+))/list\?([^#]+)" + pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)" example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123" def items(self): @@ -197,7 +258,7 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor): class 
WebtoonsArtistExtractor(WebtoonsBase, Extractor): """Extractor for webtoons.com artists""" subcategory = "artist" - pattern = rf"{BASE_PATTERN}/p/community/([^/?#]+)/u/([^/?#]+)" + pattern = BASE_PATTERN + r"/p/community/([^/?#]+)/u/([^/?#]+)" example = "https://www.webtoons.com/p/community/LANG/u/ARTIST" def items(self): diff --git a/gallery_dl/extractor/weebcentral.py b/gallery_dl/extractor/weebcentral.py index 31cdaac..c5496e3 100644 --- a/gallery_dl/extractor/weebcentral.py +++ b/gallery_dl/extractor/weebcentral.py @@ -44,7 +44,7 @@ class WeebcentralBase(): class WeebcentralChapterExtractor(WeebcentralBase, ChapterExtractor): """Extractor for manga chapters from weebcentral.com""" - pattern = rf"{BASE_PATTERN}(/chapters/(\w+))" + pattern = BASE_PATTERN + r"(/chapters/(\w+))" example = "https://weebcentral.com/chapters/01JHABCDEFGHIJKLMNOPQRSTUV" def metadata(self, page): @@ -95,7 +95,7 @@ class WeebcentralChapterExtractor(WeebcentralBase, ChapterExtractor): class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor): """Extractor for manga from weebcentral.com""" chapterclass = WeebcentralChapterExtractor - pattern = rf"{BASE_PATTERN}/series/(\w+)" + pattern = BASE_PATTERN + r"/series/(\w+)" example = "https://weebcentral.com/series/01J7ABCDEFGHIJKLMNOPQRSTUV/TITLE" def chapters(self, _): diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index abec0f7..ef50b9d 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -14,7 +14,7 @@ from ..cache import cache import random BASE_PATTERN = r"(?:https?://)?(?:www\.|m\.)?weibo\.c(?:om|n)" -USER_PATTERN = rf"{BASE_PATTERN}/(?:(u|n|p(?:rofile)?)/)?([^/?#]+)(?:/home)?" +USER_PATTERN = BASE_PATTERN + r"/(?:(u|n|p(?:rofile)?)/)?([^/?#]+)(?:/home)?" 
class WeiboExtractor(Extractor): @@ -114,13 +114,13 @@ class WeiboExtractor(Extractor): if not url: continue if url.startswith("http:"): - url = f"https:{url[5:]}" + url = "https:" + url[5:] if "filename" not in file: text.nameext_from_url(url, file) if file["extension"] == "json": file["extension"] = "mp4" if file["extension"] == "m3u8": - url = f"ytdl:{url}" + url = "ytdl:" + url file["_ytdl_manifest"] = "hls" file["extension"] = "mp4" num += 1 @@ -296,7 +296,7 @@ class WeiboExtractor(Extractor): class WeiboUserExtractor(WeiboExtractor): """Extractor for weibo user profiles""" subcategory = "user" - pattern = rf"{USER_PATTERN}(?:$|#)" + pattern = USER_PATTERN + r"(?:$|#)" example = "https://weibo.com/USER" # do NOT override 'initialize()' @@ -307,18 +307,19 @@ class WeiboUserExtractor(WeiboExtractor): def items(self): base = f"{self.root}/u/{self._user_id()}?tabtype=" return Dispatch._dispatch_extractors(self, ( - (WeiboHomeExtractor , f"{base}home"), - (WeiboFeedExtractor , f"{base}feed"), - (WeiboVideosExtractor , f"{base}video"), - (WeiboNewvideoExtractor, f"{base}newVideo"), - (WeiboAlbumExtractor , f"{base}album"), + (WeiboHomeExtractor , base + "home"), + (WeiboFeedExtractor , base + "feed"), + (WeiboVideosExtractor , base + "video"), + (WeiboNewvideoExtractor, base + "newVideo"), + (WeiboArticleExtractor , base + "article"), + (WeiboAlbumExtractor , base + "album"), ), ("feed",)) class WeiboHomeExtractor(WeiboExtractor): """Extractor for weibo 'home' listings""" subcategory = "home" - pattern = rf"{USER_PATTERN}\?tabtype=home" + pattern = USER_PATTERN + r"\?tabtype=home" example = "https://weibo.com/USER?tabtype=home" def statuses(self): @@ -330,7 +331,7 @@ class WeiboHomeExtractor(WeiboExtractor): class WeiboFeedExtractor(WeiboExtractor): """Extractor for weibo user feeds""" subcategory = "feed" - pattern = rf"{USER_PATTERN}\?tabtype=feed" + pattern = USER_PATTERN + r"\?tabtype=feed" example = "https://weibo.com/USER?tabtype=feed" def statuses(self): @@ 
-344,7 +345,7 @@ class WeiboFeedExtractor(WeiboExtractor): class WeiboVideosExtractor(WeiboExtractor): """Extractor for weibo 'video' listings""" subcategory = "videos" - pattern = rf"{USER_PATTERN}\?tabtype=video" + pattern = USER_PATTERN + r"\?tabtype=video" example = "https://weibo.com/USER?tabtype=video" def statuses(self): @@ -358,7 +359,7 @@ class WeiboVideosExtractor(WeiboExtractor): class WeiboNewvideoExtractor(WeiboExtractor): """Extractor for weibo 'newVideo' listings""" subcategory = "newvideo" - pattern = rf"{USER_PATTERN}\?tabtype=newVideo" + pattern = USER_PATTERN + r"\?tabtype=newVideo" example = "https://weibo.com/USER?tabtype=newVideo" def statuses(self): @@ -370,7 +371,7 @@ class WeiboNewvideoExtractor(WeiboExtractor): class WeiboArticleExtractor(WeiboExtractor): """Extractor for weibo 'article' listings""" subcategory = "article" - pattern = rf"{USER_PATTERN}\?tabtype=article" + pattern = USER_PATTERN + r"\?tabtype=article" example = "https://weibo.com/USER?tabtype=article" def statuses(self): @@ -382,7 +383,7 @@ class WeiboArticleExtractor(WeiboExtractor): class WeiboAlbumExtractor(WeiboExtractor): """Extractor for weibo 'album' listings""" subcategory = "album" - pattern = rf"{USER_PATTERN}\?tabtype=album" + pattern = USER_PATTERN + r"\?tabtype=album" example = "https://weibo.com/USER?tabtype=album" def statuses(self): @@ -404,7 +405,7 @@ class WeiboAlbumExtractor(WeiboExtractor): class WeiboStatusExtractor(WeiboExtractor): """Extractor for a weibo status""" subcategory = "status" - pattern = rf"{BASE_PATTERN}/(detail|status|\d+)/(\w+)" + pattern = BASE_PATTERN + r"/(detail|status|\d+)/(\w+)" example = "https://weibo.com/detail/12345" def statuses(self): diff --git a/gallery_dl/extractor/whyp.py b/gallery_dl/extractor/whyp.py new file mode 100644 index 0000000..d5bc6ab --- /dev/null +++ b/gallery_dl/extractor/whyp.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can 
redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://whyp.it/""" + +from .common import Extractor, Message +from .. import text + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?whyp\.it" + + +class WhypExtractor(Extractor): + """Base class for whyp extractors""" + category = "whyp" + root = "https://whyp.it" + root_api = "https://api.whyp.it" + directory_fmt = ("{category}", "{user[username]} ({user[id]})") + filename_fmt = "{id} {title}.{extension}" + archive_fmt = "{id}" + + def _init(self): + self.headers_api = { + "Accept" : "application/json", + "Origin" : self.root, + "Referer": self.root + "/", + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "same-site", + } + + def items(self): + for track in self.tracks(): + if url := track.get("lossless_url"): + track["original"] = True + else: + url = track["lossy_url"] + track["original"] = False + + if "created_at" in track: + track["date"] = self.parse_datetime_iso(track["created_at"]) + + yield Message.Directory, "", track + yield Message.Url, url, text.nameext_from_url(url, track) + + +class WhypAudioExtractor(WhypExtractor): + subcategory = "audio" + pattern = BASE_PATTERN + r"/tracks/(\d+)" + example = "https://whyp.it/tracks/12345/SLUG" + + def tracks(self): + url = f"{self.root_api}/api/tracks/{self.groups[0]}" + track = self.request_json(url, headers=self.headers_api)["track"] + return (track,) + + +class WhypUserExtractor(WhypExtractor): + subcategory = "user" + pattern = BASE_PATTERN + r"/users/(\d+)" + example = "https://whyp.it/users/123/NAME" + + def tracks(self): + url = f"{self.root_api}/api/users/{self.groups[0]}/tracks" + params = {} + headers = self.headers_api + + while True: + data = self.request_json(url, params=params, headers=headers) + + yield from data["tracks"] + + if not (cursor := data.get("next_cursor")): + break + params["cursor"] = cursor + + 
+class WhypCollectionExtractor(WhypExtractor): + subcategory = "collection" + pattern = BASE_PATTERN + r"/collections/(\d+)" + example = "https://whyp.it/collections/123/NAME" + + def tracks(self): + cid = self.groups[0] + + url = f"{self.root_api}/api/collections/{cid}" + headers = self.headers_api + self.kwdict["collection"] = collection = self.request_json( + url, headers=headers)["collection"] + + url = f"{self.root_api}/api/collections/{cid}/tracks" + params = {"token": collection["token"]} + data = self.request_json(url, params=params, headers=headers) + return data["tracks"] diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py index 8f3a1c9..edbdf44 100644 --- a/gallery_dl/extractor/wikiart.py +++ b/gallery_dl/extractor/wikiart.py @@ -68,7 +68,7 @@ class WikiartArtistExtractor(WikiartExtractor): """Extractor for an artist's paintings on wikiart.org""" subcategory = "artist" directory_fmt = ("{category}", "{artist[artistName]}") - pattern = rf"{BASE_PATTERN}/(?!\w+-by-)([\w-]+)/?$" + pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)/?$" example = "https://www.wikiart.org/en/ARTIST" def __init__(self, match): @@ -89,7 +89,7 @@ class WikiartArtistExtractor(WikiartExtractor): class WikiartImageExtractor(WikiartArtistExtractor): """Extractor for individual paintings on wikiart.org""" subcategory = "image" - pattern = rf"{BASE_PATTERN}/(?!(?:paintings|artists)-by-)([\w-]+)/([\w-]+)" + pattern = BASE_PATTERN + r"/(?!(?:paintings|artists)-by-)([\w-]+)/([\w-]+)" example = "https://www.wikiart.org/en/ARTIST/TITLE" def __init__(self, match): @@ -109,7 +109,7 @@ class WikiartArtworksExtractor(WikiartExtractor): """Extractor for artwork collections on wikiart.org""" subcategory = "artworks" directory_fmt = ("{category}", "Artworks by {group!c}", "{type}") - pattern = rf"{BASE_PATTERN}/paintings-by-([\w-]+)/([\w-]+)" + pattern = BASE_PATTERN + r"/paintings-by-([\w-]+)/([\w-]+)" example = "https://www.wikiart.org/en/paintings-by-GROUP/TYPE" def 
__init__(self, match): @@ -128,7 +128,7 @@ class WikiartArtworksExtractor(WikiartExtractor): class WikiartArtistsExtractor(WikiartExtractor): """Extractor for artist collections on wikiart.org""" subcategory = "artists" - pattern = (rf"{BASE_PATTERN}/artists-by-([\w-]+)/([\w-]+)") + pattern = (BASE_PATTERN + r"/artists-by-([\w-]+)/([\w-]+)") example = "https://www.wikiart.org/en/artists-by-GROUP/TYPE" def __init__(self, match): diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py index 70e42c6..aa19fdb 100644 --- a/gallery_dl/extractor/wikimedia.py +++ b/gallery_dl/extractor/wikimedia.py @@ -47,7 +47,7 @@ class WikimediaExtractor(BaseExtractor): def _init(self): if api_path := self.config_instance("api-path"): if api_path[0] == "/": - self.api_url = f"{self.root}{api_path}" + self.api_url = self.root + api_path else: self.api_url = api_path else: @@ -66,7 +66,7 @@ class WikimediaExtractor(BaseExtractor): def _search_api_path(self, root): self.log.debug("Probing possible API endpoints") for path in ("/api.php", "/w/api.php", "/wiki/api.php"): - url = f"{root}{path}" + url = root + path response = self.request(url, method="HEAD", fatal=None) if response.status_code < 400: return url @@ -122,10 +122,10 @@ class WikimediaExtractor(BaseExtractor): yield Message.Url, image["url"], image if self.subcategories: - base = f"{self.root}/wiki/" + base = self.root + "/wiki/" params["gcmtype"] = "subcat" for subcat in self._pagination(params): - url = f"{base}{subcat['title'].replace(' ', '_')}" + url = base + subcat["title"].replace(" ", "_") subcat["_extractor"] = WikimediaArticleExtractor yield Message.Queue, url, subcat @@ -236,7 +236,7 @@ class WikimediaArticleExtractor(WikimediaExtractor): """Extractor for wikimedia articles""" subcategory = "article" directory_fmt = ("{category}", "{page}") - pattern = rf"{BASE_PATTERN}/(?!static/)([^?#]+)" + pattern = BASE_PATTERN + r"/(?!static/)([^?#]+)" example = "https://en.wikipedia.org/wiki/TITLE" def 
__init__(self, match): @@ -286,7 +286,7 @@ class WikimediaArticleExtractor(WikimediaExtractor): class WikimediaWikiExtractor(WikimediaExtractor): """Extractor for all files on a MediaWiki instance""" subcategory = "wiki" - pattern = rf"{BASE_PATTERN}/?$" + pattern = BASE_PATTERN + r"/?$" example = "https://en.wikipedia.org/" def params(self): diff --git a/gallery_dl/extractor/xasiat.py b/gallery_dl/extractor/xasiat.py index d4dbea1..0b12f0d 100644 --- a/gallery_dl/extractor/xasiat.py +++ b/gallery_dl/extractor/xasiat.py @@ -52,7 +52,7 @@ class XasiatExtractor(Extractor): class XasiatAlbumExtractor(XasiatExtractor): subcategory = "album" - pattern = rf"{BASE_PATTERN}/(\d+)/[^/?#]+)" + pattern = BASE_PATTERN + r"/(\d+)/[^/?#]+)" example = "https://www.xasiat.com/albums/12345/TITLE/" def items(self): @@ -86,17 +86,17 @@ class XasiatAlbumExtractor(XasiatExtractor): class XasiatTagExtractor(XasiatExtractor): subcategory = "tag" - pattern = rf"{BASE_PATTERN}/tags/[^/?#]+)" + pattern = BASE_PATTERN + r"/tags/[^/?#]+)" example = "https://www.xasiat.com/albums/tags/TAG/" class XasiatCategoryExtractor(XasiatExtractor): subcategory = "category" - pattern = rf"{BASE_PATTERN}/categories/[^/?#]+)" + pattern = BASE_PATTERN + r"/categories/[^/?#]+)" example = "https://www.xasiat.com/albums/categories/CATEGORY/" class XasiatModelExtractor(XasiatExtractor): subcategory = "model" - pattern = rf"{BASE_PATTERN}/models/[^/?#]+)" + pattern = BASE_PATTERN + r"/models/[^/?#]+)" example = "https://www.xasiat.com/albums/models/MODEL/" diff --git a/gallery_dl/extractor/xenforo.py b/gallery_dl/extractor/xenforo.py index d8536b0..838a404 100644 --- a/gallery_dl/extractor/xenforo.py +++ b/gallery_dl/extractor/xenforo.py @@ -41,6 +41,7 @@ class XenforoExtractor(BaseExtractor): r')' ).findall + root_media = self.config_instance("root-media") or self.root for post in self.posts(): urls = extract_urls(post["content"]) if post["attachments"]: @@ -74,6 +75,8 @@ class XenforoExtractor(BaseExtractor): 
text.nameext_from_url(url, data) data["id"] = text.parse_int( data["filename"].partition("-")[0]) + if url[0] == "/": + url = root_media + url yield Message.Url, url, data elif (inline := inl1 or inl2): @@ -118,7 +121,7 @@ class XenforoExtractor(BaseExtractor): def _login_impl(self, username, password): self.log.info("Logging in as %s", username) - url = f"{self.root}/login/login" + url = self.root + "/login/login" page = self.request(url).text data = { "_xfToken": text.extr(page, 'name="_xfToken" value="', '"'), @@ -140,10 +143,10 @@ class XenforoExtractor(BaseExtractor): } def _pagination(self, base, pnum=None): - base = f"{self.root}{base}" + base = self.root + base if pnum is None: - url = f"{base}/" + url = base + "/" pnum = 1 else: url = f"{base}/page-{pnum}" @@ -160,7 +163,7 @@ class XenforoExtractor(BaseExtractor): url = f"{base}/page-{pnum}" def _pagination_reverse(self, base, pnum=None): - base = f"{self.root}{base}" + base = self.root + base url = f"{base}/page-{'9999' if pnum is None else pnum}" with self.request_page(url) as response: @@ -180,7 +183,7 @@ class XenforoExtractor(BaseExtractor): if pnum > 1: url = f"{base}/page-{pnum}" elif pnum == 1: - url = f"{base}/" + url = base + "/" else: return @@ -274,6 +277,7 @@ BASE_PATTERN = XenforoExtractor.update({ }, "atfforum": { "root": "https://allthefallen.moe/forum", + "root-media": "https://allthefallen.moe", "pattern": r"(?:www\.)?allthefallen\.moe/forum", "cookies": ("xf_user",), }, @@ -282,8 +286,8 @@ BASE_PATTERN = XenforoExtractor.update({ class XenforoPostExtractor(XenforoExtractor): subcategory = "post" - pattern = (rf"{BASE_PATTERN}(/(?:index\.php\?)?threads" - rf"/[^/?#]+/post-|/posts/)(\d+)") + pattern = (BASE_PATTERN + r"(/(?:index\.php\?)?threads" + r"/[^/?#]+/#?post-|/posts/)(\d+)") example = "https://simpcity.cr/threads/TITLE.12345/post-54321" def posts(self): @@ -303,8 +307,8 @@ class XenforoPostExtractor(XenforoExtractor): class XenforoThreadExtractor(XenforoExtractor): subcategory = 
"thread" - pattern = (rf"{BASE_PATTERN}(/(?:index\.php\?)?threads" - rf"/(?:[^/?#]+\.)?\d+)(?:/page-(\d+))?") + pattern = (BASE_PATTERN + r"(/(?:index\.php\?)?threads" + r"/(?:[^/?#]+\.)?\d+)(?:/page-(\d+))?") example = "https://simpcity.cr/threads/TITLE.12345/" def posts(self): @@ -332,8 +336,8 @@ class XenforoThreadExtractor(XenforoExtractor): class XenforoForumExtractor(XenforoExtractor): subcategory = "forum" - pattern = (rf"{BASE_PATTERN}(/(?:index\.php\?)?forums" - rf"/(?:[^/?#]+\.)?[^/?#]+)(?:/page-(\d+))?") + pattern = (BASE_PATTERN + r"(/(?:index\.php\?)?forums" + r"/(?:[^/?#]+\.)?[^/?#]+)(?:/page-(\d+))?") example = "https://simpcity.cr/forums/TITLE.123/" def items(self): @@ -345,4 +349,4 @@ class XenforoForumExtractor(XenforoExtractor): pnum = self.groups[-1] for page in self._pagination(path, pnum): for path in extract_threads(page): - yield Message.Queue, f"{self.root}{text.unquote(path)}", data + yield Message.Queue, self.root + text.unquote(path), data diff --git a/gallery_dl/extractor/xfolio.py b/gallery_dl/extractor/xfolio.py index 8caff85..9a10c55 100644 --- a/gallery_dl/extractor/xfolio.py +++ b/gallery_dl/extractor/xfolio.py @@ -45,7 +45,7 @@ class XfolioExtractor(Extractor): class XfolioWorkExtractor(XfolioExtractor): subcategory = "work" - pattern = rf"{BASE_PATTERN}/portfolio/([^/?#]+)/works/(\d+)" + pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)/works/(\d+)" example = "https://xfolio.jp/portfolio/USER/works/12345" def items(self): @@ -107,7 +107,7 @@ class XfolioWorkExtractor(XfolioExtractor): class XfolioUserExtractor(XfolioExtractor): subcategory = "user" - pattern = rf"{BASE_PATTERN}/portfolio/([^/?#]+)(?:/works)?/?(?:$|\?|#)" + pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)(?:/works)?/?(?:$|\?|#)" example = "https://xfolio.jp/portfolio/USER" def works(self): @@ -129,7 +129,7 @@ class XfolioUserExtractor(XfolioExtractor): class XfolioSeriesExtractor(XfolioExtractor): subcategory = "series" - pattern = 
rf"{BASE_PATTERN}/portfolio/([^/?#]+)/series/(\d+)" + pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)/series/(\d+)" example = "https://xfolio.jp/portfolio/USER/series/12345" def works(self): diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py index 64113d3..2806cd1 100644 --- a/gallery_dl/extractor/xhamster.py +++ b/gallery_dl/extractor/xhamster.py @@ -31,7 +31,7 @@ class XhamsterGalleryExtractor(XhamsterExtractor): "{gallery[id]} {gallery[title]}") filename_fmt = "{num:>03}_{id}.{extension}" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}(/photos/gallery/[^/?#]+)" + pattern = BASE_PATTERN + r"(/photos/gallery/[^/?#]+)" example = "https://xhamster.com/photos/gallery/12345" def items(self): @@ -102,7 +102,7 @@ class XhamsterGalleryExtractor(XhamsterExtractor): class XhamsterUserExtractor(XhamsterExtractor): """Extractor for all galleries of an xhamster user""" subcategory = "user" - pattern = rf"{BASE_PATTERN}/users/([^/?#]+)(?:/photos)?/?(?:$|[?#])" + pattern = BASE_PATTERN + r"/users/([^/?#]+)(?:/photos)?/?(?:$|[?#])" example = "https://xhamster.com/users/USER/photos" def items(self): diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py index 1f33eac..6f64c86 100644 --- a/gallery_dl/extractor/xvideos.py +++ b/gallery_dl/extractor/xvideos.py @@ -28,7 +28,7 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor): "{gallery[id]} {gallery[title]}") filename_fmt = "{category}_{gallery[id]}_{num:>03}.{extension}" archive_fmt = "{gallery[id]}_{num}" - pattern = rf"{BASE_PATTERN}/([^/?#]+)/photos/(\d+)" + pattern = BASE_PATTERN + r"/([^/?#]+)/photos/(\d+)" example = "https://www.xvideos.com/profiles/USER/photos/12345" def __init__(self, match): @@ -86,7 +86,7 @@ class XvideosUserExtractor(XvideosBase, Extractor): """Extractor for user profiles on xvideos.com""" subcategory = "user" categorytransfer = True - pattern = rf"{BASE_PATTERN}/([^/?#]+)/?(?:#.*)?$" + pattern = BASE_PATTERN + r"/([^/?#]+)/?(?:#.*)?$" 
example = "https://www.xvideos.com/profiles/USER" def __init__(self, match): @@ -117,5 +117,5 @@ class XvideosUserExtractor(XvideosBase, Extractor): base = f"{self.root}/profiles/{self.user}/photos/" for gallery in galleries: - url = f"{base}{gallery['id']}" + url = base + str(gallery["id"]) yield Message.Queue, url, gallery diff --git a/gallery_dl/extractor/yiffverse.py b/gallery_dl/extractor/yiffverse.py index 65289e2..f1073ed 100644 --- a/gallery_dl/extractor/yiffverse.py +++ b/gallery_dl/extractor/yiffverse.py @@ -98,7 +98,7 @@ class YiffverseExtractor(BooruExtractor): class YiffversePostExtractor(YiffverseExtractor): subcategory = "post" archive_fmt = "{id}" - pattern = rf"{BASE_PATTERN}/post/(\d+)" + pattern = BASE_PATTERN + r"/post/(\d+)" example = "https://yiffverse.com/post/12345" def posts(self): @@ -109,7 +109,7 @@ class YiffversePlaylistExtractor(YiffverseExtractor): subcategory = "playlist" directory_fmt = ("{category}", "{playlist_id}") archive_fmt = "p_{playlist_id}_{id}" - pattern = rf"{BASE_PATTERN}/playlist/(\d+)" + pattern = BASE_PATTERN + r"/playlist/(\d+)" example = "https://yiffverse.com/playlist/12345" def metadata(self): @@ -124,7 +124,7 @@ class YiffverseTagExtractor(YiffverseExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" - pattern = rf"{BASE_PATTERN}/(?:tag/([^/?#]+))?(?:/?\?([^#]+))?(?:$|#)" + pattern = BASE_PATTERN + r"/(?:tag/([^/?#]+))?(?:/?\?([^#]+))?(?:$|#)" example = "https://yiffverse.com/tag/TAG" def _init(self): diff --git a/gallery_dl/extractor/yourlesbians.py b/gallery_dl/extractor/yourlesbians.py new file mode 100644 index 0000000..5e14f6e --- /dev/null +++ b/gallery_dl/extractor/yourlesbians.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
+ +"""Extractors for https://yourlesbians.com/""" + +from .common import GalleryExtractor +from .. import text + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?yourlesbians\.com" + + +class YourlesbiansAlbumExtractor(GalleryExtractor): + category = "yourlesbians" + subcategory = "album" + root = "https://yourlesbians.com" + directory_fmt = ("{category}", "{title}") + filename_fmt = "{num:>03} {filename}.{extension}" + archive_fmt = "{title}/{num}" + pattern = BASE_PATTERN + r"(/album/([^/?#]+)/?)" + example = "https://yourlesbians.com/album/SLUG/" + + def metadata(self, page): + extr = text.extract_from(page) + data = { + "album_url": extr('property="og:url" content="', '"'), + "title": text.unescape(extr( + 'property="og:title" content="', '"')[:-8].rstrip()), + "album_thumbnail": extr('property="og:image" content="', '"'), + "description": extr('property="og:description" content="', '"'), + "tags": text.split_html(extr('tags-row', '</div>'))[1:], + } + if data["description"].endswith(", right after."): + data["description"] = "" + self.album = extr('class="album-inner', "</div>") + return data + + def images(self, _): + results = [] + for url in text.extract_iter(self.album, '<a href="', '"'): + fn, _, ext = url.rsplit("/", 2)[1].rpartition(".") + results.append((url, { + "filename" : fn, + "extension": ext, + })) + return results diff --git a/gallery_dl/extractor/ytdl.py b/gallery_dl/extractor/ytdl.py index ea3b615..c135896 100644 --- a/gallery_dl/extractor/ytdl.py +++ b/gallery_dl/extractor/ytdl.py @@ -93,8 +93,12 @@ class YoutubeDLExtractor(Extractor): self.ytdl_url, ytdl_instance.get_info_extractor(self.ytdl_ie_key), False, {}, True) - except ytdl_module.utils.YoutubeDLError: - raise exception.AbortExtraction("Failed to extract video data") + # except ytdl_module.utils.YoutubeDLError: + # raise exception.AbortExtraction("Failed to extract video data") + except Exception as exc: + raise exception.AbortExtraction( + f"Failed to extract video data " + 
f"({exc.__class__.__name__}: {exc})") if not info_dict: return diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py index b4bbd5a..44497cf 100644 --- a/gallery_dl/extractor/zerochan.py +++ b/gallery_dl/extractor/zerochan.py @@ -160,7 +160,7 @@ class ZerochanExtractor(BooruExtractor): class ZerochanTagExtractor(ZerochanExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") - pattern = rf"{BASE_PATTERN}/(?!\d+$)([^/?#]+)/?(?:\?([^#]+))?" + pattern = BASE_PATTERN + r"/(?!\d+$)([^/?#]+)/?(?:\?([^#]+))?" example = "https://www.zerochan.net/TAG" def __init__(self, match): @@ -286,7 +286,7 @@ class ZerochanTagExtractor(ZerochanExtractor): class ZerochanImageExtractor(ZerochanExtractor): subcategory = "image" - pattern = rf"{BASE_PATTERN}/(\d+)" + pattern = BASE_PATTERN + r"/(\d+)" example = "https://www.zerochan.net/12345" def posts(self): diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 7a52bd6..9d387a3 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -533,12 +533,15 @@ class DownloadJob(Job): callback(pathfmt) self.out.skip(pathfmt.path) - if self._skipexc: + if self._skipexc is not None: if self._skipftr is None or self._skipftr(pathfmt.kwdict): self._skipcnt += 1 if self._skipcnt >= self._skipmax: raise self._skipexc + if self.sleep_skip is not None: + self.extractor.sleep(self.sleep_skip(), "skip") + def download(self, url): """Download 'url'""" if downloader := self.get_downloader(url[:url.find(":")]): @@ -582,6 +585,7 @@ class DownloadJob(Job): pathfmt.set_directory(kwdict) self.sleep = util.build_duration_func(cfg("sleep")) + self.sleep_skip = util.build_duration_func(cfg("sleep-skip")) self.fallback = cfg("fallback", True) if not cfg("download", True): # monkey-patch method to do nothing and always return True diff --git a/gallery_dl/option.py b/gallery_dl/option.py index a47d8cd..82efcc9 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -529,6 +529,11 @@ def build_parser(): 
"(e.g. 2.7 or 2.0-3.5)"), ) downloader.add_argument( + "--sleep-skip", + dest="sleep-skip", metavar="SECONDS", action=ConfigAction, + help=("Number of seconds to wait after skipping a file download"), + ) + downloader.add_argument( "--sleep-request", dest="sleep-request", metavar="SECONDS", action=ConfigAction, help=("Number of seconds to wait between HTTP requests " diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 0dcb01a..10dce13 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.31.1" +__version__ = "1.31.2" __variant__ = None |
