From bbe7fac03d881662a458e7fbf870c9d71f5257f4 Mon Sep 17 00:00:00 2001
From: Unit 193
Date: Tue, 7 Oct 2025 02:11:45 -0400
Subject: New upstream version 1.30.9.

---
 gallery_dl/cookies.py                  |  49 +++++++---
 gallery_dl/downloader/http.py          |   8 +-
 gallery_dl/extractor/__init__.py       |   3 +
 gallery_dl/extractor/chevereto.py      |  20 ++--
 gallery_dl/extractor/imagehosts.py     |  14 ++-
 gallery_dl/extractor/instagram.py      |  75 ++++++++------
 gallery_dl/extractor/mangadex.py       | 119 +++++++++++------------
 gallery_dl/extractor/mangafire.py      | 168 ++++++++++++++++++++++++++++++++
 gallery_dl/extractor/mangareader.py    | 173 +++++++++++++++++++++++++++++++++
 gallery_dl/extractor/misskey.py        |   6 +-
 gallery_dl/extractor/nozomi.py         |   2 +-
 gallery_dl/extractor/paheal.py         |  16 ++-
 gallery_dl/extractor/patreon.py        |  37 +++++++
 gallery_dl/extractor/pixiv.py          |  44 ++++++++-
 gallery_dl/extractor/s3ndpics.py       | 101 +++++++++++++++++++
 gallery_dl/extractor/schalenetwork.py  |  57 ++++++-----
 gallery_dl/extractor/simpcity.py       |   7 +-
 gallery_dl/extractor/thehentaiworld.py |  26 ++---
 gallery_dl/extractor/twitter.py        |   3 +-
 gallery_dl/extractor/weibo.py          |  36 +++++--
 gallery_dl/extractor/wikimedia.py      |  34 +++++--
 gallery_dl/extractor/zerochan.py       |  35 +++++--
 gallery_dl/postprocessor/common.py     |   8 +-
 gallery_dl/postprocessor/exec.py       |   3 +-
 gallery_dl/postprocessor/metadata.py   |   4 +-
 gallery_dl/postprocessor/python.py     |   7 +-
 gallery_dl/version.py                  |   2 +-
 27 files changed, 858 insertions(+), 199 deletions(-)
 create mode 100644 gallery_dl/extractor/mangafire.py
 create mode 100644 gallery_dl/extractor/mangareader.py
 create mode 100644 gallery_dl/extractor/s3ndpics.py

(limited to 'gallery_dl')

diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index 5d6c3d7..6c19e23 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -27,8 +27,11 @@ from . import aes, text, util
 
 SUPPORTED_BROWSERS_CHROMIUM = {
     "brave", "chrome", "chromium", "edge", "opera", "thorium", "vivaldi"}
 SUPPORTED_BROWSERS_FIREFOX = {"firefox", "librewolf", "zen"}
+SUPPORTED_BROWSERS_WEBKIT = {"safari", "orion"}
 SUPPORTED_BROWSERS = \
-    SUPPORTED_BROWSERS_CHROMIUM | SUPPORTED_BROWSERS_FIREFOX | {"safari"}
+    SUPPORTED_BROWSERS_CHROMIUM \
+    | SUPPORTED_BROWSERS_FIREFOX \
+    | SUPPORTED_BROWSERS_WEBKIT
 
 logger = logging.getLogger("cookies")
@@ -38,8 +41,8 @@ def load_cookies(browser_specification):
         _parse_browser_specification(*browser_specification)
     if browser_name in SUPPORTED_BROWSERS_FIREFOX:
         return load_cookies_firefox(browser_name, profile, container, domain)
-    elif browser_name == "safari":
-        return load_cookies_safari(profile, domain)
+    elif browser_name in SUPPORTED_BROWSERS_WEBKIT:
+        return load_cookies_webkit(browser_name, profile, domain)
     elif browser_name in SUPPORTED_BROWSERS_CHROMIUM:
         return load_cookies_chromium(browser_name, profile, keyring, domain)
     else:
@@ -92,7 +95,7 @@ def load_cookies_firefox(browser_name, profile=None,
     return cookies
 
 
-def load_cookies_safari(profile=None, domain=None):
+def load_cookies_webkit(browser_name, profile=None, domain=None):
     """Ref.: https://github.com/libyal/dtformats/blob
              /main/documentation/Safari%20Cookies.asciidoc
     - This data appears to be out of date
@@ -100,15 +103,24 @@ def load_cookies_safari(profile=None, domain=None):
     - There are a few bytes here and there
       which are skipped during parsing
     """
-    with _safari_cookies_database() as fp:
-        data = fp.read()
-    page_sizes, body_start = _safari_parse_cookies_header(data)
+    if browser_name == "safari":
+        with _safari_cookies_database() as fp:
+            data = fp.read()
+    elif browser_name == "orion":
+        with _orion_cookies_database() as fp:
+            data = fp.read()
+    else:
+        raise ValueError(f"unknown webkit browser '{browser_name}'")
+
+    page_sizes, body_start = _webkit_parse_cookies_header(data)
     p = DataParser(data[body_start:])
 
     cookies = []
     for page_size in page_sizes:
-        _safari_parse_cookies_page(p.read_bytes(page_size), cookies)
-    _log_info("Extracted %s cookies from Safari", len(cookies))
+        _webkit_parse_cookies_page(p.read_bytes(page_size), cookies)
+    _log_info("Extracted %s cookies from %s",
+              browser_name.capitalize(), len(cookies))
+
     return cookies
@@ -278,7 +290,8 @@ def _firefox_browser_directory(browser_name):
 
 
 # --------------------------------------------------------------------
-# safari
+# safari/orion/webkit
+
 
 def _safari_cookies_database():
     try:
@@ -291,7 +304,13 @@ def _safari_cookies_database():
     return open(path, "rb")
 
 
-def _safari_parse_cookies_header(data):
+def _orion_cookies_database():
+    path = os.path.expanduser(
+        "~/Library/HTTPStorages/com.kagi.kagimacOS.binarycookies")
+    return open(path, "rb")
+
+
+def _webkit_parse_cookies_header(data):
     p = DataParser(data)
     p.expect_bytes(b"cook", "database signature")
     number_of_pages = p.read_uint(big_endian=True)
@@ -300,7 +319,7 @@ def _safari_parse_cookies_header(data):
     return page_sizes, p.cursor
 
 
-def _safari_parse_cookies_page(data, cookies, domain=None):
+def _webkit_parse_cookies_page(data, cookies, domain=None):
     p = DataParser(data)
     p.expect_bytes(b"\x00\x00\x01\x00", "page signature")
     number_of_cookies = p.read_uint()
@@ -313,13 +332,13 @@ def _safari_parse_cookies_page(data, cookies, domain=None):
     for i, record_offset in enumerate(record_offsets):
         p.skip_to(record_offset, "space between records")
 
-        record_length = _safari_parse_cookies_record(
+        record_length = _webkit_parse_cookies_record(
             data[record_offset:], cookies, domain)
         p.read_bytes(record_length)
 
     p.skip_to_end("space in between pages")
 
 
-def _safari_parse_cookies_record(data, cookies, host=None):
+def _webkit_parse_cookies_record(data, cookies, host=None):
     p = DataParser(data)
     record_size = p.read_uint()
     p.skip(4, "unknown record field 1")
@@ -355,7 +374,7 @@ def _safari_parse_cookies_record(data, cookies, host=None):
         p.skip_to(value_offset)
         value = p.read_cstring()
     except UnicodeDecodeError:
-        _log_warning("Failed to parse Safari cookie")
+        _log_warning("Failed to parse WebKit cookie")
         return record_size
 
     p.skip_to(record_size, "space at the end of the record")
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 111fd9b..248bf70 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -413,7 +413,7 @@ class HttpDownloader(DownloaderBase):
     def _find_extension(self, response):
         """Get filename extension from MIME type"""
         mtype = response.headers.get("Content-Type", "image/jpeg")
-        mtype = mtype.partition(";")[0]
+        mtype = mtype.partition(";")[0].lower()
 
         if "/" not in mtype:
             mtype = "image/" + mtype
@@ -475,6 +475,10 @@ MIME_TYPES = {
     "audio/ogg" : "ogg",
     "audio/mpeg" : "mp3",
 
+    "application/vnd.apple.mpegurl": "m3u8",
+    "application/x-mpegurl"       : "m3u8",
+    "application/dash+xml"        : "mpd",
+
     "application/zip"  : "zip",
     "application/x-zip": "zip",
     "application/x-zip-compressed": "zip",
@@ -526,6 +530,8 @@ SIGNATURE_CHECKS = {
                        s[8:12] == b"WAVE"),
     "mp3" : lambda s: (s[0:3] == b"ID3" or
                        s[0:2] in (b"\xFF\xFB", b"\xFF\xF3", b"\xFF\xF2")),
+    "m3u8": lambda s: s[0:7] == b"#EXTM3U",
+    "mpd" : lambda s: b"
").rpartition(">")[2]),
@@ -144,7 +140,8 @@ class CheveretoAlbumExtractor(CheveretoExtractor):
 
     def items(self):
         url = self.root + self.path
-        data = {"_extractor": CheveretoImageExtractor}
+        data_image = {"_extractor": CheveretoImageExtractor}
+        data_video = {"_extractor": CheveretoVideoExtractor}
 
         if self.path.endswith("/sub"):
             albums = self._pagination(url)
@@ -152,8 +149,9 @@ class CheveretoAlbumExtractor(CheveretoExtractor):
             albums = (url,)
 
         for album in albums:
-            for image in self._pagination(album):
-                yield Message.Queue, image, data
+            for item_url in self._pagination(album):
+                data = data_video if "/video/" in item_url else data_image
+                yield Message.Queue, item_url, data
 
 
 class CheveretoCategoryExtractor(CheveretoExtractor):
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index fccc466..817d2c4 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -125,8 +125,18 @@ class ImxtoGalleryExtractor(ImagehostImageExtractor):
             "title": text.unescape(title.partition(">")[2]).strip(),
         }
 
-        for url in text.extract_iter(page, "
Last' in page:
+                return
+
+            params["page"] += 1
+            page = self.request(self.page_url, params=params).text
 
 
 class AcidimgImageExtractor(ImagehostImageExtractor):
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 00e06b5..0e6c480 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -39,7 +39,6 @@ class InstagramExtractor(Extractor):
         self.www_claim = "0"
         self.csrf_token = util.generate_token()
         self._find_tags = util.re(r"#\w+").findall
-        self._warn_video_ua = True
         self._logged_in = True
         self._cursor = None
         self._user = None
@@ -52,6 +51,12 @@ class InstagramExtractor(Extractor):
         else:
             self.api = InstagramRestAPI(self)
 
+        self._warn_video = True if self.config("warn-videos", True) else False
+        self._warn_image = (
+            9 if not (wi := self.config("warn-images", True)) else
+            1 if wi in ("all", "both") else
+            0)
+
     def items(self):
         self.login()
 
@@ -172,6 +177,7 @@ class InstagramExtractor(Extractor):
             "post_id": reel_id,
             "post_shortcode": shortcode_from_id(reel_id),
             "post_url": post_url,
+            "type": "story" if expires else "highlight",
         }
         if "title" in post:
             data["highlight_title"] = post["title"]
@@ -182,7 +188,6 @@ class InstagramExtractor(Extractor):
         data = {
             "post_id" : post["pk"],
             "post_shortcode": post["code"],
-            "post_url": f"{self.root}/p/{post['code']}/",
             "likes": post.get("like_count", 0),
             "liked": post.get("has_liked", False),
             "pinned": self._extract_pinned(post),
@@ -239,8 +244,8 @@ class InstagramExtractor(Extractor):
             manifest = item.get("video_dash_manifest")
             media = video
 
-            if self._warn_video_ua:
-                self._warn_video_ua = False
+            if self._warn_video:
+                self._warn_video = False
                 pattern = text.re(
                     r"Chrome/\d{3,}\.\d+\.\d+\.\d+(?!\d* Mobile)")
                 if not pattern.search(self.session.headers["User-Agent"]):
@@ -250,8 +255,9 @@ class InstagramExtractor(Extractor):
             video = manifest = None
             media = image
 
-            if image["width"] < item.get("original_width", 0) or \
-                    image["height"] < item.get("original_height", 0):
+            if self._warn_image < (
+                    (image["width"] < item.get("original_width", 0)) +
+                    (image["height"] < item.get("original_height", 0))):
                 self.log.warning(
                     "%s: Available image resolutions lower than the "
                     "original (%sx%s < %sx%s). "
@@ -278,7 +284,7 @@ class InstagramExtractor(Extractor):
             if manifest is not None:
                 media["_ytdl_manifest_data"] = manifest
             if "owner" in item:
-                media["owner2"] = item["owner"]
+                media["owner"] = item["owner"]
             if "reshared_story_media_author" in item:
                 media["author"] = item["reshared_story_media_author"]
             if "expiring_at" in item:
@@ -287,6 +293,14 @@ class InstagramExtractor(Extractor):
             self._extract_tagged_users(item, media)
             files.append(media)
 
+        if "type" not in data:
+            if len(files) == 1 and files[0]["video_url"]:
+                data["type"] = "reel"
+                data["post_url"] = f"{self.root}/reel/{post['code']}/"
+            else:
+                data["type"] = "post"
+                data["post_url"] = f"{self.root}/p/{post['code']}/"
+
         return data
 
     def _parse_post_graphql(self, post):
@@ -443,6 +457,32 @@ class InstagramExtractor(Extractor):
                 user[key] = 0
 
 
+class InstagramPostExtractor(InstagramExtractor):
+    """Extractor for an Instagram post"""
+    subcategory = "post"
+    pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
+               r"/(?:share/()|[^/?#]+/)?(?:p|tv|reels?())/([^/?#]+)")
+    example = "https://www.instagram.com/p/abcdefg/"
+
+    def __init__(self, match):
+        if match[2] is not None:
+            self.subcategory = "reel"
+        InstagramExtractor.__init__(self, match)
+
+    def posts(self):
+        share, reel, shortcode = self.groups
+        if share is not None:
+            url = text.ensure_http_scheme(self.url)
+            headers = {
+                "Sec-Fetch-Dest": "empty",
+                "Sec-Fetch-Mode": "navigate",
+                "Sec-Fetch-Site": "same-origin",
+            }
+            location = self.request_location(url, headers=headers)
+            shortcode = location.split("/")[-2]
+        return self.api.media(shortcode)
+
+
 class InstagramUserExtractor(Dispatch, InstagramExtractor):
     """Extractor for an Instagram user profile"""
     pattern = USER_PATTERN + r"/?(?:$|[?#])"
@@ -740,27 +780,6 @@ class InstagramAvatarExtractor(InstagramExtractor):
     },)
 
 
-class InstagramPostExtractor(InstagramExtractor):
-    """Extractor for an Instagram post"""
-    subcategory = "post"
-    pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
-               r"/(?:share/()|[^/?#]+/)?(?:p|tv|reel)/([^/?#]+)")
-    example = "https://www.instagram.com/p/abcdefg/"
-
-    def posts(self):
-        share, shortcode = self.groups
-        if share is not None:
-            url = text.ensure_http_scheme(self.url)
-            headers = {
-                "Sec-Fetch-Dest": "empty",
-                "Sec-Fetch-Mode": "navigate",
-                "Sec-Fetch-Site": "same-origin",
-            }
-            location = self.request_location(url, headers=headers)
-            shortcode = location.split("/")[-2]
-        return self.api.media(shortcode)
-
-
 class InstagramRestAPI():
 
     def __init__(self, extractor):
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index fbed328..30d6848 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -39,7 +39,7 @@ class MangadexExtractor(Extractor):
             data = self._transform(chapter)
             data["_extractor"] = MangadexChapterExtractor
             self._cache[uuid] = data
-            yield Message.Queue, self.root + "/chapter/" + uuid, data
+            yield Message.Queue, f"{self.root}/chapter/{uuid}", data
 
     def _items_manga(self):
         data = {"_extractor": MangadexMangaExtractor}
@@ -51,13 +51,8 @@ class MangadexExtractor(Extractor):
         relationships = defaultdict(list)
         for item in chapter["relationships"]:
             relationships[item["type"]].append(item)
-        manga = self.api.manga(relationships["manga"][0]["id"])
-        for item in manga["relationships"]:
-            relationships[item["type"]].append(item)
 
         cattributes = chapter["attributes"]
-        mattributes = manga["attributes"]
-
         if lang := cattributes.get("translatedLanguage"):
             lang = lang.partition("-")[0]
@@ -66,35 +61,21 @@ class MangadexExtractor(Extractor):
         else:
             chnum, sep, minor = 0, "", ""
 
-        data = {
-            "manga" : (mattributes["title"].get("en") or
-                       next(iter(mattributes["title"].values()))),
-            "manga_id": manga["id"],
+        return {
+            **_manga_info(self, relationships["manga"][0]["id"]),
             "title"   : cattributes["title"],
             "volume"  : text.parse_int(cattributes["volume"]),
             "chapter" : text.parse_int(chnum),
-            "chapter_minor": sep + minor,
+            "chapter_minor": f"{sep}{minor}",
             "chapter_id": chapter["id"],
             "date"    : text.parse_datetime(cattributes["publishAt"]),
+            "group"   : [group["attributes"]["name"]
+                         for group in relationships["scanlation_group"]],
             "lang"    : lang,
-            "language": util.code_to_language(lang),
             "count"   : cattributes["pages"],
             "_external_url": cattributes.get("externalUrl"),
         }
 
-        data["artist"] = [artist["attributes"]["name"]
-                          for artist in relationships["artist"]]
-        data["author"] = [author["attributes"]["name"]
-                          for author in relationships["author"]]
-        data["group"] = [group["attributes"]["name"]
-                         for group in relationships["scanlation_group"]]
-
-        data["status"] = mattributes["status"]
-        data["tags"] = [tag["attributes"]["name"]["en"]
-                        for tag in mattributes["tags"]]
-
-        return data
-
 
 class MangadexCoversExtractor(MangadexExtractor):
     """Extractor for mangadex manga covers"""
@@ -103,7 +84,7 @@ class MangadexCoversExtractor(MangadexExtractor):
     filename_fmt = "{volume:>02}_{lang}.{extension}"
     archive_fmt = "c_{cover_id}"
     pattern = (rf"{BASE_PATTERN}/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)"
-               r"(?:/[^/?#]+)?\?tab=art")
+               rf"(?:/[^/?#]+)?\?tab=art")
     example = ("https://mangadex.org/title"
                "/01234567-89ab-cdef-0123-456789abcdef?tab=art")
@@ -121,24 +102,10 @@ class MangadexCoversExtractor(MangadexExtractor):
         relationships = defaultdict(list)
         for item in cover["relationships"]:
             relationships[item["type"]].append(item)
-        manga = self.api.manga(relationships["manga"][0]["id"])
-        for item in manga["relationships"]:
-            relationships[item["type"]].append(item)
-
         cattributes = cover["attributes"]
-        mattributes = manga["attributes"]
 
         return {
-            "manga" : (mattributes["title"].get("en") or
-                       next(iter(mattributes["title"].values()))),
-            "manga_id": manga["id"],
-            "status" : mattributes["status"],
-            "author" : [author["attributes"]["name"]
-                        for author in relationships["author"]],
-            "artist" : [artist["attributes"]["name"]
-                        for artist in relationships["artist"]],
-            "tags"   : [tag["attributes"]["name"]["en"]
-                        for tag in mattributes["tags"]],
+            **_manga_info(self, relationships["manga"][0]["id"]),
             "cover"  : cattributes["fileName"],
             "lang"   : cattributes.get("locale"),
             "volume" : text.parse_int(cattributes["volume"]),
@@ -150,7 +117,7 @@ class MangadexChapterExtractor(MangadexExtractor):
     """Extractor for manga-chapters from mangadex.org"""
     subcategory = "chapter"
-    pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)"
+    pattern = rf"{BASE_PATTERN}/chapter/([0-9a-f-]+)"
     example = ("https://mangadex.org/chapter"
                "/01234567-89ab-cdef-0123-456789abcdef")
@@ -177,13 +144,13 @@ class MangadexChapterExtractor(MangadexExtractor):
                 "page-reverse") else enumerate
         for data["page"], page in enum(chapter["data"], 1):
             text.nameext_from_url(page, data)
-            yield Message.Url, base + page, data
+            yield Message.Url, f"{base}{page}", data
 
 
 class MangadexMangaExtractor(MangadexExtractor):
     """Extractor for manga from mangadex.org"""
     subcategory = "manga"
-    pattern = BASE_PATTERN + r"/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)"
+    pattern = rf"{BASE_PATTERN}/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)"
     example = ("https://mangadex.org/title"
                "/01234567-89ab-cdef-0123-456789abcdef")
@@ -194,7 +161,7 @@ class MangadexMangaExtractor(MangadexExtractor):
 
 class MangadexFeedExtractor(MangadexExtractor):
     """Extractor for chapters from your Updates Feed"""
     subcategory = "feed"
-    pattern = BASE_PATTERN + r"/titles?/feed$()"
+    pattern = rf"{BASE_PATTERN}/titles?/feed$()"
     example = "https://mangadex.org/title/feed"
 
     def chapters(self):
@@ -204,7 +171,7 @@ class MangadexFeedExtractor(MangadexExtractor):
 
 class MangadexFollowingExtractor(MangadexExtractor):
     """Extractor for followed manga from your Library"""
     subcategory = "following"
-    pattern = BASE_PATTERN + r"/titles?/follows(?:\?([^#]+))?$"
+    pattern = rf"{BASE_PATTERN}/titles?/follows(?:\?([^#]+))?$"
     example = "https://mangadex.org/title/follows"
 
     items = MangadexExtractor._items_manga
@@ -216,8 +183,8 @@ class MangadexFollowingExtractor(MangadexExtractor):
 
 class MangadexListExtractor(MangadexExtractor):
     """Extractor for mangadex MDLists"""
     subcategory = "list"
-    pattern = (BASE_PATTERN +
-               r"/list/([0-9a-f-]+)(?:/[^/?#]*)?(?:\?tab=(\w+))?")
+    pattern = (rf"{BASE_PATTERN}"
+               rf"/list/([0-9a-f-]+)(?:/[^/?#]*)?(?:\?tab=(\w+))?")
     example = ("https://mangadex.org/list"
                "/01234567-89ab-cdef-0123-456789abcdef/NAME")
@@ -242,7 +209,7 @@ class MangadexListExtractor(MangadexExtractor):
 
 class MangadexAuthorExtractor(MangadexExtractor):
     """Extractor for mangadex authors"""
     subcategory = "author"
-    pattern = BASE_PATTERN + r"/author/([0-9a-f-]+)"
+    pattern = rf"{BASE_PATTERN}/author/([0-9a-f-]+)"
     example = ("https://mangadex.org/author"
                "/01234567-89ab-cdef-0123-456789abcdef/NAME")
@@ -280,30 +247,30 @@ class MangadexAPI():
             else text.ensure_http_scheme(server).rstrip("/"))
 
     def athome_server(self, uuid):
-        return self._call("/at-home/server/" + uuid)
+        return self._call(f"/at-home/server/{uuid}")
 
     def author(self, uuid, manga=False):
         params = {"includes[]": ("manga",)} if manga else None
-        return self._call("/author/" + uuid, params)["data"]
+        return self._call(f"/author/{uuid}", params)["data"]
 
     def chapter(self, uuid):
         params = {"includes[]": ("scanlation_group",)}
{"includes[]": ("scanlation_group",)} - return self._call("/chapter/" + uuid, params)["data"] + return self._call(f"/chapter/{uuid}", params)["data"] def covers_manga(self, uuid): params = {"manga[]": uuid} return self._pagination_covers("/cover", params) def list(self, uuid): - return self._call("/list/" + uuid, None, True)["data"] + return self._call(f"/list/{uuid}", None, True)["data"] def list_feed(self, uuid): - return self._pagination_chapters("/list/" + uuid + "/feed", None, True) + return self._pagination_chapters(f"/list/{uuid}/feed", None, True) @memcache(keyarg=1) def manga(self, uuid): params = {"includes[]": ("artist", "author")} - return self._call("/manga/" + uuid, params)["data"] + return self._call(f"/manga/{uuid}", params)["data"] def manga_author(self, uuid_author): params = {"authorOrArtist": uuid_author} @@ -315,7 +282,7 @@ class MangadexAPI(): "order[volume]" : order, "order[chapter]": order, } - return self._pagination_chapters("/manga/" + uuid + "/feed", params) + return self._pagination_chapters(f"/manga/{uuid}/feed", params) def user_follows_manga(self): params = {"contentRating": None} @@ -366,17 +333,17 @@ class MangadexAPI(): _refresh_token_cache.update( (username, "personal"), data["refresh_token"]) - return "Bearer " + access_token + return f"Bearer {access_token}" @cache(maxage=900, keyarg=1) def _authenticate_impl_legacy(self, username, password): if refresh_token := _refresh_token_cache(username): self.extractor.log.info("Refreshing access token") - url = self.root + "/auth/refresh" + url = f"{self.root}/auth/refresh" json = {"token": refresh_token} else: self.extractor.log.info("Logging in as %s", username) - url = self.root + "/auth/login" + url = f"{self.root}/auth/login" json = {"username": username, "password": password} self.extractor.log.debug("Using legacy login method") @@ -387,10 +354,10 @@ class MangadexAPI(): if refresh_token != data["token"]["refresh"]: _refresh_token_cache.update(username, data["token"]["refresh"]) - return "Bearer " + data["token"]["session"] + return f"Bearer {data['token']['session']}" def _call(self, endpoint, params=None, auth=False): - url = self.root + endpoint + url = f"{self.root}{endpoint}" headers = self.headers_auth if auth else self.headers while True: @@ -470,3 +437,33 @@ class MangadexAPI(): @cache(maxage=90*86400, keyarg=0) def _refresh_token_cache(username): return None + + +@memcache(keyarg=1) +def _manga_info(self, uuid): + manga = self.api.manga(uuid) + + rel = defaultdict(list) + for item in manga["relationships"]: + rel[item["type"]].append(item) + mattr = manga["attributes"] + + return { + "manga" : (mattr["title"].get("en") or + next(iter(mattr["title"].values()))), + "manga_id": manga["id"], + "manga_titles": [t.popitem()[1] + for t in mattr.get("altTitles") or ()], + "manga_date" : text.parse_datetime(mattr.get("createdAt")), + "description" : (mattr["description"].get("en") or + next(iter(mattr["description"].values()))), + "demographic": mattr.get("publicationDemographic"), + "origin": mattr.get("originalLanguage"), + "status": mattr.get("status"), + "year" : mattr.get("year"), + "rating": mattr.get("contentRating"), + "links" : mattr.get("links"), + "tags" : [tag["attributes"]["name"]["en"] for tag in mattr["tags"]], + "artist": [artist["attributes"]["name"] for artist in rel["artist"]], + "author": [author["attributes"]["name"] for author in rel["author"]], + } diff --git a/gallery_dl/extractor/mangafire.py b/gallery_dl/extractor/mangafire.py new file mode 100644 index 0000000..5ccb732 --- 
+++ b/gallery_dl/extractor/mangafire.py
@@ -0,0 +1,168 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://mangafire.to/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text, exception
+from ..cache import memcache
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangafire\.to"
+
+
+class MangafireBase():
+    """Base class for mangafire extractors"""
+    category = "mangafire"
+    root = "https://mangafire.to"
+
+
+class MangafireChapterExtractor(MangafireBase, ChapterExtractor):
+    """Extractor for mangafire manga chapters"""
+    directory_fmt = (
+        "{category}", "{manga}",
+        "{volume:?v/ />02}{chapter:?c//>03}{chapter_minor:?//}{title:?: //}")
+    filename_fmt = (
+        "{manga}{volume:?_v//>02}{chapter:?_c//>03}{chapter_minor:?//}_"
+        "{page:>03}.{extension}")
+    archive_fmt = (
+        "{manga_id}_{chapter_id}_{page}")
+    pattern = (rf"{BASE_PATTERN}/read/([\w-]+\.(\w+))/([\w-]+)"
+               rf"/((chapter|volume)-\d+(?:\D.*)?)")
+    example = "https://mangafire.to/read/MANGA.ID/LANG/chapter-123"
+
+    def metadata(self, _):
+        manga_path, manga_id, lang, chapter_info, self.type = self.groups
+
+        try:
+            chapters = _manga_chapters(self, (manga_id, self.type, lang))
+            anchor = chapters[chapter_info]
+        except KeyError:
+            raise exception.NotFoundError("chapter")
+        self.chapter_id = text.extr(anchor, 'data-id="', '"')
+
+        return {
+            **_manga_info(self, manga_path),
+            **_chapter_info(anchor),
+        }
+
+    def images(self, page):
+        url = f"{self.root}/ajax/read/{self.type}/{self.chapter_id}"
+        headers = {"x-requested-with": "XMLHttpRequest"}
+        data = self.request_json(url, headers=headers)
+
+        return [
+            (image[0], None)
+            for image in data["result"]["images"]
+        ]
+
+
+class MangafireMangaExtractor(MangafireBase, MangaExtractor):
+    """Extractor for mangafire manga"""
+    chapterclass = MangafireChapterExtractor
+    pattern = rf"{BASE_PATTERN}/manga/([\w-]+)\.(\w+)"
+    example = "https://mangafire.to/manga/MANGA.ID"
+
+    def chapters(self, page):
+        manga_slug, manga_id = self.groups
+        lang = self.config("lang") or "en"
+
+        manga = _manga_info(self, f"{manga_slug}.{manga_id}")
+        chapters = _manga_chapters(self, (manga_id, "chapter", lang))
+
+        return [
+            (f"""{self.root}{text.extr(anchor, 'href="', '"')}""", {
+                **manga,
+                **_chapter_info(anchor),
+            })
+            for anchor in chapters.values()
+        ]
+
+
+@memcache(keyarg=1)
+def _manga_info(self, manga_path, page=None):
+    if page is None:
+        url = f"{self.root}/manga/{manga_path}"
+        page = self.request(url).text
+    slug, _, mid = manga_path.rpartition(".")
+
+    extr = text.extract_from(page)
+    manga = {
+        "cover": text.extr(extr(
+            'class="poster">', ''), 'src="', '"'),
+        "status": extr("", "<").replace("_", " ").title(),
+        "manga" : text.unescape(extr(
+            'itemprop="name">', "<")),
+        "manga_id": mid,
+        "manga_slug": slug,
+        "manga_titles": text.unescape(extr(
+            "", "<")).split("; "),
+        "type": text.remove_html(extr(
+            'class="min-info">', "")),
+        "author": text.unescape(text.remove_html(extr(
+            "Author:", ""))).split(" , "),
+        "published": text.remove_html(extr(
+            "Published:", "")),
+        "tags": text.split_html(extr(
+            "Genres:", ""))[::2],
+        "publisher": text.unescape(text.remove_html(extr(
+            "Mangazines:", ""))).split(" , "),
+        "score": text.parse_float(text.remove_html(extr(
+            'class="score">', " / "))),
+        "description": text.remove_html(extr(
+            'id="synopsis">', "