diff options
Diffstat (limited to 'gallery_dl/extractor/mangafire.py')
| -rw-r--r-- | gallery_dl/extractor/mangafire.py | 168 |
1 files changed, 168 insertions, 0 deletions
diff --git a/gallery_dl/extractor/mangafire.py b/gallery_dl/extractor/mangafire.py new file mode 100644 index 0000000..5ccb732 --- /dev/null +++ b/gallery_dl/extractor/mangafire.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://mangafire.to/""" + +from .common import ChapterExtractor, MangaExtractor +from .. import text, exception +from ..cache import memcache + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangafire\.to" + + +class MangafireBase(): + """Base class for mangafire extractors""" + category = "mangafire" + root = "https://mangafire.to" + + +class MangafireChapterExtractor(MangafireBase, ChapterExtractor): + """Extractor for mangafire manga chapters""" + directory_fmt = ( + "{category}", "{manga}", + "{volume:?v/ />02}{chapter:?c//>03}{chapter_minor:?//}{title:?: //}") + filename_fmt = ( + "{manga}{volume:?_v//>02}{chapter:?_c//>03}{chapter_minor:?//}_" + "{page:>03}.{extension}") + archive_fmt = ( + "{manga_id}_{chapter_id}_{page}") + pattern = (rf"{BASE_PATTERN}/read/([\w-]+\.(\w+))/([\w-]+)" + rf"/((chapter|volume)-\d+(?:\D.*)?)") + example = "https://mangafire.to/read/MANGA.ID/LANG/chapter-123" + + def metadata(self, _): + manga_path, manga_id, lang, chapter_info, self.type = self.groups + + try: + chapters = _manga_chapters(self, (manga_id, self.type, lang)) + anchor = chapters[chapter_info] + except KeyError: + raise exception.NotFoundError("chapter") + self.chapter_id = text.extr(anchor, 'data-id="', '"') + + return { + **_manga_info(self, manga_path), + **_chapter_info(anchor), + } + + def images(self, page): + url = f"{self.root}/ajax/read/{self.type}/{self.chapter_id}" + headers = {"x-requested-with": "XMLHttpRequest"} + data = self.request_json(url, headers=headers) + + return [ + (image[0], None) + for image in data["result"]["images"] + ] + + +class MangafireMangaExtractor(MangafireBase, MangaExtractor): + """Extractor for mangafire manga""" + chapterclass = MangafireChapterExtractor + pattern = rf"{BASE_PATTERN}/manga/([\w-]+)\.(\w+)" + example = "https://mangafire.to/manga/MANGA.ID" + + def chapters(self, page): + manga_slug, manga_id = self.groups + lang = self.config("lang") or "en" + + manga = _manga_info(self, f"{manga_slug}.{manga_id}") + chapters = _manga_chapters(self, (manga_id, "chapter", lang)) + + return [ + (f"""{self.root}{text.extr(anchor, 'href="', '"')}""", { + **manga, + **_chapter_info(anchor), + }) + for anchor in chapters.values() + ] + + +@memcache(keyarg=1) +def _manga_info(self, manga_path, page=None): + if page is None: + url = f"{self.root}/manga/{manga_path}" + page = self.request(url).text + slug, _, mid = manga_path.rpartition(".") + + extr = text.extract_from(page) + manga = { + "cover": text.extr(extr( + 'class="poster">', '</div>'), 'src="', '"'), + "status": extr("<p>", "<").replace("_", " ").title(), + "manga" : text.unescape(extr( + 'itemprop="name">', "<")), + "manga_id": mid, + "manga_slug": slug, + "manga_titles": text.unescape(extr( + "<h6>", "<")).split("; "), + "type": text.remove_html(extr( + 'class="min-info">', "</a>")), + "author": text.unescape(text.remove_html(extr( + "<span>Author:</span>", "</div>"))).split(" , "), + "published": text.remove_html(extr( + "<span>Published:</span>", "</div>")), + "tags": text.split_html(extr( + "<span>Genres:</span>", "</div>"))[::2], + "publisher": text.unescape(text.remove_html(extr( + "<span>Mangazines:</span>", "</div>"))).split(" , "), + "score": text.parse_float(text.remove_html(extr( + 'class="score">', " / "))), + "description": text.remove_html(extr( + 'id="synopsis">', "<script>")), + } + + if len(lst := manga["author"]) == 1 and not lst[0]: + manga["author"] = () + if len(lst := manga["publisher"]) == 1 and not lst[0]: + manga["publisher"] = () + + return manga + + +@memcache(keyarg=1) +def _manga_chapters(self, manga_info): + manga_id, type, lang = manga_info + url = f"{self.root}/ajax/read/{manga_id}/{type}/{lang}" + headers = {"x-requested-with": "XMLHttpRequest"} + data = self.request_json(url, headers=headers) + + needle = f"{manga_id}/{lang}/" + return { + text.extr(anchor, needle, '"'): anchor + for anchor in text.extract_iter(data["result"]["html"], "<a ", ">") + } + + +@memcache(keyarg=0) +def _chapter_info(info): + _, lang, chapter_info = text.extr(info, 'href="', '"').rsplit("/", 2) + + if chapter_info.startswith("vol"): + volume = text.extr(info, 'data-number="', '"') + volume_id = text.parse_int(text.extr(info, 'data-id="', '"')) + return { + "volume" : text.parse_int(volume), + "volume_id" : volume_id, + "chapter" : 0, + "chapter_minor" : "", + "chapter_string": chapter_info, + "chapter_id" : volume_id, + "title" : text.unescape(text.extr(info, 'title="', '"')), + "lang" : lang, + } + + chapter, sep, minor = text.extr(info, 'data-number="', '"').partition(".") + return { + "chapter" : text.parse_int(chapter), + "chapter_minor" : f"{sep}{minor}", + "chapter_string": chapter_info, + "chapter_id" : text.parse_int(text.extr(info, 'data-id="', '"')), + "title" : text.unescape(text.extr(info, 'title="', '"')), + "lang" : lang, + } |
