aboutsummaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/mangafire.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/mangafire.py')
-rw-r--r--gallery_dl/extractor/mangafire.py168
1 files changed, 168 insertions, 0 deletions
diff --git a/gallery_dl/extractor/mangafire.py b/gallery_dl/extractor/mangafire.py
new file mode 100644
index 0000000..5ccb732
--- /dev/null
+++ b/gallery_dl/extractor/mangafire.py
@@ -0,0 +1,168 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://mangafire.to/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text, exception
+from ..cache import memcache
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangafire\.to"
+
+
+class MangafireBase():
+ """Base class for mangafire extractors"""
+ category = "mangafire"
+ root = "https://mangafire.to"
+
+
+class MangafireChapterExtractor(MangafireBase, ChapterExtractor):
+ """Extractor for mangafire manga chapters"""
+ directory_fmt = (
+ "{category}", "{manga}",
+ "{volume:?v/ />02}{chapter:?c//>03}{chapter_minor:?//}{title:?: //}")
+ filename_fmt = (
+ "{manga}{volume:?_v//>02}{chapter:?_c//>03}{chapter_minor:?//}_"
+ "{page:>03}.{extension}")
+ archive_fmt = (
+ "{manga_id}_{chapter_id}_{page}")
+ pattern = (rf"{BASE_PATTERN}/read/([\w-]+\.(\w+))/([\w-]+)"
+ rf"/((chapter|volume)-\d+(?:\D.*)?)")
+ example = "https://mangafire.to/read/MANGA.ID/LANG/chapter-123"
+
+ def metadata(self, _):
+ manga_path, manga_id, lang, chapter_info, self.type = self.groups
+
+ try:
+ chapters = _manga_chapters(self, (manga_id, self.type, lang))
+ anchor = chapters[chapter_info]
+ except KeyError:
+ raise exception.NotFoundError("chapter")
+ self.chapter_id = text.extr(anchor, 'data-id="', '"')
+
+ return {
+ **_manga_info(self, manga_path),
+ **_chapter_info(anchor),
+ }
+
+ def images(self, page):
+ url = f"{self.root}/ajax/read/{self.type}/{self.chapter_id}"
+ headers = {"x-requested-with": "XMLHttpRequest"}
+ data = self.request_json(url, headers=headers)
+
+ return [
+ (image[0], None)
+ for image in data["result"]["images"]
+ ]
+
+
+class MangafireMangaExtractor(MangafireBase, MangaExtractor):
+ """Extractor for mangafire manga"""
+ chapterclass = MangafireChapterExtractor
+ pattern = rf"{BASE_PATTERN}/manga/([\w-]+)\.(\w+)"
+ example = "https://mangafire.to/manga/MANGA.ID"
+
+ def chapters(self, page):
+ manga_slug, manga_id = self.groups
+ lang = self.config("lang") or "en"
+
+ manga = _manga_info(self, f"{manga_slug}.{manga_id}")
+ chapters = _manga_chapters(self, (manga_id, "chapter", lang))
+
+ return [
+ (f"""{self.root}{text.extr(anchor, 'href="', '"')}""", {
+ **manga,
+ **_chapter_info(anchor),
+ })
+ for anchor in chapters.values()
+ ]
+
+
+@memcache(keyarg=1)
+def _manga_info(self, manga_path, page=None):
+ if page is None:
+ url = f"{self.root}/manga/{manga_path}"
+ page = self.request(url).text
+ slug, _, mid = manga_path.rpartition(".")
+
+ extr = text.extract_from(page)
+ manga = {
+ "cover": text.extr(extr(
+ 'class="poster">', '</div>'), 'src="', '"'),
+ "status": extr("<p>", "<").replace("_", " ").title(),
+ "manga" : text.unescape(extr(
+ 'itemprop="name">', "<")),
+ "manga_id": mid,
+ "manga_slug": slug,
+ "manga_titles": text.unescape(extr(
+ "<h6>", "<")).split("; "),
+ "type": text.remove_html(extr(
+ 'class="min-info">', "</a>")),
+ "author": text.unescape(text.remove_html(extr(
+ "<span>Author:</span>", "</div>"))).split(" , "),
+ "published": text.remove_html(extr(
+ "<span>Published:</span>", "</div>")),
+ "tags": text.split_html(extr(
+ "<span>Genres:</span>", "</div>"))[::2],
+ "publisher": text.unescape(text.remove_html(extr(
+ "<span>Mangazines:</span>", "</div>"))).split(" , "),
+ "score": text.parse_float(text.remove_html(extr(
+ 'class="score">', " / "))),
+ "description": text.remove_html(extr(
+ 'id="synopsis">', "<script>")),
+ }
+
+ if len(lst := manga["author"]) == 1 and not lst[0]:
+ manga["author"] = ()
+ if len(lst := manga["publisher"]) == 1 and not lst[0]:
+ manga["publisher"] = ()
+
+ return manga
+
+
+@memcache(keyarg=1)
+def _manga_chapters(self, manga_info):
+ manga_id, type, lang = manga_info
+ url = f"{self.root}/ajax/read/{manga_id}/{type}/{lang}"
+ headers = {"x-requested-with": "XMLHttpRequest"}
+ data = self.request_json(url, headers=headers)
+
+ needle = f"{manga_id}/{lang}/"
+ return {
+ text.extr(anchor, needle, '"'): anchor
+ for anchor in text.extract_iter(data["result"]["html"], "<a ", ">")
+ }
+
+
+@memcache(keyarg=0)
+def _chapter_info(info):
+ _, lang, chapter_info = text.extr(info, 'href="', '"').rsplit("/", 2)
+
+ if chapter_info.startswith("vol"):
+ volume = text.extr(info, 'data-number="', '"')
+ volume_id = text.parse_int(text.extr(info, 'data-id="', '"'))
+ return {
+ "volume" : text.parse_int(volume),
+ "volume_id" : volume_id,
+ "chapter" : 0,
+ "chapter_minor" : "",
+ "chapter_string": chapter_info,
+ "chapter_id" : volume_id,
+ "title" : text.unescape(text.extr(info, 'title="', '"')),
+ "lang" : lang,
+ }
+
+ chapter, sep, minor = text.extr(info, 'data-number="', '"').partition(".")
+ return {
+ "chapter" : text.parse_int(chapter),
+ "chapter_minor" : f"{sep}{minor}",
+ "chapter_string": chapter_info,
+ "chapter_id" : text.parse_int(text.extr(info, 'data-id="', '"')),
+ "title" : text.unescape(text.extr(info, 'title="', '"')),
+ "lang" : lang,
+ }