about summary refs log tree commit diff stats
path: root/gallery_dl/extractor/mangareader.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/mangareader.py')
-rw-r--r-- gallery_dl/extractor/mangareader.py 173
1 files changed, 173 insertions, 0 deletions
diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py
new file mode 100644
index 0000000..eb53998
--- /dev/null
+++ b/gallery_dl/extractor/mangareader.py
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://mangareader.to/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text, util
+from ..cache import memcache
+
# Regex prefix shared by the extractor URL patterns in this module:
# an optional "http://" or "https://" scheme and an optional "www."
# in front of the "mangareader.to" host name.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangareader\.to"
+
+
class MangareaderBase:
    """Mixin holding the attributes shared by all mangareader extractors"""
    # site identifier used for this group of extractors
    category = "mangareader"
    # scheme + host that every request URL is built from
    root = "https://mangareader.to"
+
+
class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
    """Extractor for mangareader manga chapters

    The URL pattern accepts both '.../chapter-N' and '.../volume-N'
    reader pages; the matched groups are
    (manga path, language, "chapter" or "volume", number string).
    """
    directory_fmt = (
        "{category}", "{manga}",
        "{volume:?v/ />02}{chapter:?c//>03}{chapter_minor:?//}{title:?: //}")
    filename_fmt = (
        "{manga}{volume:?_v//>02}{chapter:?_c//>03}{chapter_minor:?//}_"
        "{page:>03}.{extension}")
    archive_fmt = (
        "{manga_id}_{chapter_id}_{page}")
    pattern = (rf"{BASE_PATTERN}/read/([\w-]+-\d+)/([^/?#]+)"
               rf"/(chapter|volume)-(\d+[^/?#]*)")
    example = "https://mangareader.to/read/MANGA-123/LANG/chapter-123"

    def metadata(self, _):
        """Collect metadata for the chapter or volume named by the URL

        Sets the 'mr_settings' cookie, requests the reader page to obtain
        its 'data-reading-id' (stored as 'self.cid' for images()), and
        merges the manga-level info with the matching chapter/volume entry.

        Raises KeyError when the language or chapter/volume number from
        the URL has no entry in the manga's chapter/volume listing.
        """
        # 'kind' is "chapter" or "volume" (third pattern group)
        path, lang, kind, chstr = self.groups

        settings = util.json_dumps({
            "readingMode"     : "vertical",
            "readingDirection": "rtl",
            "quality"         : "high",
        })
        self.cookies.set("mr_settings", settings, domain="mangareader.to")

        url = f"{self.root}/read/{path}/{lang}/{kind}-{chstr}"
        page = self.request(url).text
        self.cid = cid = text.extr(page, 'data-reading-id="', '"')

        manga = _manga_info(self, path)
        return {
            **manga,
            # "_chapters" or "_volumes", then language, then number string
            **manga[f"_{kind}s"][lang][chstr],
            "chapter_id": text.parse_int(cid),
        }

    def images(self, page):
        """Return a list of (image URL, None) tuples for the current reading

        Relies on 'self.cid' having been set by metadata().
        """
        key = "chap" if self.groups[2] == "chapter" else "vol"
        url = f"{self.root}/ajax/image/list/{key}/{self.cid}"
        # Plain values, consistent with the 'mr_settings' cookie above;
        # a comma inside the string would be sent as part of the value.
        params = {
            "mode"       : "vertical",
            "quality"    : "high",
            "hozPageSize": "1",
        }
        headers = {
            "X-Requested-With": "XMLHttpRequest",
            "Sec-Fetch-Dest"  : "empty",
            "Sec-Fetch-Mode"  : "cors",
            "Sec-Fetch-Site"  : "same-origin",
        }
        # the JSON response carries an HTML fragment under "html"
        html = self.request_json(url, params=params, headers=headers)["html"]

        return [
            (image_url, None)
            for image_url in text.extract_iter(html, 'data-url="', '"')
        ]
+
+
class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
    """Extractor for the chapter list of a mangareader manga"""
    chapterclass = MangareaderChapterExtractor
    pattern = rf"{BASE_PATTERN}/([\w-]+-\d+)"
    example = "https://mangareader.to/MANGA-123"

    def chapters(self, page):
        """Return (chapter URL, metadata) tuples for one language

        The language comes from the 'lang' config option and falls back
        to "en" when that option is unset or empty.
        """
        manga = _manga_info(self, self.groups[0])
        language = self.config("lang") or "en"

        results = []
        for entry in manga["_chapters"][language].values():
            # chapter-specific keys override manga-level ones
            metadata = {**manga, **entry}
            results.append((entry["chapter_url"], metadata))
        return results
+
+
def _parse_chapters(html, base):
    """Build {group id: {chapter number string: info}} from chapter lists"""
    chapters = {}
    for group in text.extract_iter(html, "<ul", "</ul>"):
        # the full id prefix is the dict key; only its part before the
        # first "-" is stored as "lang" in each entry
        group_id = text.extr(group, ' id="', '-chapters"')
        lang = group_id.partition("-")[0]

        chapters[group_id] = current = {}
        for ch in text.extract_iter(group, "<li ", "</li>"):
            path = text.extr(ch, 'href="', '"')
            chap = text.extr(ch, 'data-number="', '"')
            name = text.unescape(text.extr(ch, 'class="name">', "<"))

            # "12.5" -> chapter "12", sep ".", minor "5"
            chapter, sep, minor = chap.partition(".")
            current[chap] = {
                "title"         : name.partition(":")[2].strip(),
                "chapter"       : text.parse_int(chapter),
                "chapter_minor" : f"{sep}{minor}",
                "chapter_string": chap,
                "chapter_url"   : f"{base}{path}",
                "lang"          : lang,
            }
    return chapters


def _parse_volumes(html, base):
    """Build {group id: {volume number string: info}} from volume lists"""
    volumes = {}
    for group in html.split('<div class="manga_list-wrap')[1:]:
        group_id = text.extr(group, ' id="', '-volumes"')
        lang = group_id.partition("-")[0]

        volumes[group_id] = current = {}
        for vol in text.extract_iter(group, 'class="item">', "</div>"):
            path = text.extr(vol, 'href="', '"')
            # last space-separated word of the 'tick-vol' label
            voln = text.extr(vol, 'tick-vol">', '<').rpartition(" ")[2]

            current[voln] = {
                "volume"        : text.parse_int(voln),
                "volume_cover"  : text.extr(vol, ' src="', '"'),
                "chapter"       : 0,
                "chapter_minor" : "",
                "chapter_string": voln,
                "chapter_url"   : f"{base}{path}",
                "lang"          : lang,
            }
    return volumes


@memcache(keyarg=1)
def _manga_info(self, manga_path):
    """Fetch a manga's overview page and parse it into a metadata dict

    Arguments:
        self: extractor instance providing 'root' and 'request()'
        manga_path: URL path component of the manga, e.g. "MANGA-123";
            the part after the last "-" becomes 'manga_id'

    Returns a dict of manga-level fields plus
        "_chapters": {group id: {chapter number string: chapter info}}
        "_volumes" : {group id: {volume number string: volume info}}

    NOTE(review): 'memcache(keyarg=1)' presumably caches the result per
    'manga_path'; callers in this module only read from / copy the dict.
    """
    html = self.request(f"{self.root}/{manga_path}").text

    mid = manga_path.rpartition("-")[2]
    # NOTE(review): 'extr' presumably continues searching from the end of
    # its previous match, so the calls below must stay in page order.
    extr = text.extract_from(html)
    url = extr('property="og:url" content="', '"')
    manga = {
        "manga_url": url,
        "manga_slug": url.rpartition("/")[2].rpartition("-")[0],
        "manga_id": text.parse_int(mid),
        "manga": text.unescape(extr('class="manga-name">', "<")),
        "manga_alt": text.unescape(extr('class="manga-name-or">', "<")),
        "tags": text.split_html(extr('class="genres">', "</div>")),
        "type": text.remove_html(extr('>Type:', "</div>")),
        "status": text.remove_html(extr('>Status:', "</div>")),
        # every second element is kept from the split 'Authors' block
        "author": text.split_html(extr('>Authors:', "</div>"))[0::2],
        "published": text.remove_html(extr('>Published:', "</div>")),
        "score": text.parse_float(text.remove_html(extr(
            '>Score:', "</div>"))),
        # strip thousands separators before converting to int
        "views": text.parse_int(text.remove_html(extr(
            '>Views:', "</div>")).replace(",", "")),
    }

    base = self.root

    # extract all chapters
    manga["_chapters"] = _parse_chapters(
        extr('class="chapters-list-ul">', " </div>"), base)

    # extract all volumes
    manga["_volumes"] = _parse_volumes(
        extr('class="volume-list-ul">', "</section>"), base)

    # extract remaining metadata
    manga["description"] = text.unescape(extr(
        'class="description-modal">', "</div>")).strip()

    return manga