summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/mangataro.py
diff options
context:
space:
mode:
author: Libravatar Unit 193 <unit193@unit193.net> 2025-09-23 07:44:37 -0400
committer: Libravatar Unit 193 <unit193@unit193.net> 2025-09-23 07:44:37 -0400
commit: 42b62671fabfdcf983a9575221420d85f7fbcac1 (patch)
tree: fa6b2af249a7216aae5c70a926c6d08be1ac55a6 /gallery_dl/extractor/mangataro.py
parent: 3b7f8716690b7aa1994a9cb387bbc7215e01a4ed (diff)
New upstream version 1.30.8. upstream/1.30.8
Diffstat (limited to 'gallery_dl/extractor/mangataro.py')
-rw-r--r-- gallery_dl/extractor/mangataro.py 105
1 files changed, 105 insertions, 0 deletions
diff --git a/gallery_dl/extractor/mangataro.py b/gallery_dl/extractor/mangataro.py
new file mode 100644
index 0000000..f4cc058
--- /dev/null
+++ b/gallery_dl/extractor/mangataro.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://mangataro.org/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+from ..cache import memcache
+
# URL prefix shared by the extractor patterns in this module:
# the "mangataro.org" host, optionally preceded by "http://" or "https://".
BASE_PATTERN = r"(?:https?://)?mangataro\.org"
+
+
class MangataroBase:
    """Mixin holding the attributes shared by all mangataro extractors"""
    # Base URL used to build request URLs for the site.
    root = "https://mangataro.org"
    # Category name reported for extractors of this site.
    category = "mangataro"
+
+
class MangataroChapterExtractor(MangataroBase, ChapterExtractor):
    """Extractor for a single chapter of a mangataro manga"""
    pattern = rf"{BASE_PATTERN}(/read/([^/?#]+)/(?:[^/?#]*-)?(\d+))"
    example = "https://mangataro.org/read/MANGA/ch123-12345"

    def metadata(self, page):
        """Build the metadata dict for this chapter.

        Chapter details are read from the first "@graph" entry of the
        JSON-LD data embedded in 'page'; manga-level fields come from
        _manga_info() and are placed first in the result.
        """
        _path, slug, chapter_id = self.groups
        node = self._extract_jsonld(page)["@graph"][0]

        # "position" carries the chapter number; split it into its
        # integer part and the fractional remainder.
        number = node["position"]
        whole = int(number)
        fraction = number - whole
        parts = node["description"].split(" - ", 3)

        # A title is only taken when the description splits into
        # all four " - " separated parts.
        title = parts[1] if len(parts) > 3 else ""
        if fraction:
            # drop the leading "0" of e.g. "0.5" to get ".5"
            minor = str(round(fraction, 5))[1:]
        else:
            minor = ""

        date_format = "%Y-%m-%dT%H:%M:%S%z"
        data = dict(_manga_info(self, slug))
        data["title"] = title
        data["chapter"] = whole
        data["chapter_minor"] = minor
        data["chapter_id"] = text.parse_int(chapter_id)
        data["chapter_url"] = node["url"]
        data["date"] = text.parse_datetime(
            node["datePublished"], date_format)
        data["date_updated"] = text.parse_datetime(
            node["dateModified"], date_format)
        return data

    def images(self, page):
        """Return a list of (url, None) tuples for the chapter's images.

        The first entry comes from the first ' src="' attribute after the
        'comic-image-container' marker; the remaining entries are all
        'data-src="' attributes that follow it.
        """
        start = page.find('class="comic-image-container')
        first, start = text.extract(page, ' src="', '"', start)
        lazy = text.extract_iter(page, 'data-src="', '"', start)
        return [(first, None), *((src, None) for src in lazy)]
+
+
class MangataroMangaExtractor(MangataroBase, MangaExtractor):
    """Extractor for the chapter list of a mangataro manga"""
    chapterclass = MangataroChapterExtractor
    pattern = rf"{BASE_PATTERN}(/manga/([^/?#]+))"
    example = "https://mangataro.org/manga/MANGA"

    def chapters(self, page):
        """Return a list of (chapter URL, metadata dict) tuples.

        Chapter links are collected from the section of 'page' between
        the chapter-list container and the gallery tab. Chapter number,
        minor number, and chapter ID are parsed from each link itself.
        """
        info = _manga_info(self, self.groups[1])
        listing = text.extr(
            page, '<div class="chapter-list', '<div id="tab-gallery"')

        found = []
        for href in text.extract_iter(listing, '<a href="', '"'):
            # Take the last path segment without its first two characters
            # (e.g. the "ch" in ".../ch123-12345").
            tail = href[href.rfind("/") + 3:]
            # "<chapter>[-<minor>]-<id>": the ID follows the last dash,
            # an optional minor number follows the first one.
            number, _, ident = tail.rpartition("-")
            major, dash, minor = number.partition("-")

            entry = dict(info)
            entry["chapter"] = text.parse_int(major)
            entry["chapter_minor"] = f".{minor}" if dash else ""
            entry["chapter_id"] = text.parse_int(ident)
            found.append((href, entry))
        return found
+
+
@memcache(keyarg=1)
def _manga_info(self, slug):
    """Fetch the overview page of manga 'slug' and return its metadata.

    'self' is the calling extractor; its 'root', 'request()' and
    '_extract_jsonld()' are used. Most fields come from the page's JSON-LD
    data, the rest are cut out of the HTML.
    NOTE(review): wrapped in memcache(keyarg=1), so results are presumably
    cached per 'slug' -- confirm against the cache module.
    """
    page = self.request(f"{self.root}/manga/{slug}").text
    ld = self._extract_jsonld(page)

    # Keep what precedes the last " | " and then drop the final
    # space-separated word of that.
    name = ld["name"].rpartition(" | ")[0].rpartition(" ")[0]
    manga_url = ld["url"]
    cover = ld["image"]
    authors = ld["author"]["name"].split(", ")
    genre = ld["genre"]
    status = ld["status"]

    description = text.unescape(text.extr(
        page, 'id="description-content-tab">', "</div></div>"))
    tags = text.split_html(text.extr(
        page, ">Genres</h4>", "</div>"))
    publisher = text.remove_html(text.extr(
        page, '>Serialization</h4>', "</div>"))

    return {
        "manga"      : name,
        "manga_url"  : manga_url,
        "cover"      : cover,
        "author"     : authors,
        "genre"      : genre,
        "status"     : status,
        "description": description,
        "tags"       : tags,
        "publisher"  : publisher,
    }