diff options
| author | 2025-09-23 07:44:37 -0400 | |
|---|---|---|
| committer | 2025-09-23 07:44:37 -0400 | |
| commit | 42b62671fabfdcf983a9575221420d85f7fbcac1 (patch) | |
| tree | fa6b2af249a7216aae5c70a926c6d08be1ac55a6 /gallery_dl/extractor/mangataro.py | |
| parent | 3b7f8716690b7aa1994a9cb387bbc7215e01a4ed (diff) | |
New upstream version 1.30.8. upstream/1.30.8
Diffstat (limited to 'gallery_dl/extractor/mangataro.py')
| -rw-r--r-- | gallery_dl/extractor/mangataro.py | 105 |
1 files changed, 105 insertions, 0 deletions
# diff --git a/gallery_dl/extractor/mangataro.py b/gallery_dl/extractor/mangataro.py
# new file mode 100644
# index 0000000..f4cc058
# --- /dev/null
# +++ b/gallery_dl/extractor/mangataro.py
# @@ -0,0 +1,105 @@

# -*- coding: utf-8 -*-

# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://mangataro.org/"""

from .common import ChapterExtractor, MangaExtractor
from .. import text
from ..cache import memcache

BASE_PATTERN = r"(?:https?://)?mangataro\.org"


class MangataroBase():
    """Attributes shared by all mangataro extractors"""
    category = "mangataro"
    root = "https://mangataro.org"


class MangataroChapterExtractor(MangataroBase, ChapterExtractor):
    """Extractor for a single mangataro chapter"""
    pattern = rf"{BASE_PATTERN}(/read/([^/?#]+)/(?:[^/?#]*-)?(\d+))"
    example = "https://mangataro.org/read/MANGA/ch123-12345"

    def metadata(self, page):
        """Build the chapter metadata dict from the page's JSON-LD data"""
        slug, chapter_id = self.groups[1:]
        comic = self._extract_jsonld(page)["@graph"][0]

        # Split 'position' into its integer part and fractional remainder
        position = comic["position"]
        fraction = position - int(position)

        # At most four ' - '-separated pieces; the title is only taken
        # when all four are present
        pieces = comic["description"].split(" - ", 3)
        date_format = "%Y-%m-%dT%H:%M:%S%z"

        # Copy the manga-level info so the cached dict is never modified
        data = dict(_manga_info(self, slug))
        data["title"] = pieces[1] if len(pieces) > 3 else ""
        data["chapter"] = int(position)
        if fraction:
            # Drop the leading '0' so that e.g. 0.5 becomes '.5'
            data["chapter_minor"] = str(round(fraction, 5))[1:]
        else:
            data["chapter_minor"] = ""
        data["chapter_id"] = text.parse_int(chapter_id)
        data["chapter_url"] = comic["url"]
        data["date"] = text.parse_datetime(
            comic["datePublished"], date_format)
        data["date_updated"] = text.parse_datetime(
            comic["dateModified"], date_format)
        return data

    def images(self, page):
        """Return a list of (url, None) tuples for the chapter's images"""
        offset = page.find('class="comic-image-container')

        # The first image is read from a plain 'src' attribute ...
        first, offset = text.extract(page, ' src="', '"', offset)
        result = [(first, None)]

        # ... all following ones from 'data-src' attributes
        for src in text.extract_iter(page, 'data-src="', '"', offset):
            result.append((src, None))
        return result


class MangataroMangaExtractor(MangataroBase, MangaExtractor):
    """Extractor for a mangataro manga and its chapter list"""
    chapterclass = MangataroChapterExtractor
    pattern = rf"{BASE_PATTERN}(/manga/([^/?#]+))"
    example = "https://mangataro.org/manga/MANGA"

    def chapters(self, page):
        """Return a list of (chapter URL, metadata dict) tuples"""
        manga = _manga_info(self, self.groups[1])

        listing = text.extr(
            page, '<div class="chapter-list', '<div id="tab-gallery"')

        results = []
        for url in text.extract_iter(listing, '<a href="', '"'):
            # Skip the final '/' plus the two characters following it
            # ('ch' in the example URL), leaving
            # '<chapter>[-<minor>]-<chapter_id>'
            tail = url[url.rfind("/")+3:]
            number, _, chapter_id = tail.rpartition("-")
            major, sep, minor = number.partition("-")

            entry = dict(manga)
            entry["chapter"] = text.parse_int(major)
            entry["chapter_minor"] = f".{minor}" if sep else ""
            entry["chapter_id"] = text.parse_int(chapter_id)
            results.append((url, entry))
        return results


# NOTE(review): keyarg=1 presumably makes 'slug' the cache key -- confirm
# against the memcache implementation
@memcache(keyarg=1)
def _manga_info(self, slug):
    """Fetch a manga's overview page and collect its manga-level metadata"""
    page = self.request(f"{self.root}/manga/{slug}").text
    jsonld = self._extract_jsonld(page)

    # Keep what precedes the last ' | ', then drop the final
    # space-separated word of that
    name = jsonld["name"].rpartition(" | ")[0]

    info = {
        "manga"      : name.rpartition(" ")[0],
        "manga_url"  : jsonld["url"],
        "cover"      : jsonld["image"],
        "author"     : jsonld["author"]["name"].split(", "),
        "genre"      : jsonld["genre"],
        "status"     : jsonld["status"],
    }

    # The remaining fields are scraped from the HTML itself
    info["description"] = text.unescape(text.extr(
        page, 'id="description-content-tab">', "</div></div>"))
    info["tags"] = text.split_html(text.extr(
        page, ">Genres</h4>", "</div>"))
    info["publisher"] = text.remove_html(text.extr(
        page, '>Serialization</h4>', "</div>"))
    return info
