|           |                                                                              |
|-----------|------------------------------------------------------------------------------|
| author    | 2025-05-05 01:19:04 -0400                                                    |
| committer | 2025-05-05 01:19:04 -0400                                                    |
| commit    | 2486bca7db446271312f1fc0f46b032154c65f1b (patch)                             |
| tree      | 86deb50b258b2dab02936802b79d1af7e3c254ab /gallery_dl/extractor/manganelo.py |
| parent    | f98d637baa18530edb64e5f71bb9feefbd9e80b4 (diff)                              |
| parent    | c679cd7a13bdbf6896e53d68fe2093910bc6625a (diff)                              |
Update upstream source from tag 'upstream/1.29.6'
Update to upstream version '1.29.6'
with Debian dir fb955c7c635591b07c8b52773c233ca312089e7a
Diffstat (limited to 'gallery_dl/extractor/manganelo.py')
| mode       | path                              | lines |
|------------|-----------------------------------|-------|
| -rw-r--r-- | gallery_dl/extractor/manganelo.py | 179   |
1 file changed, 100 insertions(+), 79 deletions(-)
diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py
index 232b98d..5e92aee 100644
--- a/gallery_dl/extractor/manganelo.py
+++ b/gallery_dl/extractor/manganelo.py
@@ -1,107 +1,128 @@
 # -*- coding: utf-8 -*-
 
+# Copyright 2020 Jake Mannens
+# Copyright 2021-2025 Mike Fährmann
+#
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extractors for https://manganato.com/"""
+"""Extractors for https://www.mangakakalot.gg/ and mirror sites"""
 
-from .common import ChapterExtractor, MangaExtractor
-from .. import text
-import re
+from .common import BaseExtractor, ChapterExtractor, MangaExtractor
+from .. import text, util
 
-BASE_PATTERN = (
-    r"(?:https?://)?"
-    r"((?:chap|read|www\.|m\.)?mangan(?:at|el)o"
-    r"\.(?:to|com))"
-)
 
+class ManganeloExtractor(BaseExtractor):
+    basecategory = "manganelo"
 
-class ManganeloBase():
-    category = "manganelo"
-    root = "https://chapmanganato.com"
-    _match_chapter = None
 
-    def __init__(self, match):
-        domain, path = match.groups()
-        super().__init__(match, "https://" + domain + path)
-
-    def _init(self):
-        if self._match_chapter is None:
-            ManganeloBase._match_chapter = re.compile(
-                r"(?:[Vv]ol\.?\s*(\d+)\s?)?"
-                r"[Cc]hapter\s*(\d+)([^:]*)"
-                r"(?::\s*(.+))?").match
-
-    def _parse_chapter(self, info, manga, author, date=None):
-        match = self._match_chapter(info)
-        if match:
-            volume, chapter, minor, title = match.groups()
-        else:
-            volume = chapter = minor = ""
-            title = info
-
-        return {
-            "manga"        : manga,
-            "author"       : author,
-            "date"         : date,
-            "title"        : text.unescape(title) if title else "",
-            "volume"       : text.parse_int(volume),
-            "chapter"      : text.parse_int(chapter),
-            "chapter_minor": minor,
-            "lang"         : "en",
-            "language"     : "English",
-        }
+BASE_PATTERN = ManganeloExtractor.update({
+    "nelomanga": {
+        "root"   : "https://www.nelomanga.net",
+        "pattern": r"(?:www\.)?nelomanga\.net",
+    },
+    "natomanga": {
+        "root"   : "https://www.natomanga.com",
+        "pattern": r"(?:www\.)?natomanga\.com",
+    },
+    "manganato": {
+        "root"   : "https://www.manganato.gg",
+        "pattern": r"(?:www\.)?manganato\.gg",
+    },
+    "mangakakalot": {
+        "root"   : "https://www.mangakakalot.gg",
+        "pattern": r"(?:www\.)?mangakakalot\.gg",
+    },
+})
 
 
-class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
-    """Extractor for manga chapters from manganelo.com"""
-    pattern = BASE_PATTERN + r"(/(?:manga-\w+|chapter/\w+)/chapter[-_][^/?#]+)"
-    example = "https://chapmanganato.com/manga-ID/chapter-01"
+class ManganeloChapterExtractor(ManganeloExtractor, ChapterExtractor):
+    """Extractor for manganelo manga chapters"""
+    pattern = BASE_PATTERN + r"(/manga/[^/?#]+/chapter-[^/?#]+)"
+    example = "https://www.mangakakalot.gg/manga/MANGA_NAME/chapter-123"
+
+    def __init__(self, match):
+        ManganeloExtractor.__init__(self, match)
+        self.gallery_url = self.root + self.groups[-1]
 
     def metadata(self, page):
         extr = text.extract_from(page)
-        extr('class="a-h"', ">")
-        manga = extr('title="', '"')
-        info = extr('title="', '"')
-        author = extr("- Author(s) : ", "</p>")
-        return self._parse_chapter(
-            info, text.unescape(manga), text.unescape(author))
+        data = {
+            "date"        : text.parse_datetime(extr(
+                '"datePublished": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
+            "date_updated": text.parse_datetime(extr(
+                '"dateModified": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
+            "manga_id"    : text.parse_int(extr("comic_id =", ";")),
+            "chapter_id"  : text.parse_int(extr("chapter_id =", ";")),
+            "manga"       : extr("comic_name =", ";").strip('" '),
+            "lang"        : "en",
+            "language"    : "English",
+        }
+
+        chapter_name = extr("chapter_name =", ";").strip('" ')
+        chapter, sep, minor = chapter_name.rpartition(" ")[2].partition(".")
+        data["chapter"] = text.parse_int(chapter)
+        data["chapter_minor"] = sep + minor
+        data["author"] = extr(". Author:", " already has ").strip()
+
+        return data
 
     def images(self, page):
-        page = text.extr(
-            page, 'class="container-chapter-reader', 'class="container')
+        extr = text.extract_from(page)
+        cdns = util.json_loads(extr("var cdns =", ";"))[0]
+        imgs = util.json_loads(extr("var chapterImages =", ";"))
+
+        if cdns[-1] != "/":
+            cdns += "/"
+
         return [
-            (url, None)
-            for url in text.extract_iter(page, '<img src="', '"')
-            if not url.endswith("/gohome.png")
-        ] or [
-            (url, None)
-            for url in text.extract_iter(
-                page, '<img class="reader-content" src="', '"')
+            (cdns + path, None)
+            for path in imgs
         ]
 
 
-class ManganeloMangaExtractor(ManganeloBase, MangaExtractor):
-    """Extractor for manga from manganelo.com"""
+class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
+    """Extractor for manganelo manga"""
     chapterclass = ManganeloChapterExtractor
-    pattern = BASE_PATTERN + r"(/(?:manga[-/]|read_)\w+)/?$"
-    example = "https://manganato.com/manga-ID"
+    pattern = BASE_PATTERN + r"(/manga/[^/?#]+)$"
+    example = "https://www.mangakakalot.gg/manga/MANGA_NAME"
 
-    def chapters(self, page):
-        results = []
-        append = results.append
+    def __init__(self, match):
+        ManganeloExtractor.__init__(self, match)
+        self.manga_url = self.root + self.groups[-1]
 
+    def chapters(self, page):
         extr = text.extract_from(page)
+
         manga = text.unescape(extr("<h1>", "<"))
-        author = text.remove_html(extr("</i>Author(s) :</td>", "</tr>"))
-
-        extr('class="row-content-chapter', '')
-        while True:
-            url = extr('class="chapter-name text-nowrap" href="', '"')
-            if not url:
-                return results
-            info = extr(">", "<")
-            date = extr('class="chapter-time text-nowrap" title="', '"')
-            append((url, self._parse_chapter(info, manga, author, date)))
+        author = text.remove_html(extr("<li>Author(s) :", "</a>"))
+        status = extr("<li>Status :", "<").strip()
+        update = text.parse_datetime(extr(
+            "<li>Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p")
+        tags = text.split_html(extr(">Genres :", "</li>"))[::2]
+
+        results = []
+        for chapter in text.extract_iter(page, '<div class="row">', '</div>'):
+            url, pos = text.extract(chapter, '<a href="', '"')
+            title, pos = text.extract(chapter, '>', '</a>', pos)
+            date, pos = text.extract(chapter, '<span title="', '"', pos)
+            chapter, sep, minor = url.rpartition("/chapter-")[2].partition("-")
+
+            if url[0] == "/":
+                url = self.root + url
+            results.append((url, {
+                "manga"        : manga,
+                "author"       : author,
+                "status"       : status,
+                "tags"         : tags,
+                "date_updated" : update,
+                "chapter"      : text.parse_int(chapter),
+                "chapter_minor": (sep and ".") + minor,
+                "title"        : title.partition(": ")[2],
+                "date"         : text.parse_datetime(date, "%b-%d-%Y %H:%M"),
+                "lang"         : "en",
+                "language"     : "English",
+            }))
+        return results
