path: root/gallery_dl/extractor/manganelo.py
author     Unit 193 <unit193@unit193.net>  2025-05-05 01:19:04 -0400
committer  Unit 193 <unit193@unit193.net>  2025-05-05 01:19:04 -0400
commit     2486bca7db446271312f1fc0f46b032154c65f1b (patch)
tree       86deb50b258b2dab02936802b79d1af7e3c254ab /gallery_dl/extractor/manganelo.py
parent     f98d637baa18530edb64e5f71bb9feefbd9e80b4 (diff)
parent     c679cd7a13bdbf6896e53d68fe2093910bc6625a (diff)
Update upstream source from tag 'upstream/1.29.6'
Update to upstream version '1.29.6' with Debian dir fb955c7c635591b07c8b52773c233ca312089e7a
Diffstat (limited to 'gallery_dl/extractor/manganelo.py')
-rw-r--r--  gallery_dl/extractor/manganelo.py  179
1 file changed, 100 insertions(+), 79 deletions(-)
diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py
index 232b98d..5e92aee 100644
--- a/gallery_dl/extractor/manganelo.py
+++ b/gallery_dl/extractor/manganelo.py
@@ -1,107 +1,128 @@
# -*- coding: utf-8 -*-
+# Copyright 2020 Jake Mannens
+# Copyright 2021-2025 Mike Fährmann
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://manganato.com/"""
+"""Extractors for https://www.mangakakalot.gg/ and mirror sites"""
-from .common import ChapterExtractor, MangaExtractor
-from .. import text
-import re
+from .common import BaseExtractor, ChapterExtractor, MangaExtractor
+from .. import text, util
-BASE_PATTERN = (
- r"(?:https?://)?"
- r"((?:chap|read|www\.|m\.)?mangan(?:at|el)o"
- r"\.(?:to|com))"
-)
+class ManganeloExtractor(BaseExtractor):
+ basecategory = "manganelo"
-class ManganeloBase():
- category = "manganelo"
- root = "https://chapmanganato.com"
- _match_chapter = None
- def __init__(self, match):
- domain, path = match.groups()
- super().__init__(match, "https://" + domain + path)
-
- def _init(self):
- if self._match_chapter is None:
- ManganeloBase._match_chapter = re.compile(
- r"(?:[Vv]ol\.?\s*(\d+)\s?)?"
- r"[Cc]hapter\s*(\d+)([^:]*)"
- r"(?::\s*(.+))?").match
-
- def _parse_chapter(self, info, manga, author, date=None):
- match = self._match_chapter(info)
- if match:
- volume, chapter, minor, title = match.groups()
- else:
- volume = chapter = minor = ""
- title = info
-
- return {
- "manga" : manga,
- "author" : author,
- "date" : date,
- "title" : text.unescape(title) if title else "",
- "volume" : text.parse_int(volume),
- "chapter" : text.parse_int(chapter),
- "chapter_minor": minor,
- "lang" : "en",
- "language" : "English",
- }
+BASE_PATTERN = ManganeloExtractor.update({
+ "nelomanga": {
+ "root" : "https://www.nelomanga.net",
+ "pattern": r"(?:www\.)?nelomanga\.net",
+ },
+ "natomanga": {
+ "root" : "https://www.natomanga.com",
+ "pattern": r"(?:www\.)?natomanga\.com",
+ },
+ "manganato": {
+ "root" : "https://www.manganato.gg",
+ "pattern": r"(?:www\.)?manganato\.gg",
+ },
+ "mangakakalot": {
+ "root" : "https://www.mangakakalot.gg",
+ "pattern": r"(?:www\.)?mangakakalot\.gg",
+ },
+})
-class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
- """Extractor for manga chapters from manganelo.com"""
- pattern = BASE_PATTERN + r"(/(?:manga-\w+|chapter/\w+)/chapter[-_][^/?#]+)"
- example = "https://chapmanganato.com/manga-ID/chapter-01"
+class ManganeloChapterExtractor(ManganeloExtractor, ChapterExtractor):
+ """Extractor for manganelo manga chapters"""
+ pattern = BASE_PATTERN + r"(/manga/[^/?#]+/chapter-[^/?#]+)"
+ example = "https://www.mangakakalot.gg/manga/MANGA_NAME/chapter-123"
+
+ def __init__(self, match):
+ ManganeloExtractor.__init__(self, match)
+ self.gallery_url = self.root + self.groups[-1]
def metadata(self, page):
extr = text.extract_from(page)
- extr('class="a-h"', ">")
- manga = extr('title="', '"')
- info = extr('title="', '"')
- author = extr("- Author(s) : ", "</p>")
- return self._parse_chapter(
- info, text.unescape(manga), text.unescape(author))
+ data = {
+ "date" : text.parse_datetime(extr(
+ '"datePublished": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
+ "date_updated": text.parse_datetime(extr(
+ '"dateModified": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
+ "manga_id" : text.parse_int(extr("comic_id =", ";")),
+ "chapter_id" : text.parse_int(extr("chapter_id =", ";")),
+ "manga" : extr("comic_name =", ";").strip('" '),
+ "lang" : "en",
+ "language" : "English",
+ }
+
+ chapter_name = extr("chapter_name =", ";").strip('" ')
+ chapter, sep, minor = chapter_name.rpartition(" ")[2].partition(".")
+ data["chapter"] = text.parse_int(chapter)
+ data["chapter_minor"] = sep + minor
+ data["author"] = extr(". Author:", " already has ").strip()
+
+ return data
def images(self, page):
- page = text.extr(
- page, 'class="container-chapter-reader', 'class="container')
+ extr = text.extract_from(page)
+ cdns = util.json_loads(extr("var cdns =", ";"))[0]
+ imgs = util.json_loads(extr("var chapterImages =", ";"))
+
+ if cdns[-1] != "/":
+ cdns += "/"
+
return [
- (url, None)
- for url in text.extract_iter(page, '<img src="', '"')
- if not url.endswith("/gohome.png")
- ] or [
- (url, None)
- for url in text.extract_iter(
- page, '<img class="reader-content" src="', '"')
+ (cdns + path, None)
+ for path in imgs
]
-class ManganeloMangaExtractor(ManganeloBase, MangaExtractor):
- """Extractor for manga from manganelo.com"""
+class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
+ """Extractor for manganelo manga"""
chapterclass = ManganeloChapterExtractor
- pattern = BASE_PATTERN + r"(/(?:manga[-/]|read_)\w+)/?$"
- example = "https://manganato.com/manga-ID"
+ pattern = BASE_PATTERN + r"(/manga/[^/?#]+)$"
+ example = "https://www.mangakakalot.gg/manga/MANGA_NAME"
- def chapters(self, page):
- results = []
- append = results.append
+ def __init__(self, match):
+ ManganeloExtractor.__init__(self, match)
+ self.manga_url = self.root + self.groups[-1]
+ def chapters(self, page):
extr = text.extract_from(page)
+
manga = text.unescape(extr("<h1>", "<"))
- author = text.remove_html(extr("</i>Author(s) :</td>", "</tr>"))
-
- extr('class="row-content-chapter', '')
- while True:
- url = extr('class="chapter-name text-nowrap" href="', '"')
- if not url:
- return results
- info = extr(">", "<")
- date = extr('class="chapter-time text-nowrap" title="', '"')
- append((url, self._parse_chapter(info, manga, author, date)))
+ author = text.remove_html(extr("<li>Author(s) :", "</a>"))
+ status = extr("<li>Status :", "<").strip()
+ update = text.parse_datetime(extr(
+ "<li>Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p")
+ tags = text.split_html(extr(">Genres :", "</li>"))[::2]
+
+ results = []
+ for chapter in text.extract_iter(page, '<div class="row">', '</div>'):
+ url, pos = text.extract(chapter, '<a href="', '"')
+ title, pos = text.extract(chapter, '>', '</a>', pos)
+ date, pos = text.extract(chapter, '<span title="', '"', pos)
+ chapter, sep, minor = url.rpartition("/chapter-")[2].partition("-")
+
+ if url[0] == "/":
+ url = self.root + url
+ results.append((url, {
+ "manga" : manga,
+ "author" : author,
+ "status" : status,
+ "tags" : tags,
+ "date_updated": update,
+ "chapter" : text.parse_int(chapter),
+ "chapter_minor": (sep and ".") + minor,
+ "title" : title.partition(": ")[2],
+ "date" : text.parse_datetime(date, "%b-%d-%Y %H:%M"),
+ "lang" : "en",
+ "language": "English",
+ }))
+ return results
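
For reference, a minimal standalone sketch of the parsing logic introduced above. The sample strings (chapter name, chapter URL, CDN host, image paths) are hypothetical placeholders; the string handling itself mirrors the new ManganeloChapterExtractor.metadata(), ManganeloMangaExtractor.chapters(), and images() code in this diff.

# Chapter page: the chapter number comes from a JS variable, e.g. "Chapter 12.5"
chapter_name = "Chapter 12.5"
chapter, sep, minor = chapter_name.rpartition(" ")[2].partition(".")
print(chapter, sep + minor)                      # -> 12 .5

# Manga page: the chapter number is taken from the chapter URL instead
url = "https://www.mangakakalot.gg/manga/MANGA_NAME/chapter-12-5"
chapter, sep, minor = url.rpartition("/chapter-")[2].partition("-")
print(chapter, (sep and ".") + minor)            # -> 12 .5

# Image URLs: first CDN host joined with each relative path, as in images()
cdns = ["https://img-cdn.example"]               # hypothetical CDN list
imgs = ["chapter-12.5/001.webp", "chapter-12.5/002.webp"]
cdn = cdns[0]
if cdn[-1] != "/":
    cdn += "/"
print([cdn + path for path in imgs])

With the new pattern table, any of the listed mirror domains should be accepted on the command line, e.g. gallery-dl "https://www.natomanga.com/manga/MANGA_NAME" (MANGA_NAME is a placeholder, following the 'example' strings in the diff).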