summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/mangaread.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/mangaread.py')
-rw-r--r--gallery_dl/extractor/mangaread.py191
1 files changed, 191 insertions, 0 deletions
diff --git a/gallery_dl/extractor/mangaread.py b/gallery_dl/extractor/mangaread.py
new file mode 100644
index 0000000..49d4d7d
--- /dev/null
+++ b/gallery_dl/extractor/mangaread.py
@@ -0,0 +1,191 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://mangaread.org/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text, exception
+import re
+
+
+class MangareadBase():
+ """Base class for Mangaread extractors"""
+ category = "mangaread"
+ root = "https://www.mangaread.org"
+
+ @staticmethod
+ def parse_chapter_string(chapter_string, data):
+ match = re.match(
+ r"(?:(.+)\s*-\s*)?[Cc]hapter\s*(\d+)(\.\d+)?(?:\s*-\s*(.+))?",
+ text.unescape(chapter_string).strip())
+ manga, chapter, minor, title = match.groups()
+ manga = manga.strip() if manga else ""
+ data["manga"] = data.pop("manga", manga)
+ data["chapter"] = text.parse_int(chapter)
+ data["chapter_minor"] = minor or ""
+ data["title"] = title or ""
+ data["lang"] = "en"
+ data["language"] = "English"
+
+
+class MangareadChapterExtractor(MangareadBase, ChapterExtractor):
+ """Extractor for manga-chapters from mangaread.org"""
+ pattern = (r"(?:https?://)?(?:www\.)?mangaread\.org"
+ r"(/manga/[^/?#]+/[^/?#]+)")
+ test = (
+ ("https://www.mangaread.org/manga/one-piece/chapter-1053-3/", {
+ "pattern": (r"https://www\.mangaread\.org/wp-content/uploads"
+ r"/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+"),
+ "count": 11,
+ "keyword": {
+ "manga" : "One Piece",
+ "title" : "",
+ "chapter" : 1053,
+ "chapter_minor": ".3",
+ "tags" : ["Oda Eiichiro"],
+ "lang" : "en",
+ "language": "English",
+ }
+ }),
+ ("https://www.mangaread.org/manga/one-piece/chapter-1000000/", {
+ "exception": exception.NotFoundError,
+ }),
+ (("https://www.mangaread.org"
+ "/manga/kanan-sama-wa-akumade-choroi/chapter-10/"), {
+ "pattern": (r"https://www\.mangaread\.org/wp-content/uploads"
+ r"/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+"),
+ "count": 9,
+ "keyword": {
+ "manga" : "Kanan-sama wa Akumade Choroi",
+ "title" : "",
+ "chapter" : 10,
+ "chapter_minor": "",
+ "tags" : list,
+ "lang" : "en",
+ "language": "English",
+ }
+ }),
+ # 'Chapter146.5'
+ # ^^ no whitespace
+ ("https://www.mangaread.org/manga/above-all-gods/chapter146-5/", {
+ "pattern": (r"https://www\.mangaread\.org/wp-content/uploads"
+ r"/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+"),
+ "count": 6,
+ "keyword": {
+ "manga" : "Above All Gods",
+ "title" : "",
+ "chapter" : 146,
+ "chapter_minor": ".5",
+ "tags" : list,
+ "lang" : "en",
+ "language": "English",
+ }
+ }),
+ )
+
+ def metadata(self, page):
+ data = {"tags": list(text.extract_iter(page, "class>", "<"))}
+ info = text.extr(page, '<h1 id="chapter-heading">', "</h1>")
+ if not info:
+ raise exception.NotFoundError("chapter")
+ self.parse_chapter_string(info, data)
+ return data
+
+ def images(self, page):
+ page = text.extr(
+ page, '<div class="reading-content">', '<div class="entry-header')
+ return [
+ (url.strip(), None)
+ for url in text.extract_iter(page, 'data-src="', '"')
+ ]
+
+
+class MangareadMangaExtractor(MangareadBase, MangaExtractor):
+ """Extractor for manga from mangaread.org"""
+ chapterclass = MangareadChapterExtractor
+ pattern = r"(?:https?://)?(?:www\.)?mangaread\.org(/manga/[^/?#]+)/?$"
+ test = (
+ ("https://www.mangaread.org/manga/kanan-sama-wa-akumade-choroi", {
+ "pattern": (r"https://www\.mangaread\.org/manga"
+ r"/kanan-sama-wa-akumade-choroi"
+ r"/chapter-\d+(-.+)?/"),
+ "count" : ">= 13",
+ "keyword": {
+ "manga" : "Kanan-sama wa Akumade Choroi",
+ "author" : ["nonco"],
+ "artist" : ["nonco"],
+ "type" : "Manga",
+ "genres" : ["Comedy", "Romance", "Shounen", "Supernatural"],
+ "rating" : float,
+ "release": 2022,
+ "status" : "OnGoing",
+ "lang" : "en",
+ "language" : "English",
+ "manga_alt" : list,
+ "description": str,
+ }
+ }),
+ ("https://www.mangaread.org/manga/one-piece", {
+ "pattern": (r"https://www\.mangaread\.org/manga"
+ r"/one-piece/chapter-\d+(-.+)?/"),
+ "count" : ">= 1066",
+ "keyword": {
+ "manga" : "One Piece",
+ "author" : ["Oda Eiichiro"],
+ "artist" : ["Oda Eiichiro"],
+ "type" : "Manga",
+ "genres" : list,
+ "rating" : float,
+ "release": 1997,
+ "status" : "OnGoing",
+ "lang" : "en",
+ "language" : "English",
+ "manga_alt" : ["One Piece"],
+ "description": str,
+ }
+ }),
+ ("https://www.mangaread.org/manga/doesnotexist", {
+ "exception": exception.NotFoundError,
+ }),
+ )
+
+ def chapters(self, page):
+ if 'class="error404' in page:
+ raise exception.NotFoundError("manga")
+ data = self.metadata(page)
+ result = []
+ for chapter in text.extract_iter(
+ page, '<li class="wp-manga-chapter', "</li>"):
+ url , pos = text.extract(chapter, '<a href="', '"')
+ info, _ = text.extract(chapter, ">", "</a>", pos)
+ self.parse_chapter_string(info, data)
+ result.append((url, data.copy()))
+ return result
+
+ def metadata(self, page):
+ extr = text.extract_from(text.extr(
+ page, 'class="summary_content">', 'class="manga-action"'))
+ return {
+ "manga" : text.extr(page, "<h1>", "</h1>").strip(),
+ "description": text.unescape(text.remove_html(text.extract(
+ page, ">", "</div>", page.index("summary__content"))[0])),
+ "rating" : text.parse_float(
+ extr('total_votes">', "</span>").strip()),
+ "manga_alt" : text.remove_html(
+ extr("Alternative </h5>\n</div>", "</div>")).split("; "),
+ "author" : list(text.extract_iter(
+ extr('class="author-content">', "</div>"), '"tag">', "</a>")),
+ "artist" : list(text.extract_iter(
+ extr('class="artist-content">', "</div>"), '"tag">', "</a>")),
+ "genres" : list(text.extract_iter(
+ extr('class="genres-content">', "</div>"), '"tag">', "</a>")),
+ "type" : text.remove_html(
+ extr("Type </h5>\n</div>", "</div>")),
+ "release" : text.parse_int(text.remove_html(
+ extr("Release </h5>\n</div>", "</div>"))),
+ "status" : text.remove_html(
+ extr("Status </h5>\n</div>", "</div>")),
+ }