diff options
Diffstat (limited to 'gallery_dl/extractor/senmanga.py')
| -rw-r--r-- | gallery_dl/extractor/senmanga.py | 96 |
1 files changed, 60 insertions, 36 deletions
diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py index 34177b4..6d025f4 100644 --- a/gallery_dl/extractor/senmanga.py +++ b/gallery_dl/extractor/senmanga.py @@ -1,64 +1,88 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2019 Mike Fährmann +# Copyright 2016-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract manga-chapters from from https://raw.senmanga.com/""" +"""Extractors for https://raw.senmanga.com/""" -from .common import Extractor, Message +from .common import ChapterExtractor from .. import text -class SenmangaChapterExtractor(Extractor): - """Extractor for manga-chapters from raw.senmanga.com""" +class SenmangaChapterExtractor(ChapterExtractor): + """Extractor for manga chapters from raw.senmanga.com""" category = "senmanga" - subcategory = "chapter" - directory_fmt = ("{category}", "{manga}", "{chapter_string}") - filename_fmt = "{manga}_{chapter_string}_{page:>03}.{extension}" - archive_fmt = "{manga}_{chapter_string}_{page}" - pattern = r"(?:https?://)?raw\.senmanga\.com/([^/]+/[^/]+)" + root = "https://raw.senmanga.com" + pattern = r"(?:https?://)?raw\.senmanga\.com(/[^/?#]+/[^/?#]+)" test = ( - ("http://raw.senmanga.com/Bokura-wa-Minna-Kawaisou/37A/1", { + ("https://raw.senmanga.com/Bokura-wa-Minna-Kawaisou/37A/1", { + "pattern": r"https://raw\.senmanga\.com/viewer" + r"/Bokura-wa-Minna-Kawaisou/37A/[12]", "url": "5f95140ff511d8497e2ec08fa7267c6bb231faec", - "keyword": "705d941a150765edb33cd2707074bd703a93788c", "content": "556a16d5ca3441d7a5807b6b5ac06ec458a3e4ba", + "keyword": { + "chapter": "37A", + "count": 2, + "extension": "", + "filename": "re:[12]", + "lang": "ja", + "language": "Japanese", + "manga": "Bokura wa Minna Kawaisou", + "page": int, + }, }), ("http://raw.senmanga.com/Love-Lab/2016-03/1", { + "pattern": r"https://raw\.senmanga\.com/viewer" + r"/Love-Lab/2016-03/\d", "url": "8347b9f00c14b864dd3c19a1f5ae52adb2ef00de", - "keyword": "8a8ab2529ba2edfc83a6b3a8bede1d6c580db7b4", + "keyword": { + "chapter": "2016-03", + "count": 9, + "extension": "", + "filename": r"re:\d", + "manga": "Renai Lab 恋愛ラボ", + }, + }), + ("https://raw.senmanga.com/akabane-honeko-no-bodyguard/1", { + "pattern": r"https://i\d\.wp\.com/kumacdn.club/image-new-2/a" + r"/akabane-honeko-no-bodyguard/chapter-1" + r"/\d+-[0-9a-f]{13}\.jpg", + "keyword": { + "chapter": "1", + "count": 65, + "extension": "jpg", + "filename": r"re:\d+-\w+", + "manga": "Akabane Honeko no Bodyguard", + }, }), ) - root = "https://raw.senmanga.com" def __init__(self, match): - Extractor.__init__(self, match) - part = match.group(1) - self.chapter_url = "{}/{}/".format(self.root, part) - self.img_url = "{}/viewer/{}/".format(self.root, part) - self.session.headers["Referer"] = self.chapter_url + ChapterExtractor.__init__(self, match) + self.session.headers["Referer"] = self.gallery_url - def items(self): - data = self.metadata() - yield Message.Directory, data - for data["page"] in range(1, data["count"]+1): - data["extension"] = None - yield Message.Url, self.img_url + str(data["page"]), data + # select "All pages" viewer + self.session.cookies.set( + "viewer", "1", domain="raw.senmanga.com") - def metadata(self): - """Collect metadata for extractor-job""" - page = self.request(self.chapter_url).text - self.session.cookies.clear() - title, pos = text.extract(page, '<title>', '</title>') - count, pos = text.extract(page, '</select> of ', '\n', pos) + def metadata(self, page): + title = text.extr(page, "<title>", "</title>") manga, _, chapter = title.partition(" - Chapter ") return { - "manga": text.unescape(manga).replace("-", " "), - "chapter_string": chapter.partition(" - Page ")[0], - "count": text.parse_int(count), - "lang": "jp", - "language": "Japanese", + "manga" : text.unescape(manga).replace("-", " "), + "chapter" : chapter.partition(" - Page ")[0], + "chapter_minor": "", + "lang" : "ja", + "language" : "Japanese", } + + def images(self, page): + return [ + (url, None) + for url in text.extract_iter( + page, '<img class="picture" src="', '"') + ] |
