diff options
Diffstat (limited to 'gallery_dl/extractor/weebcentral.py')
| -rw-r--r-- | gallery_dl/extractor/weebcentral.py | 136 |
1 files changed, 136 insertions, 0 deletions
diff --git a/gallery_dl/extractor/weebcentral.py b/gallery_dl/extractor/weebcentral.py new file mode 100644 index 0000000..39f998a --- /dev/null +++ b/gallery_dl/extractor/weebcentral.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://weebcentral.com/""" + +from .common import ChapterExtractor, MangaExtractor +from .. import text +from ..cache import memcache + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?weebcentral\.com" + + +class WeebcentralBase(): + category = "weebcentral" + root = "https://weebcentral.com" + request_interval = (0.5, 1.5) + + @memcache(keyarg=1) + def _extract_manga_data(self, manga_id): + url = "{}/series/{}".format(self.root, manga_id) + page = self.request(url).text + extr = text.extract_from(page) + + return { + "manga_id": manga_id, + "lang" : "en", + "language": "English", + "manga" : text.unescape(extr("<title>", " | Weeb Central")), + "author" : text.split_html(extr("<strong>Author", "</li>"))[1::2], + "tags" : text.split_html(extr("<strong>Tag", "</li>"))[1::2], + "type" : text.remove_html(extr("<strong>Type: ", "</li>")), + "status" : text.remove_html(extr("<strong>Status: ", "</li>")), + "release" : text.remove_html(extr("<strong>Released: ", "</li>")), + "official": ">Yes" in extr("<strong>Official Translatio", "</li>"), + "description": text.unescape(text.remove_html(extr( + "<strong>Description", "</li>"))), + } + + +class WeebcentralChapterExtractor(WeebcentralBase, ChapterExtractor): + """Extractor for manga chapters from weebcentral.com""" + pattern = BASE_PATTERN + r"(/chapters/(\w+))" + example = "https://weebcentral.com/chapters/01JHABCDEFGHIJKLMNOPQRSTUV" + + def metadata(self, page): + extr = text.extract_from(page) + manga_id = extr("'series_id': '", "'") + + data = self._extract_manga_data(manga_id) + data["chapter_id"] = self.groups[1] + data["chapter_type"] = extr("'chapter_type': '", "'") + + chapter, sep, minor = extr("'number': '", "'").partition(".") + data["chapter"] = text.parse_int(chapter) + data["chapter_minor"] = sep + minor + + return data + + def images(self, page): + referer = self.gallery_url + url = referer + "/images" + params = { + "is_prev" : "False", + "current_page" : "1", + "reading_style": "long_strip", + } + headers = { + "Accept" : "*/*", + "Referer" : referer, + "HX-Request" : "true", + "HX-Current-URL": referer, + } + page = self.request(url, params=params, headers=headers).text + extr = text.extract_from(page) + + results = [] + while True: + src = extr(' src="', '"') + if not src: + break + results.append((src, { + "width" : text.parse_int(extr(' width="' , '"')), + "height": text.parse_int(extr(' height="', '"')), + })) + return results + + +class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor): + """Extractor for manga from weebcentral.com""" + chapterclass = WeebcentralChapterExtractor + pattern = BASE_PATTERN + r"/series/(\w+)" + example = "https://weebcentral.com/series/01J7ABCDEFGHIJKLMNOPQRSTUV/TITLE" + + def __init__(self, match): + MangaExtractor.__init__(self, match, False) + + def chapters(self, _): + manga_id = self.groups[0] + referer = "{}/series/{}".format(self.root, manga_id) + url = referer + "/full-chapter-list" + headers = { + "Accept" : "*/*", + "Referer" : referer, + "HX-Request" : "true", + "HX-Target" : "chapter-list", + "HX-Current-URL": referer, + } + page = self.request(url, headers=headers).text + extr = text.extract_from(page) + data = self._extract_manga_data(manga_id) + base = self.root + "/chapters/" + + results = [] + while True: + chapter_id = extr("/chapters/", '"') + if not chapter_id: + break + type, _, chapter = extr('<span class="">', "<").partition(" ") + chapter, sep, minor = chapter.partition(".") + + chapter = { + "chapter_id" : chapter_id, + "chapter" : text.parse_int(chapter), + "chapter_minor": sep + minor, + "chapter_type" : type, + "date" : text.parse_datetime( + extr(' datetime="', '"')[:-5], "%Y-%m-%dT%H:%M:%S"), + } + chapter.update(data) + results.append((base + chapter_id, chapter)) + return results |
