diff options
Diffstat (limited to 'gallery_dl/extractor/comick.py')
| -rw-r--r-- | gallery_dl/extractor/comick.py | 198 |
1 files changed, 198 insertions, 0 deletions
# -*- coding: utf-8 -*-

# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://comick.io/"""

from .common import ChapterExtractor, MangaExtractor, Message
from .. import text
from ..cache import memcache

BASE_PATTERN = r"(?:https?://)?(?:www\.)?comick\.io"


def _split_chapter(chap):
    """Split a raw 'chap' value into (chapter, chapter_minor)

    The part before the first "." is passed through text.parse_int();
    the separator and everything after it form the minor part,
    e.g. "12.5" -> (parse_int("12"), ".5") and "7" -> (parse_int("7"), "").

    A missing value (None or "") no longer raises AttributeError: it is
    treated as an empty string, so the minor part is "" and the major
    part is whatever text.parse_int() returns for "".
    """
    major, sep, minor = (chap or "").partition(".")
    return text.parse_int(major), sep + minor


class ComickBase():
    """Base class for comick.io extractors"""
    category = "comick"
    root = "https://comick.io"

    @memcache(keyarg=1)
    def _manga_info(self, slug):
        """Return a metadata dict for the comic identified by 'slug'

        Requests "{root}/comic/{slug}", reads the page's Next.js data via
        _extract_nextdata() and flattens the interesting fields.
        Decorated with memcache(keyarg=1), i.e. keyed on 'slug'.

        The returned dict also carries "_build_id", which _chapter_info()
        needs to build its "_next/data" URL.
        """
        url = f"{self.root}/comic/{slug}"
        page = self.request(url).text
        data = self._extract_nextdata(page)
        props = data["props"]["pageProps"]
        comic = props["comic"]

        # Sort genre entries by their "group": "Genre" and "Theme" are
        # collected as lists, any other group's name is used as "format"
        # (the last such entry wins).
        genre = []
        theme = []
        fmt = ""
        for item in comic["md_comic_md_genres"]:
            item = item["md_genres"]
            group = item["group"]
            if group == "Genre":
                genre.append(item["name"])
            elif group == "Theme":
                theme.append(item["name"])
            else:
                fmt = item["name"]

        # "mu_comics" may be falsy, in which case there are
        # no categories or publishers to list
        if mu := comic["mu_comics"]:
            tags = [c["mu_categories"]["title"]
                    for c in mu["mu_comic_categories"]]
            publisher = [p["mu_publishers"]["title"]
                         for p in mu["mu_comic_publishers"]]
        else:
            tags = publisher = ()

        return {
            "manga": comic["title"],
            "manga_id": comic["id"],
            "manga_hid": comic["hid"],
            "manga_slug": slug,
            "manga_titles": [t["title"] for t in comic["md_titles"]],
            "artist": [a["name"] for a in props["artists"]],
            "author": [a["name"] for a in props["authors"]],
            "genre" : genre,
            "theme" : theme,
            "format": fmt,
            "tags"  : tags,
            "publisher": publisher,
            "published": text.parse_int(comic["year"]),
            "description": comic["desc"],
            "demographic": props["demographic"],
            "origin": comic["iso639_1"],
            "mature": props["matureContent"],
            "rating": comic["content_rating"],
            "rank"  : comic["follow_rank"],
            "score" : text.parse_float(comic["bayesian_rating"]),
            "status": "Complete" if comic["status"] == 2 else "Ongoing",
            "links" : comic["links"],
            "_build_id": data["buildId"],
        }

    def _chapter_info(self, manga, chstr):
        """Return the "pageProps" object for chapter 'chstr' of 'manga'

        'manga' is a dict as returned by _manga_info(); its "manga_slug"
        and "_build_id" values are used to build the request URL.
        """
        slug = manga['manga_slug']
        url = (f"{self.root}/_next/data/{manga['_build_id']}"
               f"/comic/{slug}/{chstr}.json")
        params = {"slug": slug, "chapter": chstr}
        return self.request_json(url, params=params)["pageProps"]


class ComickChapterExtractor(ComickBase, ChapterExtractor):
    """Extractor for comick.io manga chapters"""
    archive_fmt = "{chapter_hid}_{page}"
    pattern = BASE_PATTERN + r"/comic/([\w-]+)/(\w+-chapter-[^/?#]+)"
    example = "https://comick.io/comic/MANGA/ID-chapter-123-en"

    def metadata(self, page):
        """Return manga + chapter metadata for the matched URL

        Also stores the chapter's "md_images" list in self._images
        for use by images().
        """
        slug, chstr = self.groups
        manga = self._manga_info(slug)
        props = self._chapter_info(manga, chstr)

        ch = props["chapter"]
        self._images = ch["md_images"]
        # tolerate a missing "chap" value instead of raising
        chapter, chapter_minor = _split_chapter(ch["chap"])

        return {
            **manga,
            "title"         : props["chapTitle"],
            "volume"        : text.parse_int(ch["vol"]),
            "chapter"       : chapter,
            "chapter_minor" : chapter_minor,
            "chapter_id"    : ch["id"],
            "chapter_hid"   : ch["hid"],
            "chapter_string": chstr,
            "group"         : ch["group_name"],
            # only the first 19 characters ("YYYY-mm-ddTHH:MM:SS")
            # of each timestamp are parsed
            "date"          : text.parse_datetime(
                ch["created_at"][:19], "%Y-%m-%dT%H:%M:%S"),
            "date_updated"  : text.parse_datetime(
                ch["updated_at"][:19], "%Y-%m-%dT%H:%M:%S"),
            "lang"          : ch["lang"],
        }

    def images(self, page):
        """Return a list of (url, metadata) tuples for the chapter's images

        Relies on self._images having been set by metadata().
        """
        return [
            ("https://meo.comick.pictures/" + img["b2key"], {
                "width"    : img["w"],
                "height"   : img["h"],
                "size"     : img["s"],
                "optimized": img["optimized"],
            })
            for img in self._images
        ]


class ComickMangaExtractor(ComickBase, MangaExtractor):
    """Extractor for comick.io manga"""
    pattern = BASE_PATTERN + r"/comic/([\w-]+)/?(?:\?([^#]+))?"
    example = "https://comick.io/comic/MANGA"

    def items(self):
        """Yield one Message.Queue entry per chapter of the matched manga"""
        slug = self.groups[0]
        manga = self._manga_info(slug)

        for ch in self.chapters(manga):
            # NOTE(review): a null "chap" ends up as the literal "None"
            # in this URL - confirm how the site addresses such chapters
            url = (f"{self.root}/comic/{slug}"
                   f"/{ch['hid']}-chapter-{ch['chap']}-{ch['lang']}")

            ch.update(manga)
            # tolerate a missing "chap" value instead of raising
            ch["chapter"], ch["chapter_minor"] = _split_chapter(ch["chap"])
            ch["_extractor"] = ComickChapterExtractor

            yield Message.Queue, url, ch

    def chapters(self, manga):
        """Yield chapter objects from the paginated chapter-list API

        Query parameters of the matched URL are honored:
        "lang", "page", "date-order" / "chap-order" and "group".
        Without an explicit order, the "chapter-reverse" config option
        selects "chap-order" ("0" if enabled, "1" otherwise).
        A "group" value other than "0" restricts results to chapters
        whose "group_name" contains that value.
        """
        info = True
        query = self.groups[1]

        url = f"https://api.comick.io/comic/{manga['manga_hid']}/chapters"
        headers = {
            "Origin": "https://comick.io",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-site",
        }

        query = text.parse_query(query)
        params = {"lang": query.get("lang") or None}
        params["page"] = page = text.parse_int(query.get("page"), 1)

        if date_order := query.get("date-order"):
            params["date-order"] = date_order
        elif chap_order := query.get("chap-order"):
            params["chap-order"] = chap_order
        else:
            params["chap-order"] = \
                "0" if self.config("chapter-reverse", False) else "1"

        group = query.get("group", None)
        if group == "0":
            group = None

        while True:
            data = self.request_json(url, params=params, headers=headers)
            limit = data["limit"]

            if info:
                # only report once, and only if more than one request
                # is going to be necessary
                info = False
                # pages are 1-based (see the termination check below), so
                # the current page's chapters still count as pending
                total = data["total"] - limit * (page - 1)
                if total > limit:
                    self.log.info("Collecting %s chapters", total)

            if group is None:
                yield from data["chapters"]
            else:
                for ch in data["chapters"]:
                    # guard against a null/empty "group_name";
                    # 'in' would raise TypeError on None
                    names = ch["group_name"]
                    if names and group in names:
                        yield ch

            if data["total"] <= limit * page:
                return
            params["page"] = page = page + 1
