diff options
Diffstat (limited to 'gallery_dl/extractor/mangadex.py')
| -rw-r--r-- | gallery_dl/extractor/mangadex.py | 354 |
1 files changed, 211 insertions, 143 deletions
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 6a88d58..0fe46b1 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -10,202 +10,270 @@ from .common import Extractor, Message from .. import text, util, exception -from ..cache import memcache +from ..cache import cache, memcache +from collections import defaultdict + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)" class MangadexExtractor(Extractor): """Base class for mangadex extractors""" category = "mangadex" + directory_fmt = ( + "{category}", "{manga}", + "{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}") + filename_fmt = ( + "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}") + archive_fmt = "{chapter_id}_{page}" root = "https://mangadex.org" - api_root = "https://api.mangadex.org" - - # mangadex-to-iso639-1 codes - iso639_map = { - "br": "pt", - "ct": "ca", - "gb": "en", - "vn": "vi", - } + _cache = {} def __init__(self, match): Extractor.__init__(self, match) + self.api = MangadexAPI(self) + self.uuid = match.group(1) + + def items(self): + for chapter in self.chapters(): + uuid = chapter["data"]["id"] + data = self._transform(chapter) + data["_extractor"] = MangadexChapterExtractor + self._cache[uuid] = (chapter, data) + yield Message.Queue, self.root + "/chapter/" + uuid, data - server = self.config("api-server") - if server is not None: - self.api_root = server.rstrip("/") + def _transform(self, chapter): + relationships = defaultdict(list) + for item in chapter["relationships"]: + relationships[item["type"]].append(item["id"]) + manga = self.api.manga(relationships["manga"][0]) + for item in manga["relationships"]: + relationships[item["type"]].append(item["id"]) - def chapter_data(self, chapter_id): - """Request API results for 'chapter_id'""" - url = "{}/v2/chapter/{}".format(self.api_root, chapter_id) - return self.request(url).json()["data"] + cattributes = chapter["data"]["attributes"] + mattributes = manga["data"]["attributes"] + lang = cattributes["translatedLanguage"].partition("-")[0] - @memcache(keyarg=1) - def manga_data(self, manga_id): - """Request API results for 'manga_id'""" - url = "{}/v2/manga/{}".format(self.api_root, manga_id) - return self.request(url).json()["data"] - - def manga_chapters(self, manga_id): - """Request chapter list for 'manga_id'""" - url = "{}/v2/manga/{}/chapters".format(self.api_root, manga_id) - data = self.request(url).json()["data"] - - groups = { - group["id"]: group["name"] - for group in data["groups"] + if cattributes["chapter"]: + chnum, sep, minor = cattributes["chapter"].partition(".") + else: + chnum, sep, minor = 0, "", "" + + data = { + "manga" : mattributes["title"]["en"], + "manga_id": manga["data"]["id"], + "title" : cattributes["title"], + "volume" : text.parse_int(cattributes["volume"]), + "chapter" : text.parse_int(chnum), + "chapter_minor": sep + minor, + "chapter_id": chapter["data"]["id"], + "date" : text.parse_datetime(cattributes["publishAt"]), + "lang" : lang, + "language": util.code_to_language(lang), + "count" : len(cattributes["data"]), } - for chapter in data["chapters"]: - cgroups = chapter["groups"] - for idx, group_id in enumerate(cgroups): - cgroups[idx] = groups[group_id] - yield chapter + if self.config("metadata"): + data["artist"] = [ + self.api.author(uuid)["data"]["attributes"]["name"] + for uuid in relationships["artist"]] + data["author"] = [ + self.api.author(uuid)["data"]["attributes"]["name"] + for uuid in relationships["author"]] + data["group"] = [ + self.api.group(uuid)["data"]["attributes"]["name"] + for uuid in relationships["scanlation_group"]] + + return data class MangadexChapterExtractor(MangadexExtractor): """Extractor for manga-chapters from mangadex.org""" subcategory = "chapter" - directory_fmt = ( - "{category}", "{manga}", - "{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}") - filename_fmt = ( - "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}") - archive_fmt = "{chapter_id}_{page}" - pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)/chapter/(\d+)" + pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)" test = ( - ("https://mangadex.org/chapter/122094", { - "keyword": "89d1b24b4baa1fb737d32711d9f2ade6ea426987", + ("https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa", { + "keyword": "f6c2b908df06eb834d56193dfe1fa1f7c2c4dccd", # "content": "50383a4c15124682057b197d40261641a98db514", }), # oneshot - ("https://mangadex.cc/chapter/138086", { + ("https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831", { + "options": (("metadata", True),), "count": 64, - "keyword": "c53a0e4c12250578a4e630281085875e59532c03", + "keyword": "6abcbe1e24eeb1049dc931958853cd767ee483fb", }), # MANGA Plus (#1154) - ("https://mangadex.org/chapter/1122815", { - "exception": exception.HttpError, + ("https://mangadex.org/chapter/8d50ed68-8298-4ac9-b63d-cb2aea143dd0", { + "exception": exception.StopExtraction, }), ) - def __init__(self, match): - MangadexExtractor.__init__(self, match) - self.chapter_id = match.group(1) - def items(self): - cdata = self.chapter_data(self.chapter_id) - if "server" not in cdata: - if cdata["status"] == "external": - raise exception.StopExtraction( - "Chapter is not available on MangaDex and can be read on " - "the official publisher's website at %s.", cdata["pages"]) - raise exception.StopExtraction("No download server available.") - mdata = self.manga_data(cdata["mangaId"]) - - chapter, sep, minor = cdata["chapter"].partition(".") - lang = self.iso639_map.get(cdata["language"], cdata["language"]) - - base = cdata["server"] + cdata["hash"] + "/" - if base[0] == "/": - base = text.urljoin(self.root, base) - - if "serverFallback" in cdata: - fallback = cdata["serverFallback"] + cdata["hash"] + "/" - else: - fallback = None - - data = { - "manga" : text.unescape(mdata["title"]), - "manga_id": mdata["id"], - "artist" : mdata["artist"], - "author" : mdata["author"], - "title" : text.unescape(cdata["title"]), - "volume" : text.parse_int(cdata["volume"]), - "chapter" : text.parse_int(chapter), - "chapter_minor": sep + minor, - "chapter_id": cdata["id"], - "group" : [group["name"] for group in cdata["groups"]], - "date" : text.parse_timestamp(cdata["timestamp"]), - "lang" : lang, - "language": util.code_to_language(lang), - "count" : len(cdata["pages"]), - } - + try: + chapter, data = self._cache.pop(self.uuid) + except KeyError: + chapter = self.api.chapter(self.uuid) + data = self._transform(chapter) yield Message.Directory, data - for data["page"], page in enumerate(cdata["pages"], 1): - if fallback: - data["_fallback"] = (fallback + page,) - yield Message.Url, base + page, text.nameext_from_url(page, data) + + cattributes = chapter["data"]["attributes"] + base = "{}/data/{}/".format( + self.api.athome_server(self.uuid)["baseUrl"], cattributes["hash"]) + for data["page"], page in enumerate(cattributes["data"], 1): + text.nameext_from_url(page, data) + yield Message.Url, base + page, data class MangadexMangaExtractor(MangadexExtractor): """Extractor for manga from mangadex.org""" subcategory = "manga" - categorytransfer = True - pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)" - r"/(?:title|manga)/(\d+)") + pattern = BASE_PATTERN + r"/(?:title|manga)/(?!feed$)([0-9a-f-]+)" test = ( - ("https://mangadex.org/manga/2946/souten-no-koumori", { - "pattern": r"https://mangadex.org/chapter/\d+", + ("https://mangadex.org/title/f90c4398-8aad-4f51-8a1f-024ca09fdcbc", { "keyword": { "manga" : "Souten no Koumori", - "manga_id": 2946, + "manga_id": "f90c4398-8aad-4f51-8a1f-024ca09fdcbc", "title" : "re:One[Ss]hot", "volume" : 0, "chapter" : 0, "chapter_minor": "", - "chapter_id": int, - "group" : list, + "chapter_id": str, "date" : "type:datetime", "lang" : str, "language": str, }, }), - ("https://mangadex.cc/manga/13318/dagashi-kashi/chapters/2/", { + ("https://mangadex.cc/manga/d0c88e3b-ea64-4e07-9841-c1d2ac982f4a/", { + "options": (("lang", "en"),), "count": ">= 100", }), - ("https://mangadex.org/title/13004/yorumori-no-kuni-no-sora-ni", { - "count": 0, + ("https://mangadex.org/title/7c1e2742-a086-4fd3-a3be-701fd6cf0be9", { + "count": 1, }), ) - def __init__(self, match): - MangadexExtractor.__init__(self, match) - self.manga_id = match.group(1) + def chapters(self): + return self.api.manga_feed(self.uuid) - def items(self): - yield Message.Version, 1 - for data in self.chapters(): - url = "{}/chapter/{}".format(self.root, data["chapter_id"]) - yield Message.Queue, url, data + +class MangadexFeedExtractor(MangadexExtractor): + """Extractor for chapters from your Followed Feed""" + subcategory = "feed" + pattern = BASE_PATTERN + r"/title/feed$()" + test = ("https://mangadex.org/title/feed",) def chapters(self): - """Return a sorted list of chapter-metadata dicts""" - manga = self.manga_data(int(self.manga_id)) - results = [] - - for cdata in self.manga_chapters(self.manga_id): - chapter, sep, minor = cdata["chapter"].partition(".") - lang = self.iso639_map.get(cdata["language"], cdata["language"]) - results.append({ - "manga" : text.unescape(manga["title"]), - "manga_id": text.parse_int(self.manga_id), - "artist" : manga["artist"], - "author" : manga["author"], - "title" : text.unescape(cdata["title"]), - "volume" : text.parse_int(cdata["volume"]), - "chapter" : text.parse_int(chapter), - "chapter_minor": sep + minor, - "chapter_id": text.parse_int(cdata["id"]), - "group" : cdata["groups"], - "date" : text.parse_timestamp(cdata["timestamp"]), - "lang" : lang, - "language": util.code_to_language(lang), - "_extractor": MangadexChapterExtractor, - }) - - results.sort( - key=lambda x: (x["chapter"], x["chapter_minor"]), - reverse=self.config("chapter-reverse", False), - ) - return results + return self.api.user_follows_manga_feed() + + +class MangadexAPI(): + """Interface for the MangaDex API v5""" + + def __init__(self, extr): + self.extractor = extr + self.headers = {} + + self.username, self.password = self.extractor._get_auth_info() + if not self.username: + self.authenticate = util.noop + + server = extr.config("api-server") + self.root = ("https://api.mangadex.org" if server is None + else text.ensure_http_scheme(server).rstrip("/")) + + def athome_server(self, uuid): + return self._call("/at-home/server/" + uuid) + + @memcache(keyarg=1) + def author(self, uuid): + return self._call("/author/" + uuid) + + def chapter(self, uuid): + return self._call("/chapter/" + uuid) + + @memcache(keyarg=1) + def group(self, uuid): + return self._call("/group/" + uuid) + + @memcache(keyarg=1) + def manga(self, uuid): + return self._call("/manga/" + uuid) + + def manga_feed(self, uuid): + config = self.extractor.config + order = "desc" if config("chapter-reverse") else "asc" + params = { + "order[volume]" : order, + "order[chapter]" : order, + "translatedLanguage[]": config("lang"), + } + return self._pagination("/manga/" + uuid + "/feed", params) + + def user_follows_manga_feed(self): + params = { + "order[publishAt]" : "desc", + "translatedLanguage[]": self.extractor.config("lang"), + } + return self._pagination("/user/follows/manga/feed", params) + + def authenticate(self): + self.headers["Authorization"] = \ + self._authenticate_impl(self.username, self.password) + + @cache(maxage=900, keyarg=1) + def _authenticate_impl(self, username, password): + refresh_token = _refresh_token_cache(username) + if refresh_token: + self.extractor.log.info("Refreshing access token") + url = self.root + "/auth/refresh" + data = {"token": refresh_token} + else: + self.extractor.log.info("Logging in as %s", username) + url = self.root + "/auth/login" + data = {"username": username, "password": password} + + data = self.extractor.request( + url, method="POST", json=data, fatal=None).json() + if data.get("result") != "ok": + raise exception.AuthenticationError() + + if refresh_token != data["token"]["refresh"]: + _refresh_token_cache.update(username, data["token"]["refresh"]) + return "Bearer " + data["token"]["session"] + + def _call(self, endpoint, params=None): + url = self.root + endpoint + + while True: + self.authenticate() + response = self.extractor.request( + url, params=params, headers=self.headers, fatal=None) + + if response.status_code < 400: + return response.json() + if response.status_code == 429: + until = response.headers.get("X-RateLimit-Retry-After") + self.extractor.wait(until=until) + continue + + msg = ", ".join('{title}: {detail}'.format_map(error) + for error in response.json()["errors"]) + raise exception.StopExtraction( + "%s %s (%s)", response.status_code, response.reason, msg) + + def _pagination(self, endpoint, params=None): + if params is None: + params = {} + params["offset"] = 0 + + while True: + data = self._call(endpoint, params) + yield from data["results"] + + params["offset"] = data["offset"] + data["limit"] + if params["offset"] >= data["total"]: + return + + +@cache(maxage=28*24*3600, keyarg=0) +def _refresh_token_cache(username): + return None |
