summary refs log tree commit diff stats
path: root/gallery_dl/extractor/mangadex.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/mangadex.py')
-rw-r--r--  gallery_dl/extractor/mangadex.py  354
1 files changed, 211 insertions, 143 deletions
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 6a88d58..0fe46b1 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -10,202 +10,270 @@
from .common import Extractor, Message
from .. import text, util, exception
-from ..cache import memcache
+from ..cache import cache, memcache
+from collections import defaultdict
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)"
class MangadexExtractor(Extractor):
    """Base class for mangadex extractors.

    Subclasses provide chapters(), an iterable of raw chapter API
    results.  items() converts each result into chapter metadata and
    yields it as a queue message for MangadexChapterExtractor, caching
    the raw API data so the chapter extractor can avoid a second
    API request for the same chapter.
    """
    category = "mangadex"
    directory_fmt = (
        "{category}", "{manga}",
        "{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}")
    filename_fmt = (
        "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
    archive_fmt = "{chapter_id}_{page}"
    root = "https://mangadex.org"
    # chapter UUID -> (raw API result, metadata dict); class-level, so it
    # is shared between extractor instances within one run
    _cache = {}

    def __init__(self, match):
        Extractor.__init__(self, match)
        self.api = MangadexAPI(self)
        self.uuid = match.group(1)

    def items(self):
        for chapter in self.chapters():
            uuid = chapter["data"]["id"]
            data = self._transform(chapter)
            data["_extractor"] = MangadexChapterExtractor
            self._cache[uuid] = (chapter, data)
            yield Message.Queue, self.root + "/chapter/" + uuid, data

    def _transform(self, chapter):
        """Build a metadata dict from a raw chapter API result"""
        # collect related UUIDs (manga, authors, groups, ...) by type
        relationships = defaultdict(list)
        for item in chapter["relationships"]:
            relationships[item["type"]].append(item["id"])
        manga = self.api.manga(relationships["manga"][0])
        for item in manga["relationships"]:
            relationships[item["type"]].append(item["id"])

        cattributes = chapter["data"]["attributes"]
        mattributes = manga["data"]["attributes"]
        # 'translatedLanguage' may carry a region suffix (e.g. 'pt-br');
        # keep only the primary language code
        lang = cattributes["translatedLanguage"].partition("-")[0]

        if cattributes["chapter"]:
            chnum, sep, minor = cattributes["chapter"].partition(".")
        else:
            # oneshots have no chapter number
            chnum, sep, minor = 0, "", ""

        # 'title' is a localized-title object; not every manga has an
        # "en" entry, so fall back to the first available translation
        # instead of raising a KeyError
        titles = mattributes["title"]
        manga_title = titles.get("en") or next(iter(titles.values()), "")

        data = {
            "manga"   : manga_title,
            "manga_id": manga["data"]["id"],
            "title"   : cattributes["title"],
            "volume"  : text.parse_int(cattributes["volume"]),
            "chapter" : text.parse_int(chnum),
            "chapter_minor": sep + minor,
            "chapter_id": chapter["data"]["id"],
            "date"    : text.parse_datetime(cattributes["publishAt"]),
            "lang"    : lang,
            "language": util.code_to_language(lang),
            "count"   : len(cattributes["data"]),
        }

        # author/artist/group names each require extra API requests,
        # so fetch them only when 'metadata' is enabled
        if self.config("metadata"):
            data["artist"] = [
                self.api.author(uuid)["data"]["attributes"]["name"]
                for uuid in relationships["artist"]]
            data["author"] = [
                self.api.author(uuid)["data"]["attributes"]["name"]
                for uuid in relationships["author"]]
            data["group"] = [
                self.api.group(uuid)["data"]["attributes"]["name"]
                for uuid in relationships["scanlation_group"]]

        return data
class MangadexChapterExtractor(MangadexExtractor):
    """Extractor for manga-chapters from mangadex.org"""
    subcategory = "chapter"
    pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)"
    test = (
        ("https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa", {
            "keyword": "f6c2b908df06eb834d56193dfe1fa1f7c2c4dccd",
            # "content": "50383a4c15124682057b197d40261641a98db514",
        }),
        # oneshot
        ("https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831", {
            "options": (("metadata", True),),
            "count": 64,
            "keyword": "6abcbe1e24eeb1049dc931958853cd767ee483fb",
        }),
        # MANGA Plus (#1154)
        ("https://mangadex.org/chapter/8d50ed68-8298-4ac9-b63d-cb2aea143dd0", {
            "exception": exception.StopExtraction,
        }),
    )

    def items(self):
        # reuse API data cached by a parent manga/feed extractor, if any;
        # otherwise request and transform the chapter ourselves
        cached = self._cache.pop(self.uuid, None)
        if cached is None:
            chapter = self.api.chapter(self.uuid)
            data = self._transform(chapter)
        else:
            chapter, data = cached
        yield Message.Directory, data

        cattributes = chapter["data"]["attributes"]
        server = self.api.athome_server(self.uuid)["baseUrl"]
        base = "{}/data/{}/".format(server, cattributes["hash"])
        for pnum, page in enumerate(cattributes["data"], 1):
            data["page"] = pnum
            text.nameext_from_url(page, data)
            yield Message.Url, base + page, data
class MangadexMangaExtractor(MangadexExtractor):
    """Extractor for manga from mangadex.org"""
    subcategory = "manga"
    # '(?!feed$)' keeps '/title/feed' for MangadexFeedExtractor
    pattern = BASE_PATTERN + r"/(?:title|manga)/(?!feed$)([0-9a-f-]+)"
    test = (
        ("https://mangadex.org/title/f90c4398-8aad-4f51-8a1f-024ca09fdcbc", {
            "keyword": {
                "manga"   : "Souten no Koumori",
                "manga_id": "f90c4398-8aad-4f51-8a1f-024ca09fdcbc",
                "title"   : "re:One[Ss]hot",
                "volume"  : 0,
                "chapter" : 0,
                "chapter_minor": "",
                "chapter_id": str,
                "date"    : "type:datetime",
                "lang"    : str,
                "language": str,
            },
        }),
        ("https://mangadex.cc/manga/d0c88e3b-ea64-4e07-9841-c1d2ac982f4a/", {
            "options": (("lang", "en"),),
            "count": ">= 100",
        }),
        ("https://mangadex.org/title/7c1e2742-a086-4fd3-a3be-701fd6cf0be9", {
            "count": 1,
        }),
    )

    def chapters(self):
        """Yield all chapter API results of this manga's feed"""
        return self.api.manga_feed(self.uuid)
class MangadexFeedExtractor(MangadexExtractor):
    """Extractor for chapters from your Followed Feed"""
    subcategory = "feed"
    # empty capture group so match.group(1) exists (self.uuid is unused here)
    pattern = BASE_PATTERN + r"/title/feed$()"
    test = ("https://mangadex.org/title/feed",)

    def chapters(self):
        """Yield chapter API results from the logged-in user's feed"""
        return self.api.user_follows_manga_feed()
+
+
class MangadexAPI():
    """Interface for the MangaDex API v5"""

    def __init__(self, extr):
        self.extractor = extr
        self.headers = {}

        self.username, self.password = extr._get_auth_info()
        if not self.username:
            # no credentials configured -> authentication becomes a no-op
            self.authenticate = util.noop

        server = extr.config("api-server")
        if server is None:
            self.root = "https://api.mangadex.org"
        else:
            self.root = text.ensure_http_scheme(server).rstrip("/")

    def athome_server(self, uuid):
        return self._call("/at-home/server/" + uuid)

    @memcache(keyarg=1)
    def author(self, uuid):
        return self._call("/author/" + uuid)

    def chapter(self, uuid):
        return self._call("/chapter/" + uuid)

    @memcache(keyarg=1)
    def group(self, uuid):
        return self._call("/group/" + uuid)

    @memcache(keyarg=1)
    def manga(self, uuid):
        return self._call("/manga/" + uuid)

    def manga_feed(self, uuid):
        """Paginated chapter feed of a single manga"""
        config = self.extractor.config
        if config("chapter-reverse"):
            order = "desc"
        else:
            order = "asc"
        params = {
            "order[volume]" : order,
            "order[chapter]" : order,
            "translatedLanguage[]": config("lang"),
        }
        return self._pagination("/manga/" + uuid + "/feed", params)

    def user_follows_manga_feed(self):
        """Paginated chapter feed of all followed manga"""
        params = {
            "order[publishAt]" : "desc",
            "translatedLanguage[]": self.extractor.config("lang"),
        }
        return self._pagination("/user/follows/manga/feed", params)

    def authenticate(self):
        self.headers["Authorization"] = \
            self._authenticate_impl(self.username, self.password)

    @cache(maxage=900, keyarg=1)
    def _authenticate_impl(self, username, password):
        """Return a 'Bearer …' session token, logging in if necessary"""
        refresh_token = _refresh_token_cache(username)
        if refresh_token:
            self.extractor.log.info("Refreshing access token")
            url = self.root + "/auth/refresh"
            payload = {"token": refresh_token}
        else:
            self.extractor.log.info("Logging in as %s", username)
            url = self.root + "/auth/login"
            payload = {"username": username, "password": password}

        result = self.extractor.request(
            url, method="POST", json=payload, fatal=None).json()
        if result.get("result") != "ok":
            raise exception.AuthenticationError()

        token = result["token"]
        if refresh_token != token["refresh"]:
            _refresh_token_cache.update(username, token["refresh"])
        return "Bearer " + token["session"]

    def _call(self, endpoint, params=None):
        """Send an authenticated API request; retry on rate limits"""
        url = self.root + endpoint

        while True:
            self.authenticate()
            response = self.extractor.request(
                url, params=params, headers=self.headers, fatal=None)
            code = response.status_code

            if code < 400:
                return response.json()

            if code != 429:
                msg = ", ".join('{title}: {detail}'.format_map(error)
                                for error in response.json()["errors"])
                raise exception.StopExtraction(
                    "%s %s (%s)", code, response.reason, msg)

            # rate limited: wait until the limit resets, then retry
            until = response.headers.get("X-RateLimit-Retry-After")
            self.extractor.wait(until=until)

    def _pagination(self, endpoint, params=None):
        """Yield all results of a paginated API endpoint"""
        if params is None:
            params = {}
        params["offset"] = 0

        while True:
            data = self._call(endpoint, params)
            yield from data["results"]

            params["offset"] = data["offset"] + data["limit"]
            if params["offset"] >= data["total"]:
                return
+
+
@cache(maxage=28*24*3600, keyarg=0)
def _refresh_token_cache(username):
    """Persistent per-username refresh-token cache (28-day lifetime).

    Returns None until a token is stored via .update().
    """
    return None