diff options
| author | 2026-01-06 04:24:52 -0500 | |
|---|---|---|
| committer | 2026-01-06 04:24:52 -0500 | |
| commit | 385e4bfb1e426d23417ac788a6f44d639e226c89 (patch) | |
| tree | e64f04e19d63014d48e3b5272ce112c637236ba7 /gallery_dl/extractor/webtoons.py | |
| parent | a24ec1647aeac35a63b744ea856011ad6e06be3b (diff) | |
New upstream version 1.31.2.upstream/1.31.2upstream
Diffstat (limited to 'gallery_dl/extractor/webtoons.py')
| -rw-r--r-- | gallery_dl/extractor/webtoons.py | 71 |
1 files changed, 66 insertions, 5 deletions
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py index bed251b..92cf6b1 100644 --- a/gallery_dl/extractor/webtoons.py +++ b/gallery_dl/extractor/webtoons.py @@ -48,10 +48,11 @@ class WebtoonsBase(): class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): """Extractor for an episode on webtoons.com""" subcategory = "episode" - pattern = (rf"{LANG_PATTERN}/([^/?#]+)/([^/?#]+)/[^/?#]+)" + pattern = (LANG_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)" r"/viewer\?([^#'\"]+)") example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer" "?title_no=123&episode_no=12345") + images_urls = [] def _init(self): self.setup_agegate_cookies() @@ -61,6 +62,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): self.title_no = params.get("title_no") self.episode_no = params.get("episode_no") self.page_url = f"{self.root}/{base}/viewer?{query}" + self.bgm = self.config("bgm", True) def metadata(self, page): extr = text.extract_from(page) @@ -114,12 +116,21 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): elif not isinstance(quality, dict): quality = None + if self.bgm: + num = 0 + self.paths = paths = {} + else: + num = None + results = [] for url in text.extract_iter( page, 'class="_images" data-url="', '"'): + path, _, query = url.rpartition("?") + if num is not None: + num += 1 + paths[path[path.find("/", 8):]] = num if quality is not None: - path, _, query = url.rpartition("?") type = quality.get(path.rpartition(".")[2].lower()) if type is False: url = path @@ -130,10 +141,60 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): return results def assets(self, page): + assets = [] + if self.config("thumbnails", False): active = text.extr(page, 'class="on', '</a>') url = _url(text.extr(active, 'data-url="', '"')) - return ({"url": url, "type": "thumbnail"},) + assets.append({"url": url, "type": "thumbnail"}) + + if self.bgm: + if bgm := text.extr(page, "episodeBgmList:", ",\n"): + self._asset_bgm(assets, util.json_loads(bgm)) + + return assets + + def _asset_bgm(self, assets, bgm_list): + import binascii + params = { + # "quality" : "MIDDLE", + "quality" : "HIGH", # no difference to 'MIDDLE' + "acceptCodecs": "AAC,MP3", + } + headers = { + "Accept" : "application/json", + "Content-Type" : "application/json", + "Origin" : self.root, + "Referer" : self.root + "/", + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "cross-site", + } + paths = self.paths + + for bgm in bgm_list: + url = (f"https://apis.naver.com/audiocweb/audiocplayogwweb/play" + f"/audio/{bgm['audioId']}/hls/token") + data = self.request_json( + url, params=params, headers=headers, interval=False) + token = data["result"]["playToken"] + data = util.json_loads(binascii.a2b_base64(token).decode()) + audio = data["audioInfo"] + play = bgm.get("playImageUrl", "") + stop = bgm.get("stopImageUrl", "") + + assets.append({ + **bgm, + **audio, + "num_play": paths.get(play) or 0, + "num_stop": paths.get(stop) or 0, + "filename_play": play[play.rfind("/")+1:play.rfind(".")], + "filename_stop": stop[stop.rfind("/")+1:stop.rfind(".")], + "type": "bgm", + "url" : "ytdl:" + audio["url"], + "_ytdl_manifest": audio["type"].lower(), + "extension": "mp3", + }) class WebtoonsComicExtractor(WebtoonsBase, Extractor): @@ -142,7 +203,7 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor): categorytransfer = True filename_fmt = "{type}.{extension}" archive_fmt = "{title_no}_{type}" - pattern = rf"{LANG_PATTERN}/([^/?#]+)/([^/?#]+))/list\?([^#]+)" + pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)" example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123" def items(self): @@ -197,7 +258,7 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor): class WebtoonsArtistExtractor(WebtoonsBase, Extractor): """Extractor for webtoons.com artists""" subcategory = "artist" - pattern = rf"{BASE_PATTERN}/p/community/([^/?#]+)/u/([^/?#]+)" + pattern = BASE_PATTERN + r"/p/community/([^/?#]+)/u/([^/?#]+)" example = "https://www.webtoons.com/p/community/LANG/u/ARTIST" def items(self): |
