summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/webtoons.py
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@unit193.net>2026-01-06 04:24:52 -0500
committerLibravatarUnit 193 <unit193@unit193.net>2026-01-06 04:24:52 -0500
commit385e4bfb1e426d23417ac788a6f44d639e226c89 (patch)
treee64f04e19d63014d48e3b5272ce112c637236ba7 /gallery_dl/extractor/webtoons.py
parenta24ec1647aeac35a63b744ea856011ad6e06be3b (diff)
New upstream version 1.31.2.upstream/1.31.2upstream
Diffstat (limited to 'gallery_dl/extractor/webtoons.py')
-rw-r--r--gallery_dl/extractor/webtoons.py71
1 files changed, 66 insertions, 5 deletions
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index bed251b..92cf6b1 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -48,10 +48,11 @@ class WebtoonsBase():
class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
"""Extractor for an episode on webtoons.com"""
subcategory = "episode"
- pattern = (rf"{LANG_PATTERN}/([^/?#]+)/([^/?#]+)/[^/?#]+)"
+ pattern = (LANG_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)"
r"/viewer\?([^#'\"]+)")
example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer"
"?title_no=123&episode_no=12345")
+ images_urls = []
def _init(self):
self.setup_agegate_cookies()
@@ -61,6 +62,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
self.title_no = params.get("title_no")
self.episode_no = params.get("episode_no")
self.page_url = f"{self.root}/{base}/viewer?{query}"
+ self.bgm = self.config("bgm", True)
def metadata(self, page):
extr = text.extract_from(page)
@@ -114,12 +116,21 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
elif not isinstance(quality, dict):
quality = None
+ if self.bgm:
+ num = 0
+ self.paths = paths = {}
+ else:
+ num = None
+
results = []
for url in text.extract_iter(
page, 'class="_images" data-url="', '"'):
+ path, _, query = url.rpartition("?")
+ if num is not None:
+ num += 1
+ paths[path[path.find("/", 8):]] = num
if quality is not None:
- path, _, query = url.rpartition("?")
type = quality.get(path.rpartition(".")[2].lower())
if type is False:
url = path
@@ -130,10 +141,60 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
return results
def assets(self, page):
+ assets = []
+
if self.config("thumbnails", False):
active = text.extr(page, 'class="on', '</a>')
url = _url(text.extr(active, 'data-url="', '"'))
- return ({"url": url, "type": "thumbnail"},)
+ assets.append({"url": url, "type": "thumbnail"})
+
+ if self.bgm:
+ if bgm := text.extr(page, "episodeBgmList:", ",\n"):
+ self._asset_bgm(assets, util.json_loads(bgm))
+
+ return assets
+
+ def _asset_bgm(self, assets, bgm_list):
+ import binascii
+ params = {
+ # "quality" : "MIDDLE",
+ "quality" : "HIGH", # no difference to 'MIDDLE'
+ "acceptCodecs": "AAC,MP3",
+ }
+ headers = {
+ "Accept" : "application/json",
+ "Content-Type" : "application/json",
+ "Origin" : self.root,
+ "Referer" : self.root + "/",
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "cors",
+ "Sec-Fetch-Site": "cross-site",
+ }
+ paths = self.paths
+
+ for bgm in bgm_list:
+ url = (f"https://apis.naver.com/audiocweb/audiocplayogwweb/play"
+ f"/audio/{bgm['audioId']}/hls/token")
+ data = self.request_json(
+ url, params=params, headers=headers, interval=False)
+ token = data["result"]["playToken"]
+ data = util.json_loads(binascii.a2b_base64(token).decode())
+ audio = data["audioInfo"]
+ play = bgm.get("playImageUrl", "")
+ stop = bgm.get("stopImageUrl", "")
+
+ assets.append({
+ **bgm,
+ **audio,
+ "num_play": paths.get(play) or 0,
+ "num_stop": paths.get(stop) or 0,
+ "filename_play": play[play.rfind("/")+1:play.rfind(".")],
+ "filename_stop": stop[stop.rfind("/")+1:stop.rfind(".")],
+ "type": "bgm",
+ "url" : "ytdl:" + audio["url"],
+ "_ytdl_manifest": audio["type"].lower(),
+ "extension": "mp3",
+ })
class WebtoonsComicExtractor(WebtoonsBase, Extractor):
@@ -142,7 +203,7 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
categorytransfer = True
filename_fmt = "{type}.{extension}"
archive_fmt = "{title_no}_{type}"
- pattern = rf"{LANG_PATTERN}/([^/?#]+)/([^/?#]+))/list\?([^#]+)"
+ pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)"
example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123"
def items(self):
@@ -197,7 +258,7 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
"""Extractor for webtoons.com artists"""
subcategory = "artist"
- pattern = rf"{BASE_PATTERN}/p/community/([^/?#]+)/u/([^/?#]+)"
+ pattern = BASE_PATTERN + r"/p/community/([^/?#]+)/u/([^/?#]+)"
example = "https://www.webtoons.com/p/community/LANG/u/ARTIST"
def items(self):