diff options
Diffstat (limited to 'gallery_dl/extractor/naverwebtoon.py')
| -rw-r--r-- | gallery_dl/extractor/naverwebtoon.py | 89 |
1 files changed, 59 insertions, 30 deletions
diff --git a/gallery_dl/extractor/naverwebtoon.py b/gallery_dl/extractor/naverwebtoon.py index fa91f76..d6292af 100644 --- a/gallery_dl/extractor/naverwebtoon.py +++ b/gallery_dl/extractor/naverwebtoon.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2021 Seonghyeon Cho -# Copyright 2022 Mike Fährmann +# Copyright 2022-2033 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,7 +11,6 @@ from .common import GalleryExtractor, Extractor, Message from .. import text -import re BASE_PATTERN = (r"(?:https?://)?comic\.naver\.com" r"/(webtoon|challenge|bestChallenge)") @@ -34,18 +33,44 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor): "?titleId=26458&no=1&weekday=tue"), { "url": "47a956ba8c7a837213d5985f50c569fcff986f75", "content": "3806b6e8befbb1920048de9888dfce6220f69a60", - "count": 14 + "count": 14, + "keyword": { + "author": ["김규삼"], + "artist": ["김규삼"], + "comic": "N의등대-눈의등대", + "count": 14, + "episode": "1", + "extension": "jpg", + "num": int, + "tags": ["스릴러", "완결무료", "완결스릴러"], + "title": "n의 등대 - 눈의 등대 1화", + "title_id": "26458", + }, }), (("https://comic.naver.com/challenge/detail" "?titleId=765124&no=1"), { - "pattern": r"https://image-comic\.pstatic\.net/nas" + "pattern": r"https://image-comic\.pstatic\.net" r"/user_contents_data/challenge_comic/2021/01/19" r"/342586/upload_7149856273586337846\.jpeg", "count": 1, + "keyword": { + "author": ["kemi****"], + "artist": [], + "comic": "우니 모두의 이야기", + "count": 1, + "episode": "1", + "extension": "jpeg", + "filename": "upload_7149856273586337846", + "num": 1, + "tags": ["일상툰", "우니모두의이야기", "퇴사", "입사", "신입사원", + "사회초년생", "회사원", "20대"], + "title": "퇴사하다", + "title_id": "765124", + }, }), (("https://comic.naver.com/bestChallenge/detail.nhn" "?titleId=771467&no=3"), { - "pattern": r"https://image-comic\.pstatic\.net/nas" + "pattern": r"https://image-comic\.pstatic\.net" r"/user_contents_data/challenge_comic/2021/04/28" r"/345534/upload_3617293622396203109\.jpeg", "count": 1, @@ -66,12 +91,14 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor): return { "title_id": self.title_id, "episode" : self.episode, - "title" : extr('property="og:title" content="', '"'), - "comic" : extr('<h2>', '<span'), - "authors" : extr('class="wrt_nm">', '</span>').strip().split("/"), - "description": extr('<p class="txt">', '</p>'), - "genre" : extr('<span class="genre">', '</span>'), - "date" : extr('<dd class="date">', '</dd>'), + "comic" : extr("titleName: '", "'"), + "tags" : [t.strip() for t in text.extract_iter( + extr("tagList: [", "}],"), '"tagName":"', '"')], + "title" : extr('"subtitle":"', '"'), + "author" : [a.strip() for a in text.extract_iter( + extr('"writers":[', ']'), '"name":"', '"')], + "artist" : [a.strip() for a in text.extract_iter( + extr('"painters":[', ']'), '"name":"', '"')] } @staticmethod @@ -87,7 +114,7 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor): class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor): subcategory = "comic" categorytransfer = True - pattern = (BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)") + pattern = BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)" test = ( ("https://comic.naver.com/webtoon/list?titleId=22073", { "pattern": NaverwebtoonEpisodeExtractor.pattern, @@ -109,28 +136,30 @@ class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor): query = text.parse_query(query) self.title_id = query.get("titleId") self.page_no = text.parse_int(query.get("page"), 1) + self.sort = query.get("sort", "ASC") def items(self): - url = "{}/{}/list".format(self.root, self.path) - params = {"titleId": self.title_id, "page": self.page_no} - data = {"_extractor": NaverwebtoonEpisodeExtractor} + base = "{}/{}/detail?titleId={}&no=".format( + self.root, self.path, self.title_id) + + url = self.root + "/api/article/list" + headers = { + "Accept": "application/json, text/plain, */*", + "Referer": self.root + "/", + } + params = { + "titleId": self.title_id, + "page" : self.page_no, + "sort" : self.sort, + } while True: - page = self.request(url, params=params).text - data["page"] = self.page_no + data = self.request(url, headers=headers, params=params).json() - for episode_url in self.get_episode_urls(page): - yield Message.Queue, episode_url, data + for article in data["articleList"]: + article["_extractor"] = NaverwebtoonEpisodeExtractor + yield Message.Queue, base + str(article["no"]), article - if 'class="next"' not in page: + params["page"] = data["pageInfo"]["nextPage"] + if not params["page"]: return - params["page"] += 1 - - def get_episode_urls(self, page): - """Extract and return all episode urls in page""" - return [ - self.root + path - for path in re.findall( - r'<a href="(/(?:webtoon|challenge|bestChallenge)' - r'/detail\?[^"]+)', page) - ][::2] |
