diff options
Diffstat (limited to 'gallery_dl/extractor/pixiv.py')
| -rw-r--r-- | gallery_dl/extractor/pixiv.py | 198 |
1 files changed, 198 insertions, 0 deletions
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index b704031..cdaf595 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -172,6 +172,7 @@ class PixivUserExtractor(PixivExtractor):
             (PixivBackgroundExtractor, base + "background"),
             (PixivArtworksExtractor  , base + "artworks"),
             (PixivFavoriteExtractor  , base + "bookmarks/artworks"),
+            (PixivNovelUserExtractor , base + "novels"),
         ), ("artworks",))
 
 
@@ -750,6 +751,182 @@ class PixivSeriesExtractor(PixivExtractor):
             params["p"] += 1
 
 
+class PixivNovelExtractor(PixivExtractor):
+    """Extractor for pixiv novels"""
+    subcategory = "novel"
+    request_interval = 1.0
+    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
+               r"/n(?:ovel/show\.php\?id=|/)(\d+)")
+    test = (
+        ("https://www.pixiv.net/novel/show.php?id=19612040", {
+            "count": 1,
+            "content": "8c818474153cbd2f221ee08766e1d634c821d8b4",
+            "keyword": {
+                "caption": r"re:「無能な名無し」と呼ばれ虐げられて育った鈴\(すず\)は、",
+                "comment_access_control": 0,
+                "create_date": "2023-04-02T15:18:58+09:00",
+                "date": "dt:2023-04-02 06:18:58",
+                "id": 19612040,
+                "is_bookmarked": False,
+                "is_muted": False,
+                "is_mypixiv_only": False,
+                "is_original": True,
+                "is_x_restricted": False,
+                "novel_ai_type": 1,
+                "page_count": 1,
+                "rating": "General",
+                "restrict": 0,
+                "series": {
+                    "id": 10278364,
+                    "title": "龍の贄嫁〜無能な名無しと虐げられていましたが、"
+                             "どうやら異母妹に霊力を搾取されていたようです〜",
+                },
+                "tags": ["和風ファンタジー", "溺愛", "神様", "ヤンデレ", "執着",
+                         "異能", "ざまぁ", "学園", "神嫁"],
+                "text_length": 5974,
+                "title": "異母妹から「無能な名無し」と虐げられていた私、"
+                         "どうやら異母妹に霊力を搾取されていたようです(1)",
+                "user": {
+                    "account": "yukinaga_chifuyu",
+                    "id": 77055466,
+                },
+                "visible": True,
+                "x_restrict": 0,
+            },
+        }),
+        # embeds
+        ("https://www.pixiv.net/novel/show.php?id=16422450", {
+            "options": (("embeds", True),),
+            "count": 3,
+        }),
+        ("https://www.pixiv.net/n/19612040"),
+    )
+
+    def __init__(self, match):
+        PixivExtractor.__init__(self, match)
+        self.novel_id = match.group(1)
+
+    def items(self):
+        tags = self.config("tags", "japanese")
+        if tags == "original":
+            transform_tags = None
+        elif tags == "translated":
+            def transform_tags(work):
+                work["tags"] = list(dict.fromkeys(
+                    tag["translated_name"] or tag["name"]
+                    for tag in work["tags"]))
+        else:
+            def transform_tags(work):
+                work["tags"] = [tag["name"] for tag in work["tags"]]
+
+        ratings = {0: "General", 1: "R-18", 2: "R-18G"}
+        meta_user = self.config("metadata")
+        meta_bookmark = self.config("metadata-bookmark")
+        embeds = self.config("embeds")
+
+        if embeds:
+            headers = {
+                "User-Agent"    : "Mozilla/5.0",
+                "App-OS"        : None,
+                "App-OS-Version": None,
+                "App-Version"   : None,
+                "Referer"       : self.root + "/",
+                "Authorization" : None,
+            }
+
+        novels = self.novels()
+        if self.max_posts:
+            novels = itertools.islice(novels, self.max_posts)
+        for novel in novels:
+            if meta_user:
+                novel.update(self.api.user_detail(novel["user"]["id"]))
+            if meta_bookmark and novel["is_bookmarked"]:
+                detail = self.api.novel_bookmark_detail(novel["id"])
+                novel["tags_bookmark"] = [tag["name"] for tag in detail["tags"]
+                                          if tag["is_registered"]]
+            if transform_tags:
+                transform_tags(novel)
+            novel["num"] = 0
+            novel["date"] = text.parse_datetime(novel["create_date"])
+            novel["rating"] = ratings.get(novel["x_restrict"])
+            novel["suffix"] = ""
+
+            yield Message.Directory, novel
+
+            novel["extension"] = "txt"
+            content = self.api.novel_text(novel["id"])["novel_text"]
+            yield Message.Url, "text:" + content, novel
+
+            if embeds:
+                desktop = False
+                illusts = {}
+
+                for marker in text.extract_iter(content, "[", "]"):
+                    if marker.startswith("[jumpuri:"):
+                        desktop = True
+                    elif marker.startswith("pixivimage:"):
+                        illusts[marker[11:].partition("-")[0]] = None
+
+                if desktop:
+                    novel_id = str(novel["id"])
+                    url = "{}/novel/show.php?id={}".format(
+                        self.root, novel_id)
+                    data = util.json_loads(text.extr(
+                        self.request(url, headers=headers).text,
+                        "id=\"meta-preload-data\" content='", "'"))
+
+                    for image in (data["novel"][novel_id]
+                                  ["textEmbeddedImages"]).values():
+                        url = image.pop("urls")["original"]
+                        novel.update(image)
+                        novel["date_url"] = self._date_from_url(url)
+                        novel["num"] += 1
+                        novel["suffix"] = "_p{:02}".format(novel["num"])
+                        text.nameext_from_url(url, novel)
+                        yield Message.Url, url, novel
+
+                if illusts:
+                    novel["_extractor"] = PixivWorkExtractor
+                    novel["date_url"] = None
+                    for illust_id in illusts:
+                        novel["num"] += 1
+                        novel["suffix"] = "_p{:02}".format(novel["num"])
+                        url = "{}/artworks/{}".format(self.root, illust_id)
+                        yield Message.Queue, url, novel
+
+    def novels(self):
+        return (self.api.novel_detail(self.novel_id),)
+
+
+class PixivNovelUserExtractor(PixivNovelExtractor):
+    """Extractor for pixiv users' novels"""
+    subcategory = "novel-user"
+    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
+               r"/(?:en/)?users/(\d+)/novels")
+    test = ("https://www.pixiv.net/en/users/77055466/novels", {
+        "pattern": "^text:",
+        "range": "1-5",
+        "count": 5,
+    })
+
+    def novels(self):
+        return self.api.user_novels(self.novel_id)
+
+
+class PixivNovelSeriesExtractor(PixivNovelExtractor):
+    """Extractor for pixiv novel series"""
+    subcategory = "novel-series"
+    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
+               r"/novel/series/(\d+)")
+    test = ("https://www.pixiv.net/novel/series/10278364", {
+        "count": 4,
+        "content": "b06abed001b3f6ccfb1579699e9a238b46d38ea2",
+    })
+
+    def novels(self):
+        return self.api.novel_series(self.novel_id)
+
+
 class PixivSketchExtractor(Extractor):
     """Extractor for user pages on sketch.pixiv.net"""
     category = "pixiv"
@@ -907,6 +1084,23 @@ class PixivAppAPI():
         params = {"illust_id": illust_id}
         return self._pagination("/v2/illust/related", params)
 
+    def novel_bookmark_detail(self, novel_id):
+        params = {"novel_id": novel_id}
+        return self._call(
+            "/v2/novel/bookmark/detail", params)["bookmark_detail"]
+
+    def novel_detail(self, novel_id):
+        params = {"novel_id": novel_id}
+        return self._call("/v2/novel/detail", params)["novel"]
+
+    def novel_series(self, series_id):
+        params = {"series_id": series_id}
+        return self._pagination("/v1/novel/series", params, "novels")
+
+    def novel_text(self, novel_id):
+        params = {"novel_id": novel_id}
+        return self._call("/v1/novel/text", params)
+
     def search_illust(self, word, sort=None, target=None, duration=None,
                       date_start=None, date_end=None):
         params = {"word": word, "search_target": target,
@@ -938,6 +1132,10 @@ class PixivAppAPI():
         params = {"user_id": user_id}
         return self._pagination("/v1/user/illusts", params)
 
+    def user_novels(self, user_id):
+        params = {"user_id": user_id}
+        return self._pagination("/v1/user/novels", params, "novels")
+
     def ugoira_metadata(self, illust_id):
         params = {"illust_id": illust_id}
         return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"]
