# -*- coding: utf-8 -*- # Copyright 2014-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://www.pixiv.net/""" from .common import Extractor, Message, Dispatch from .. import text, util, dt, exception from ..cache import cache, memcache import itertools import hashlib BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?ph?ixiv\.net" USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)" class PixivExtractor(Extractor): """Base class for pixiv extractors""" category = "pixiv" root = "https://www.pixiv.net" directory_fmt = ("{category}", "{user[id]} {user[account]}") filename_fmt = "{id}_p{num}.{extension}" archive_fmt = "{id}{suffix}.{extension}" cookies_domain = ".pixiv.net" limit_url = "https://s.pximg.net/common/images/limit_" # https://s.pximg.net/common/images/limit_sanity_level_360.png # https://s.pximg.net/common/images/limit_unviewable_360.png # https://s.pximg.net/common/images/limit_mypixiv_360.png def _init(self): self.api = PixivAppAPI(self) self.load_ugoira = self.config("ugoira", True) self.load_ugoira_original = (self.load_ugoira == "original") self.max_posts = self.config("max-posts", 0) self.sanity_workaround = self.config("sanity", True) self.meta_user = self.config("metadata") self.meta_bookmark = self.config("metadata-bookmark") self.meta_comments = self.config("comments") self.meta_captions = self.config("captions") if self.sanity_workaround or self.meta_captions: self.meta_captions_sub = text.re( r' 9 else None), "height" : 0, "id" : kind, "image_urls" : None, "meta_pages" : (), "meta_single_page": {"original_image_url": url}, "page_count" : 1, "sanity_level" : 0, "tags" : (), "title" : kind, "type" : kind, "user" : user, "width" : 0, "x_restrict" : 0, } def works(self): """Return an iterable containing all relevant 'work' objects""" def metadata(self): """Collect metadata for extractor job""" return {} class PixivUserExtractor(Dispatch, PixivExtractor): """Extractor for a pixiv user profile""" pattern = (BASE_PATTERN + r"/(?:" r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id=" r")(\d+)(?:$|[?#])") example = "https://www.pixiv.net/en/users/12345" def items(self): if (inc := self.config("include")) and ( "sketch" in inc or inc == "all"): Extractor.initialize(self) user = PixivAppAPI(self).user_detail(self.groups[0]) sketch = "https://sketch.pixiv.net/@" + user["user"]["account"] else: sketch = "" base = f"{self.root}/users/{self.groups[0]}/" return self._dispatch_extractors(( (PixivAvatarExtractor , base + "avatar"), (PixivBackgroundExtractor , base + "background"), (PixivArtworksExtractor , base + "artworks"), (PixivFavoriteExtractor , base + "bookmarks/artworks"), (PixivNovelBookmarkExtractor, base + "bookmarks/novels"), (PixivNovelUserExtractor , base + "novels"), (PixivSketchExtractor , sketch), ), ("artworks",), ( ("bookmark", "novel-bookmark"), ("user" , "novel-user"), )) class PixivArtworksExtractor(PixivExtractor): """Extractor for artworks of a pixiv user""" subcategory = "artworks" pattern = (BASE_PATTERN + r"/(?:" r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)" r"(?:/([^/?#]+))?/?(?:$|[?#])" r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)") example = "https://www.pixiv.net/en/users/12345/artworks" _warn_phpsessid = True def _init(self): PixivExtractor._init(self) u1, t1, u2, t2 = self.groups if t1: t1 = text.unquote(t1) elif t2: t2 = text.parse_query(t2).get("tag") self.user_id = u1 or u2 self.tag = t1 or t2 if self.sanity_workaround and self._warn_phpsessid: PixivArtworksExtractor._warn_phpsessid = False if not self.cookies.get("PHPSESSID", domain=self.cookies_domain): self.log.warning("No 'PHPSESSID' cookie set. Can detect only " "non R-18 'limit_sanity_level' works.") def metadata(self): if self.config("metadata"): self.api.user_detail(self.user_id) return {} def works(self): works = self.api.user_illusts(self.user_id) if self.sanity_workaround and (body := self._request_ajax( f"/user/{self.user_id}/profile/all")): try: ajax_ids = list(map(int, body["illusts"])) ajax_ids.extend(map(int, body["manga"])) ajax_ids.sort() except Exception as exc: self.log.traceback(exc) self.log.warning("u%s: Failed to collect artwork IDs " "using AJAX API", self.user_id) else: works = self._extend_sanity(works, ajax_ids) if self.tag: tag = self.tag.lower() works = ( work for work in works if tag in [t["name"].lower() for t in work["tags"]] ) return works def _extend_sanity(self, works, ajax_ids): user = {"id": 1} index = len(ajax_ids) - 1 for work in works: while index >= 0: work_id = work["id"] ajax_id = ajax_ids[index] if ajax_id == work_id: index -= 1 break elif ajax_id > work_id: index -= 1 self.log.debug("Inserting work %s", ajax_id) yield self._make_work(ajax_id, self.limit_url, user) else: # ajax_id < work_id break yield work while index >= 0: ajax_id = ajax_ids[index] self.log.debug("Inserting work %s", ajax_id) yield self._make_work(ajax_id, self.limit_url, user) index -= 1 class PixivAvatarExtractor(PixivExtractor): """Extractor for pixiv avatars""" subcategory = "avatar" filename_fmt = "avatar{date:?_//%Y-%m-%d}.{extension}" archive_fmt = "avatar_{user[id]}_{date}" pattern = USER_PATTERN + r"/avatar" example = "https://www.pixiv.net/en/users/12345/avatar" def _init(self): PixivExtractor._init(self) self.sanity_workaround = self.meta_comments = False def works(self): user = self.api.user_detail(self.groups[0])["user"] url = user["profile_image_urls"]["medium"].replace("_170.", ".") return (self._make_work("avatar", url, user),) class PixivBackgroundExtractor(PixivExtractor): """Extractor for pixiv background banners""" subcategory = "background" filename_fmt = "background{date:?_//%Y-%m-%d}.{extension}" archive_fmt = "background_{user[id]}_{date}" pattern = USER_PATTERN + "/background" example = "https://www.pixiv.net/en/users/12345/background" def _init(self): PixivExtractor._init(self) self.sanity_workaround = self.meta_comments = False def works(self): detail = self.api.user_detail(self.groups[0]) url = detail["profile"]["background_image_url"] if not url: return () if "/c/" in url: parts = url.split("/") del parts[3:5] url = "/".join(parts) url = url.replace("_master1200.", ".") work = self._make_work("background", url, detail["user"]) if url.endswith(".jpg"): url = url[:-4] work["_fallback"] = (url + ".png", url + ".gif") return (work,) class PixivMeExtractor(PixivExtractor): """Extractor for pixiv.me URLs""" subcategory = "me" pattern = r"(?:https?://)?pixiv\.me/([^/?#]+)" example = "https://pixiv.me/USER" def items(self): url = "https://pixiv.me/" + self.groups[0] location = self.request_location(url, notfound="user") yield Message.Queue, location, {"_extractor": PixivUserExtractor} class PixivWorkExtractor(PixivExtractor): """Extractor for a single pixiv work/illustration""" subcategory = "work" pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?ph?ixiv\.net" r"/(?:(?:en/)?artworks/" r"|member_illust\.php\?(?:[^&]+&)*illust_id=)(\d+)" r"|(?:i(?:\d+\.pixiv|\.pximg)\.net" r"/(?:(?:.*/)?img-[^/]+/img/\d{4}(?:/\d\d){5}|img\d+/img/[^/]+)" r"|img\d*\.pixiv\.net/img/[^/]+|(?:www\.)?pixiv\.net/i)/(\d+))") example = "https://www.pixiv.net/artworks/12345" def __init__(self, match): PixivExtractor.__init__(self, match) self.illust_id = match[1] or match[2] def works(self): works = (self.api.illust_detail(self.illust_id),) if self.config("related", False): related = self.api.illust_related(self.illust_id) works = itertools.chain(works, related) return works class PixivUnlistedExtractor(PixivExtractor): """Extractor for a unlisted pixiv illustrations""" subcategory = "unlisted" pattern = BASE_PATTERN + r"/(?:en/)?artworks/unlisted/(\w+)" example = "https://www.pixiv.net/en/artworks/unlisted/a1b2c3d4e5f6g7h8i9j0" def _extract_files(self, work): body = self._request_ajax("/illust/unlisted/" + work["id"]) work["id_unlisted"] = work["id"] work["id"] = text.parse_int(body["illustId"]) return self._extract_ajax(work, body) def works(self): return ({"id": self.groups[0], "user": {"id": 1}},) class PixivFavoriteExtractor(PixivExtractor): """Extractor for all favorites/bookmarks of a pixiv user""" subcategory = "favorite" directory_fmt = ("{category}", "bookmarks", "{user_bookmark[id]} {user_bookmark[account]}") archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}" pattern = (BASE_PATTERN + r"/(?:(?:en/)?" r"users/(\d+)/(bookmarks/artworks|following)(?:/([^/?#]+))?" r"|bookmark\.php)(?:\?([^#]*))?") example = "https://www.pixiv.net/en/users/12345/bookmarks/artworks" def __init__(self, match): uid, kind, self.tag, query = match.groups() query = text.parse_query(query) if not uid: uid = query.get("id") if not uid: self.subcategory = "bookmark" if kind == "following" or query.get("type") == "user": self.subcategory = "following" self.items = self._items_following PixivExtractor.__init__(self, match) self.query = query self.user_id = uid def works(self): tag = None if "tag" in self.query: tag = text.unquote(self.query["tag"]) elif self.tag: tag = text.unquote(self.tag) restrict = "public" if self.query.get("rest") == "hide": restrict = "private" return self.api.user_bookmarks_illust(self.user_id, tag, restrict) def metadata(self): if self.user_id: user = self.api.user_detail(self.user_id)["user"] else: self.api.login() user = self.api.user self.user_id = user["id"] return {"user_bookmark": user} def _items_following(self): restrict = "public" if self.query.get("rest") == "hide": restrict = "private" for preview in self.api.user_following(self.user_id, restrict): user = preview["user"] user["_extractor"] = PixivUserExtractor url = "https://www.pixiv.net/users/" + str(user["id"]) yield Message.Queue, url, user class PixivRankingExtractor(PixivExtractor): """Extractor for pixiv ranking pages""" subcategory = "ranking" archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}" directory_fmt = ("{category}", "rankings", "{ranking[mode]}", "{ranking[date]}") pattern = BASE_PATTERN + r"/ranking\.php(?:\?([^#]*))?" example = "https://www.pixiv.net/ranking.php" def __init__(self, match): PixivExtractor.__init__(self, match) self.query = match[1] self.mode = self.date = None def works(self): ranking = self.ranking works = self.api.illust_ranking(self.mode, self.date) if self.type: works = filter(lambda work, t=self.type: work["type"] == t, works) for ranking["rank"], work in enumerate(works, 1): yield work def metadata(self): query = text.parse_query(self.query) mode = query.get("mode", "daily").lower() mode_map = { "daily": "day", "daily_r18": "day_r18", "daily_ai": "day_ai", "daily_r18_ai": "day_r18_ai", "weekly": "week", "weekly_r18": "week_r18", "monthly": "month", "male": "day_male", "male_r18": "day_male_r18", "female": "day_female", "female_r18": "day_female_r18", "original": "week_original", "rookie": "week_rookie", "r18g": "week_r18g", } try: self.mode = mode = mode_map[mode] except KeyError: raise exception.AbortExtraction(f"Invalid mode '{mode}'") if date := query.get("date"): if len(date) == 8 and date.isdecimal(): date = f"{date[0:4]}-{date[4:6]}-{date[6:8]}" else: self.log.warning("invalid date '%s'", date) date = None if not date: date = (dt.now() - dt.timedelta(days=1)).strftime("%Y-%m-%d") self.date = date self.type = type = query.get("content") self.ranking = ranking = { "mode": mode, "date": self.date, "rank": 0, "type": type or "all", } return {"ranking": ranking} class PixivSearchExtractor(PixivExtractor): """Extractor for pixiv search results""" subcategory = "search" archive_fmt = "s_{search[word]}_{id}{num}.{extension}" directory_fmt = ("{category}", "search", "{search[word]}") pattern = (BASE_PATTERN + r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?" r"|search\.php)(?:\?([^#]+))?") example = "https://www.pixiv.net/en/tags/TAG" def __init__(self, match): PixivExtractor.__init__(self, match) self.word, self.query = match.groups() self.sort = self.target = None def works(self): return self.api.search_illust( self.word, self.sort, self.target, date_start=self.date_start, date_end=self.date_end) def metadata(self): query = text.parse_query(self.query) if self.word: self.word = text.unquote(self.word) else: try: self.word = query["word"] except KeyError: raise exception.AbortExtraction("Missing search term") sort = query.get("order", "date_d") sort_map = { "date": "date_asc", "date_d": "date_desc", "popular_d": "popular_desc", "popular_male_d": "popular_male_desc", "popular_female_d": "popular_female_desc", } try: self.sort = sort = sort_map[sort] except KeyError: raise exception.AbortExtraction(f"Invalid search order '{sort}'") target = query.get("s_mode", "s_tag_full") target_map = { "s_tag": "partial_match_for_tags", "s_tag_full": "exact_match_for_tags", "s_tc": "title_and_caption", } try: self.target = target = target_map[target] except KeyError: raise exception.AbortExtraction(f"Invalid search mode '{target}'") self.date_start = query.get("scd") self.date_end = query.get("ecd") return {"search": { "word": self.word, "sort": self.sort, "target": self.target, "date_start": self.date_start, "date_end": self.date_end, }} class PixivFollowExtractor(PixivExtractor): """Extractor for new illustrations from your followed artists""" subcategory = "follow" archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}" directory_fmt = ("{category}", "following") pattern = BASE_PATTERN + r"/bookmark_new_illust\.php" example = "https://www.pixiv.net/bookmark_new_illust.php" def works(self): return self.api.illust_follow() def metadata(self): self.api.login() return {"user_follow": self.api.user} class PixivPixivisionExtractor(PixivExtractor): """Extractor for illustrations from a pixivision article""" subcategory = "pixivision" directory_fmt = ("{category}", "pixivision", "{pixivision_id} {pixivision_title}") archive_fmt = "V{pixivision_id}_{id}{suffix}.{extension}" pattern = r"(?:https?://)?(?:www\.)?pixivision\.net/(?:en/)?a/(\d+)" example = "https://www.pixivision.net/en/a/12345" def __init__(self, match): PixivExtractor.__init__(self, match) self.pixivision_id = match[1] def works(self): return ( self.api.illust_detail(illust_id.partition("?")[0]) for illust_id in util.unique_sequence(text.extract_iter( self.page, '', '<') return { "pixivision_id" : self.pixivision_id, "pixivision_title": text.unescape(title), } class PixivSeriesExtractor(PixivExtractor): """Extractor for illustrations from a Pixiv series""" subcategory = "series" directory_fmt = ("{category}", "{user[id]} {user[account]}", "{series[id]} {series[title]}") filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}" pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)" example = "https://www.pixiv.net/user/12345/series/12345" def __init__(self, match): PixivExtractor.__init__(self, match) self.user_id, self.series_id = match.groups() def works(self): series = None for work in self.api.illust_series(self.series_id): if series is None: series = self.api.data series["total"] = num_series = series.pop("series_work_count") else: num_series -= 1 work["num_series"] = num_series work["series"] = series yield work class PixivSketchExtractor(Extractor): """Extractor for user pages on sketch.pixiv.net""" category = "pixiv" subcategory = "sketch" directory_fmt = ("{category}", "sketch", "{user[unique_name]}") filename_fmt = "{post_id} {id}.{extension}" archive_fmt = "S{user[id]}_{id}" root = "https://sketch.pixiv.net" cookies_domain = ".pixiv.net" pattern = r"(?:https?://)?sketch\.pixiv\.net/@([^/?#]+)" example = "https://sketch.pixiv.net/@USER" def items(self): self.username = self.groups[0] headers = {"Referer": f"{self.root}/@{self.username}"} for post in self.posts(): media = post["media"] post["post_id"] = post["id"] post["date"] = dt.parse_iso(post["created_at"]) util.delete_items(post, ("id", "media", "_links")) yield Message.Directory, "", post post["_http_headers"] = headers for photo in media: original = photo["photo"]["original"] post["id"] = photo["id"] post["width"] = original["width"] post["height"] = original["height"] url = original["url"] text.nameext_from_url(url, post) yield Message.Url, url, post def posts(self): url = f"{self.root}/api/walls/@{self.username}/posts/public.json" headers = { "Accept": "application/vnd.sketch-v4+json", "Referer": self.root + "/", "X-Requested-With": f"{self.root}/@{self.username}", } while True: data = self.request_json(url, headers=headers) yield from data["data"]["items"] next_url = data["_links"].get("next") if not next_url: return url = self.root + next_url["href"] ############################################################################### # Novels ###################################################################### class PixivNovelExtractor(PixivExtractor): """Base class for pixiv novel extractors""" category = "pixiv-novel" request_interval = (0.5, 1.5) def items(self): self.novel_id = self.groups[0] tags = self.config("tags", "japanese") if tags == "original": transform_tags = None elif tags == "translated": def transform_tags(work): work["tags"] = list(dict.fromkeys( tag["translated_name"] or tag["name"] for tag in work["tags"])) else: def transform_tags(work): work["tags"] = [tag["name"] for tag in work["tags"]] ratings = {0: "General", 1: "R-18", 2: "R-18G"} embeds = self.config("embeds") covers = self.config("covers") novels = self.novels() if self.max_posts: novels = itertools.islice(novels, self.max_posts) for novel in novels: if self.meta_user: novel.update(self.api.user_detail(str(novel["user"]["id"]))) if self.meta_comments: if novel["total_comments"]: novel["comments"] = list( self.api.novel_comments(novel["id"])) else: novel["comments"] = () if self.meta_bookmark and novel["is_bookmarked"]: detail = self.api.novel_bookmark_detail(novel["id"]) novel["tags_bookmark"] = [tag["name"] for tag in detail["tags"] if tag["is_registered"]] if transform_tags: transform_tags(novel) novel["num"] = 0 novel["date"] = dt.parse_iso(novel["create_date"]) novel["rating"] = ratings.get(novel["x_restrict"]) novel["suffix"] = "" yield Message.Directory, "", novel try: content = self.api.novel_webview(novel["id"])["text"] except Exception: self.log.warning("Unable to download novel %s", novel["id"]) continue novel["extension"] = "txt" yield Message.Url, "text:" + content, novel if covers: path = novel["image_urls"]["large"].partition("/img/")[2] url = ("https://i.pximg.net/novel-cover-original/img/" + path.rpartition(".")[0].replace("_master1200", "")) novel["date_url"] = self._date_from_url(url) novel["num"] += 1 novel["suffix"] = f"_p{novel['num']:02}" novel["_fallback"] = (url + ".png",) url_jpg = url + ".jpg" text.nameext_from_url(url_jpg, novel) yield Message.Url, url_jpg, novel del novel["_fallback"] if embeds: desktop = False illusts = {} for marker in text.extract_iter(content, "[", "]"): if marker.startswith("uploadedimage:"): desktop = True elif marker.startswith("pixivimage:"): illusts[marker[11:].partition("-")[0]] = None if desktop: try: body = self._request_ajax("/novel/" + str(novel["id"])) images = body["textEmbeddedImages"].values() except Exception as exc: self.log.warning( "%s: Failed to get embedded novel images (%s: %s)", novel["id"], exc.__class__.__name__, exc) images = () for image in images: url = image.pop("urls")["original"] novel.update(image) novel["date_url"] = self._date_from_url(url) novel["num"] += 1 novel["suffix"] = f"_p{novel['num']:02}" text.nameext_from_url(url, novel) yield Message.Url, url, novel if illusts: novel["_extractor"] = PixivWorkExtractor novel["date_url"] = None for illust_id in illusts: novel["num"] += 1 novel["suffix"] = f"_p{novel['num']:02}" url = f"{self.root}/artworks/{illust_id}" yield Message.Queue, url, novel class PixivNovelNovelExtractor(PixivNovelExtractor): """Extractor for pixiv novels""" subcategory = "novel" pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)" example = "https://www.pixiv.net/novel/show.php?id=12345" def novels(self): novel = self.api.novel_detail(self.novel_id) if self.config("full-series") and novel["series"]: self.subcategory = PixivNovelSeriesExtractor.subcategory return self.api.novel_series(novel["series"]["id"]) return (novel,) class PixivNovelUserExtractor(PixivNovelExtractor): """Extractor for pixiv users' novels""" subcategory = "user" pattern = USER_PATTERN + r"/novels" example = "https://www.pixiv.net/en/users/12345/novels" def novels(self): return self.api.user_novels(self.novel_id) class PixivNovelSeriesExtractor(PixivNovelExtractor): """Extractor for pixiv novel series""" subcategory = "series" pattern = BASE_PATTERN + r"/novel/series/(\d+)" example = "https://www.pixiv.net/novel/series/12345" def novels(self): return self.api.novel_series(self.novel_id) class PixivNovelBookmarkExtractor(PixivNovelExtractor): """Extractor for bookmarked pixiv novels""" subcategory = "bookmark" pattern = (USER_PATTERN + r"/bookmarks/novels" r"(?:/([^/?#]+))?(?:/?\?([^#]+))?") example = "https://www.pixiv.net/en/users/12345/bookmarks/novels" def novels(self): user_id, tag, query = self.groups tag = text.unquote(tag) if tag else None if text.parse_query(query).get("rest") == "hide": restrict = "private" else: restrict = "public" return self.api.user_bookmarks_novel(user_id, tag, restrict) ############################################################################### # API ######################################################################### class PixivAppAPI(): """Minimal interface for the Pixiv App API for mobile devices For a more complete implementation or documentation, see - https://github.com/upbit/pixivpy - https://gist.github.com/ZipFile/3ba99b47162c23f8aea5d5942bb557b1 """ CLIENT_ID = "MOBrBDS8blbauoSck0ZfDbtuzpyT" CLIENT_SECRET = "lsACyCD94FhDUtGTXi3QzcFE2uU1hqtDaKeqrdwj" HASH_SECRET = ("28c1fdd170a5204386cb1313c7077b34" "f83e4aaf4aa829ce78c231e05b0bae2c") def __init__(self, extractor): self.extractor = extractor self.log = extractor.log self.username = extractor._get_auth_info()[0] self.user = None extractor.headers_web = extractor.session.headers.copy() extractor.session.headers.update({ "App-OS" : "ios", "App-OS-Version": "16.7.2", "App-Version" : "7.19.1", "User-Agent" : "PixivIOSApp/7.19.1 (iOS 16.7.2; iPhone12,8)", "Referer" : "https://app-api.pixiv.net/", }) self.client_id = extractor.config( "client-id", self.CLIENT_ID) self.client_secret = extractor.config( "client-secret", self.CLIENT_SECRET) token = extractor.config("refresh-token") if token is None or token == "cache": token = _refresh_token_cache(self.username) self.refresh_token = token def login(self): """Login and gain an access token""" self.user, auth = self._login_impl(self.username) self.extractor.session.headers["Authorization"] = auth @cache(maxage=3600, keyarg=1) def _login_impl(self, username): if not self.refresh_token: raise exception.AuthenticationError( "'refresh-token' required.\n" "Run `gallery-dl oauth:pixiv` to get one.") self.log.info("Refreshing access token") url = "https://oauth.secure.pixiv.net/auth/token" data = { "client_id" : self.client_id, "client_secret" : self.client_secret, "grant_type" : "refresh_token", "refresh_token" : self.refresh_token, "get_secure_url": "1", } time = dt.now().strftime("%Y-%m-%dT%H:%M:%S+00:00") headers = { "X-Client-Time": time, "X-Client-Hash": hashlib.md5( (time + self.HASH_SECRET).encode()).hexdigest(), } response = self.extractor.request( url, method="POST", headers=headers, data=data, fatal=False) if response.status_code >= 400: self.log.debug(response.text) raise exception.AuthenticationError("Invalid refresh token") data = response.json()["response"] return data["user"], "Bearer " + data["access_token"] def illust_detail(self, illust_id): params = {"illust_id": illust_id} return self._call("/v1/illust/detail", params)["illust"] def illust_bookmark_detail(self, illust_id): params = {"illust_id": illust_id} return self._call( "/v2/illust/bookmark/detail", params)["bookmark_detail"] def illust_comments(self, illust_id): params = {"illust_id": illust_id} return self._pagination("/v3/illust/comments", params, "comments") def illust_follow(self, restrict="all"): params = {"restrict": restrict} return self._pagination("/v2/illust/follow", params) def illust_ranking(self, mode="day", date=None): params = {"mode": mode, "date": date} return self._pagination("/v1/illust/ranking", params) def illust_related(self, illust_id): params = {"illust_id": illust_id} return self._pagination("/v2/illust/related", params) def illust_series(self, series_id, offset=0): params = {"illust_series_id": series_id, "offset": offset} return self._pagination("/v1/illust/series", params, key_data="illust_series_detail") def novel_bookmark_detail(self, novel_id): params = {"novel_id": novel_id} return self._call( "/v2/novel/bookmark/detail", params)["bookmark_detail"] def novel_comments(self, novel_id): params = {"novel_id": novel_id} return self._pagination("/v1/novel/comments", params, "comments") def novel_detail(self, novel_id): params = {"novel_id": novel_id} return self._call("/v2/novel/detail", params)["novel"] def novel_series(self, series_id): params = {"series_id": series_id} return self._pagination("/v1/novel/series", params, "novels") def novel_text(self, novel_id): params = {"novel_id": novel_id} return self._call("/v1/novel/text", params) def novel_webview(self, novel_id): params = {"id": novel_id, "viewer_version": "20221031_ai"} return self._call( "/webview/v2/novel", params, self._novel_webview_parse) def _novel_webview_parse(self, response): return util.json_loads(text.extr( response.text, "novel: ", ",\n")) def search_illust(self, word, sort=None, target=None, duration=None, date_start=None, date_end=None): params = {"word": word, "search_target": target, "sort": sort, "duration": duration, "start_date": date_start, "end_date": date_end} return self._pagination_search("/v1/search/illust", params) def user_bookmarks_illust(self, user_id, tag=None, restrict="public"): """Return illusts bookmarked by a user""" params = {"user_id": user_id, "tag": tag, "restrict": restrict} return self._pagination("/v1/user/bookmarks/illust", params) def user_bookmarks_novel(self, user_id, tag=None, restrict="public"): """Return novels bookmarked by a user""" params = {"user_id": user_id, "tag": tag, "restrict": restrict} return self._pagination("/v1/user/bookmarks/novel", params, "novels") def user_bookmark_tags_illust(self, user_id, restrict="public"): """Return bookmark tags defined by a user""" params = {"user_id": user_id, "restrict": restrict} return self._pagination( "/v1/user/bookmark-tags/illust", params, "bookmark_tags") @memcache(keyarg=1) def user_detail(self, user_id, fatal=True): params = {"user_id": user_id} return self._call("/v1/user/detail", params, fatal=fatal) def user_following(self, user_id, restrict="public"): params = {"user_id": user_id, "restrict": restrict} return self._pagination("/v1/user/following", params, "user_previews") def user_illusts(self, user_id): params = {"user_id": user_id} return self._pagination("/v1/user/illusts", params, key_user="user") def user_novels(self, user_id): params = {"user_id": user_id} return self._pagination("/v1/user/novels", params, "novels") def ugoira_metadata(self, illust_id): params = {"illust_id": illust_id} return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"] def _call(self, endpoint, params=None, parse=None, fatal=True): url = "https://app-api.pixiv.net" + endpoint while True: self.login() response = self.extractor.request(url, params=params, fatal=False) if parse: data = parse(response) else: data = response.json() if "error" not in data or not fatal: return data self.log.debug(data) if response.status_code == 404: raise exception.NotFoundError() error = data["error"] if "rate limit" in (error.get("message") or "").lower(): self.extractor.wait(seconds=300) continue msg = (f"'{msg}'" if (msg := error.get("user_message")) else f"'{msg}'" if (msg := error.get("message")) else error) raise exception.AbortExtraction("API request failed: " + msg) def _pagination(self, endpoint, params, key_items="illusts", key_data=None, key_user=None): data = self._call(endpoint, params) if key_data is not None: self.data = data.get(key_data) if key_user is not None and not data[key_user].get("id"): user = self.user_detail(self.extractor.user_id, fatal=False) if user.get("error"): raise exception.NotFoundError("user") return while True: yield from data[key_items] if not data["next_url"]: return query = data["next_url"].rpartition("?")[2] params = text.parse_query(query) data = self._call(endpoint, params) def _pagination_search(self, endpoint, params): sort = params["sort"] if sort == "date_desc": date_key = "end_date" date_off = dt.timedelta(days=1) date_cmp = lambda lhs, rhs: lhs >= rhs # noqa E731 elif sort == "date_asc": date_key = "start_date" date_off = dt.timedelta(days=-1) date_cmp = lambda lhs, rhs: lhs <= rhs # noqa E731 else: date_key = None date_last = None while True: data = self._call(endpoint, params) if date_last is None: yield from data["illusts"] else: works = data["illusts"] if date_cmp(date_last, works[-1]["create_date"]): for work in works: if date_last is None: yield work elif date_cmp(date_last, work["create_date"]): date_last = None if not (next_url := data.get("next_url")): return query = next_url.rpartition("?")[2] params = text.parse_query(query) if date_key and text.parse_int(params.get("offset")) >= 5000: date_last = data["illusts"][-1]["create_date"] date_val = (dt.parse_iso(date_last) + date_off).strftime( "%Y-%m-%d") self.log.info("Reached 'offset' >= 5000; " "Updating '%s' to '%s'", date_key, date_val) params[date_key] = date_val params.pop("offset", None) @cache(maxage=36500*86400, keyarg=0) def _refresh_token_cache(username): return None