about | summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/pixiv.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net> 2025-07-31 01:22:01 -0400
committer: Unit 193 <unit193@unit193.net> 2025-07-31 01:22:01 -0400
commit: a6e995c093de8aae2e91a0787281bb34c0b871eb (patch)
tree: 2d79821b05300d34d8871eb6c9662b359a2de85d /gallery_dl/extractor/pixiv.py
parent: 7672a750cb74bf31e21d76aad2776367fd476155 (diff)
New upstream version 1.30.2. (tag: upstream/1.30.2)
Diffstat (limited to 'gallery_dl/extractor/pixiv.py')
-rw-r--r-- gallery_dl/extractor/pixiv.py 267
1 files changed, 138 insertions, 129 deletions
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 73c5c1c..cb0e93e 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2023 Mike Fährmann
+# Copyright 2014-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -8,7 +8,7 @@
"""Extractors for https://www.pixiv.net/"""
-from .common import Extractor, Message
+from .common import Extractor, Message, Dispatch
from .. import text, util, exception
from ..cache import cache, memcache
from datetime import datetime, timedelta
@@ -43,6 +43,10 @@ class PixivExtractor(Extractor):
self.meta_comments = self.config("comments")
self.meta_captions = self.config("captions")
+ if self.sanity_workaround or self.meta_captions:
+ self.meta_captions_sub = util.re(
+ r'<a href="/jump\.php\?([^"]+)').sub
+
def items(self):
tags = self.config("tags", "japanese")
if tags == "original":
@@ -85,9 +89,9 @@ class PixivExtractor(Extractor):
if tag["is_registered"]]
if self.meta_captions and not work.get("caption") and \
not work.get("_mypixiv") and not work.get("_ajax"):
- body = self._request_ajax("/illust/" + str(work["id"]))
- if body:
- work["caption"] = text.unescape(body["illustComment"])
+ if body := self._request_ajax("/illust/" + str(work["id"])):
+ work["caption"] = self._sanitize_ajax_caption(
+ body["illustComment"])
if transform_tags:
transform_tags(work)
@@ -115,7 +119,7 @@ class PixivExtractor(Extractor):
return [
{
"url" : img["image_urls"]["original"],
- "suffix": "_p{:02}".format(num),
+ "suffix": f"_p{num:02}",
"_fallback": self._fallback_image(img),
}
for num, img in enumerate(meta_pages)
@@ -198,7 +202,7 @@ class PixivExtractor(Extractor):
for ext in ("jpg", "png", "gif"):
try:
- url = "{}0.{}".format(base, ext)
+ url = f"{base}0.{ext}"
self.request(url, method="HEAD")
break
except exception.HttpError:
@@ -209,8 +213,8 @@ class PixivExtractor(Extractor):
return [
{
- "url": "{}{}.{}".format(base, num, ext),
- "suffix": "_p{:02}".format(num),
+ "url": f"{base}{num}.{ext}",
+ "suffix": f"_p{num:02}",
"_ugoira_frame_index": num,
}
for num in range(len(frames))
@@ -226,9 +230,16 @@ class PixivExtractor(Extractor):
return ({"url": url},)
def _request_ajax(self, endpoint):
- url = "{}/ajax{}".format(self.root, endpoint)
+ url = f"{self.root}/ajax{endpoint}"
try:
- return self.request(url, headers=self.headers_web).json()["body"]
+ data = self.request_json(
+ url, headers=self.headers_web, fatal=False)
+ if not data.get("error"):
+ return data["body"]
+
+ self.log.debug("Server response: %s", util.json_dumps(data))
+ return self.log.error(
+ "'%s'", data.get("message") or "General Error")
except Exception:
return None
@@ -272,7 +283,7 @@ class PixivExtractor(Extractor):
translated_name = None
tags.append({"name": name, "translated_name": translated_name})
- work["caption"] = text.unescape(body["illustComment"])
+ work["caption"] = self._sanitize_ajax_caption(body["illustComment"])
work["page_count"] = count = body["pageCount"]
if count == 1:
return ({"url": url},)
@@ -280,16 +291,15 @@ class PixivExtractor(Extractor):
base, _, ext = url.rpartition("_p0.")
return [
{
- "url" : "{}_p{}.{}".format(base, num, ext),
- "suffix": "_p{:02}".format(num),
+ "url" : f"{base}_p{num}.{ext}",
+ "suffix": f"_p{num:02}",
}
for num in range(count)
]
def _extract_ajax_url(self, body):
try:
- original = body["urls"]["original"]
- if original:
+ if original := body["urls"]["original"]:
return original
except Exception:
pass
@@ -305,12 +315,18 @@ class PixivExtractor(Extractor):
for ext in ("jpg", "png", "gif"):
try:
- url = "{}_p0.{}".format(base, ext)
+ url = f"{base}_p0.{ext}"
self.request(url, method="HEAD")
return url
except exception.HttpError:
pass
+ def _sanitize_ajax_caption(self, caption):
+ if not caption:
+ return ""
+ return text.unescape(self.meta_captions_sub(
+ lambda m: '<a href="' + text.unquote(m[1]), caption))
+
def _fallback_image(self, src):
if isinstance(src, str):
urls = None
@@ -329,8 +345,7 @@ class PixivExtractor(Extractor):
if fmt in urls:
yield urls[fmt]
- @staticmethod
- def _date_from_url(url, offset=timedelta(hours=9)):
+ def _date_from_url(self, url, offset=timedelta(hours=9)):
try:
_, _, _, _, _, y, m, d, H, M, S, _ = url.split("/")
return datetime(
@@ -338,12 +353,11 @@ class PixivExtractor(Extractor):
except Exception:
return None
- @staticmethod
- def _make_work(kind, url, user):
+ def _make_work(self, kind, url, user):
p = url.split("/")
return {
- "create_date" : "{}-{}-{}T{}:{}:{}+09:00".format(
- p[5], p[6], p[7], p[8], p[9], p[10]) if len(p) > 9 else None,
+ "create_date" : (f"{p[5]}-{p[6]}-{p[7]}T{p[8]}:{p[9]}:{p[10]}"
+ f"+09:00" if len(p) > 9 else None),
"height" : 0,
"id" : kind,
"image_urls" : None,
@@ -367,23 +381,15 @@ class PixivExtractor(Extractor):
return {}
-class PixivUserExtractor(PixivExtractor):
+class PixivUserExtractor(Dispatch, PixivExtractor):
"""Extractor for a pixiv user profile"""
- subcategory = "user"
pattern = (BASE_PATTERN + r"/(?:"
r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id="
r")(\d+)(?:$|[?#])")
example = "https://www.pixiv.net/en/users/12345"
- def __init__(self, match):
- PixivExtractor.__init__(self, match)
- self.user_id = match.group(1)
-
- def initialize(self):
- pass
-
def items(self):
- base = "{}/users/{}/".format(self.root, self.user_id)
+ base = f"{self.root}/users/{self.groups[0]}/"
return self._dispatch_extractors((
(PixivAvatarExtractor , base + "avatar"),
(PixivBackgroundExtractor , base + "background"),
@@ -391,7 +397,10 @@ class PixivUserExtractor(PixivExtractor):
(PixivFavoriteExtractor , base + "bookmarks/artworks"),
(PixivNovelBookmarkExtractor, base + "bookmarks/novels"),
(PixivNovelUserExtractor , base + "novels"),
- ), ("artworks",))
+ ), ("artworks",), (
+ ("bookmark", "novel-bookmark"),
+ ("user" , "novel-user"),
+ ))
class PixivArtworksExtractor(PixivExtractor):
@@ -434,7 +443,9 @@ class PixivArtworksExtractor(PixivExtractor):
if self.sanity_workaround:
body = self._request_ajax(
- "/user/{}/profile/all".format(self.user_id))
+ f"/user/{self.user_id}/profile/all")
+ if not body:
+ return ()
try:
ajax_ids = list(map(int, body["illusts"]))
ajax_ids.extend(map(int, body["manga"]))
@@ -557,7 +568,7 @@ class PixivWorkExtractor(PixivExtractor):
def __init__(self, match):
PixivExtractor.__init__(self, match)
- self.illust_id = match.group(1) or match.group(2)
+ self.illust_id = match[1] or match[2]
def works(self):
works = (self.api.illust_detail(self.illust_id),)
@@ -642,7 +653,7 @@ class PixivFavoriteExtractor(PixivExtractor):
for preview in self.api.user_following(self.user_id, restrict):
user = preview["user"]
user["_extractor"] = PixivUserExtractor
- url = "https://www.pixiv.net/users/{}".format(user["id"])
+ url = f"https://www.pixiv.net/users/{user['id']}"
yield Message.Queue, url, user
@@ -657,7 +668,7 @@ class PixivRankingExtractor(PixivExtractor):
def __init__(self, match):
PixivExtractor.__init__(self, match)
- self.query = match.group(1)
+ self.query = match[1]
self.mode = self.date = None
def works(self):
@@ -693,12 +704,11 @@ class PixivRankingExtractor(PixivExtractor):
try:
self.mode = mode = mode_map[mode]
except KeyError:
- raise exception.StopExtraction("Invalid mode '%s'", mode)
+ raise exception.AbortExtraction(f"Invalid mode '{mode}'")
- date = query.get("date")
- if date:
+ if date := query.get("date"):
if len(date) == 8 and date.isdecimal():
- date = "{}-{}-{}".format(date[0:4], date[4:6], date[6:8])
+ date = f"{date[0:4]}-{date[4:6]}-{date[6:8]}"
else:
self.log.warning("invalid date '%s'", date)
date = None
@@ -746,7 +756,7 @@ class PixivSearchExtractor(PixivExtractor):
try:
self.word = query["word"]
except KeyError:
- raise exception.StopExtraction("Missing search term")
+ raise exception.AbortExtraction("Missing search term")
sort = query.get("order", "date_d")
sort_map = {
@@ -759,7 +769,7 @@ class PixivSearchExtractor(PixivExtractor):
try:
self.sort = sort = sort_map[sort]
except KeyError:
- raise exception.StopExtraction("Invalid search order '%s'", sort)
+ raise exception.AbortExtraction(f"Invalid search order '{sort}'")
target = query.get("s_mode", "s_tag_full")
target_map = {
@@ -770,7 +780,7 @@ class PixivSearchExtractor(PixivExtractor):
try:
self.target = target = target_map[target]
except KeyError:
- raise exception.StopExtraction("Invalid search mode '%s'", target)
+ raise exception.AbortExtraction(f"Invalid search mode '{target}'")
self.date_start = query.get("scd")
self.date_end = query.get("ecd")
@@ -811,7 +821,7 @@ class PixivPixivisionExtractor(PixivExtractor):
def __init__(self, match):
PixivExtractor.__init__(self, match)
- self.pixivision_id = match.group(1)
+ self.pixivision_id = match[1]
def works(self):
return (
@@ -860,18 +870,71 @@ class PixivSeriesExtractor(PixivExtractor):
yield work
+class PixivSketchExtractor(Extractor):
+ """Extractor for user pages on sketch.pixiv.net"""
+ category = "pixiv"
+ subcategory = "sketch"
+ directory_fmt = ("{category}", "sketch", "{user[unique_name]}")
+ filename_fmt = "{post_id} {id}.{extension}"
+ archive_fmt = "S{user[id]}_{id}"
+ root = "https://sketch.pixiv.net"
+ cookies_domain = ".pixiv.net"
+ pattern = r"(?:https?://)?sketch\.pixiv\.net/@([^/?#]+)"
+ example = "https://sketch.pixiv.net/@USER"
+
+ def items(self):
+ self.username = self.groups[0]
+ headers = {"Referer": f"{self.root}/@{self.username}"}
+
+ for post in self.posts():
+ media = post["media"]
+ post["post_id"] = post["id"]
+ post["date"] = text.parse_datetime(
+ post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ util.delete_items(post, ("id", "media", "_links"))
+
+ yield Message.Directory, post
+ post["_http_headers"] = headers
+
+ for photo in media:
+ original = photo["photo"]["original"]
+ post["id"] = photo["id"]
+ post["width"] = original["width"]
+ post["height"] = original["height"]
+
+ url = original["url"]
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
+
+ def posts(self):
+ url = f"{self.root}/api/walls/@{self.username}/posts/public.json"
+ headers = {
+ "Accept": "application/vnd.sketch-v4+json",
+ "Referer": self.root + "/",
+ "X-Requested-With": f"{self.root}/@{self.username}",
+ }
+
+ while True:
+ data = self.request_json(url, headers=headers)
+ yield from data["data"]["items"]
+
+ next_url = data["_links"].get("next")
+ if not next_url:
+ return
+ url = self.root + next_url["href"]
+
+
+###############################################################################
+# Novels ######################################################################
+
class PixivNovelExtractor(PixivExtractor):
- """Extractor for pixiv novels"""
- subcategory = "novel"
+ """Base class for pixiv novel extractors"""
+ category = "pixiv-novel"
request_interval = (0.5, 1.5)
- pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)"
- example = "https://www.pixiv.net/novel/show.php?id=12345"
-
- def __init__(self, match):
- PixivExtractor.__init__(self, match)
- self.novel_id = match.group(1)
def items(self):
+ self.novel_id = self.groups[0]
+
tags = self.config("tags", "japanese")
if tags == "original":
transform_tags = None
@@ -928,7 +991,7 @@ class PixivNovelExtractor(PixivExtractor):
path.rpartition(".")[0].replace("_master1200", ""))
novel["date_url"] = self._date_from_url(url)
novel["num"] += 1
- novel["suffix"] = "_p{:02}".format(novel["num"])
+ novel["suffix"] = f"_p{novel['num']:02}"
novel["_fallback"] = (url + ".png",)
url_jpg = url + ".jpg"
text.nameext_from_url(url_jpg, novel)
@@ -960,7 +1023,7 @@ class PixivNovelExtractor(PixivExtractor):
novel.update(image)
novel["date_url"] = self._date_from_url(url)
novel["num"] += 1
- novel["suffix"] = "_p{:02}".format(novel["num"])
+ novel["suffix"] = f"_p{novel['num']:02}"
text.nameext_from_url(url, novel)
yield Message.Url, url, novel
@@ -969,10 +1032,17 @@ class PixivNovelExtractor(PixivExtractor):
novel["date_url"] = None
for illust_id in illusts:
novel["num"] += 1
- novel["suffix"] = "_p{:02}".format(novel["num"])
- url = "{}/artworks/{}".format(self.root, illust_id)
+ novel["suffix"] = f"_p{novel['num']:02}"
+ url = f"{self.root}/artworks/{illust_id}"
yield Message.Queue, url, novel
+
+class PixivNovelNovelExtractor(PixivNovelExtractor):
+ """Extractor for pixiv novels"""
+ subcategory = "novel"
+ pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)"
+ example = "https://www.pixiv.net/novel/show.php?id=12345"
+
def novels(self):
novel = self.api.novel_detail(self.novel_id)
if self.config("full-series") and novel["series"]:
@@ -983,7 +1053,7 @@ class PixivNovelExtractor(PixivExtractor):
class PixivNovelUserExtractor(PixivNovelExtractor):
"""Extractor for pixiv users' novels"""
- subcategory = "novel-user"
+ subcategory = "user"
pattern = USER_PATTERN + r"/novels"
example = "https://www.pixiv.net/en/users/12345/novels"
@@ -993,7 +1063,7 @@ class PixivNovelUserExtractor(PixivNovelExtractor):
class PixivNovelSeriesExtractor(PixivNovelExtractor):
"""Extractor for pixiv novel series"""
- subcategory = "novel-series"
+ subcategory = "series"
pattern = BASE_PATTERN + r"/novel/series/(\d+)"
example = "https://www.pixiv.net/novel/series/12345"
@@ -1003,86 +1073,25 @@ class PixivNovelSeriesExtractor(PixivNovelExtractor):
class PixivNovelBookmarkExtractor(PixivNovelExtractor):
"""Extractor for bookmarked pixiv novels"""
- subcategory = "novel-bookmark"
+ subcategory = "bookmark"
pattern = (USER_PATTERN + r"/bookmarks/novels"
r"(?:/([^/?#]+))?(?:/?\?([^#]+))?")
example = "https://www.pixiv.net/en/users/12345/bookmarks/novels"
- def __init__(self, match):
- PixivNovelExtractor.__init__(self, match)
- self.user_id, self.tag, self.query = match.groups()
-
def novels(self):
- if self.tag:
- tag = text.unquote(self.tag)
- else:
- tag = None
+ user_id, tag, query = self.groups
+ tag = text.unquote(tag) if tag else None
- if text.parse_query(self.query).get("rest") == "hide":
+ if text.parse_query(query).get("rest") == "hide":
restrict = "private"
else:
restrict = "public"
- return self.api.user_bookmarks_novel(self.user_id, tag, restrict)
-
-
-class PixivSketchExtractor(Extractor):
- """Extractor for user pages on sketch.pixiv.net"""
- category = "pixiv"
- subcategory = "sketch"
- directory_fmt = ("{category}", "sketch", "{user[unique_name]}")
- filename_fmt = "{post_id} {id}.{extension}"
- archive_fmt = "S{user[id]}_{id}"
- root = "https://sketch.pixiv.net"
- cookies_domain = ".pixiv.net"
- pattern = r"(?:https?://)?sketch\.pixiv\.net/@([^/?#]+)"
- example = "https://sketch.pixiv.net/@USER"
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.username = match.group(1)
-
- def items(self):
- headers = {"Referer": "{}/@{}".format(self.root, self.username)}
-
- for post in self.posts():
- media = post["media"]
- post["post_id"] = post["id"]
- post["date"] = text.parse_datetime(
- post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
- util.delete_items(post, ("id", "media", "_links"))
-
- yield Message.Directory, post
- post["_http_headers"] = headers
-
- for photo in media:
- original = photo["photo"]["original"]
- post["id"] = photo["id"]
- post["width"] = original["width"]
- post["height"] = original["height"]
-
- url = original["url"]
- text.nameext_from_url(url, post)
- yield Message.Url, url, post
-
- def posts(self):
- url = "{}/api/walls/@{}/posts/public.json".format(
- self.root, self.username)
- headers = {
- "Accept": "application/vnd.sketch-v4+json",
- "X-Requested-With": "{}/@{}".format(self.root, self.username),
- "Referer": self.root + "/",
- }
-
- while True:
- data = self.request(url, headers=headers).json()
- yield from data["data"]["items"]
+ return self.api.user_bookmarks_novel(user_id, tag, restrict)
- next_url = data["_links"].get("next")
- if not next_url:
- return
- url = self.root + next_url["href"]
+###############################################################################
+# API #########################################################################
class PixivAppAPI():
"""Minimal interface for the Pixiv App API for mobile devices
@@ -1288,7 +1297,7 @@ class PixivAppAPI():
self.extractor.wait(seconds=300)
continue
- raise exception.StopExtraction("API request failed: %s", error)
+ raise exception.AbortExtraction(f"API request failed: {error}")
def _pagination(self, endpoint, params,
key_items="illusts", key_data=None):