summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/pixiv.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/pixiv.py')
-rw-r--r--gallery_dl/extractor/pixiv.py300
1 files changed, 225 insertions, 75 deletions
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index c908e44..c2d1243 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -27,11 +27,17 @@ class PixivExtractor(Extractor):
filename_fmt = "{id}_p{num}.{extension}"
archive_fmt = "{id}{suffix}.{extension}"
cookies_domain = None
+ sanity_url = "https://s.pximg.net/common/images/limit_sanity_level_360.png"
+ mypixiv_url = "https://s.pximg.net/common/images/limit_mypixiv_360.png"
def _init(self):
self.api = PixivAppAPI(self)
self.load_ugoira = self.config("ugoira", True)
self.max_posts = self.config("max-posts", 0)
+ self.sanity_workaround = self.config("sanity", True)
+ self.meta_user = self.config("metadata")
+ self.meta_bookmark = self.config("metadata-bookmark")
+ self.meta_comments = self.config("comments")
def items(self):
tags = self.config("tags", "japanese")
@@ -46,11 +52,7 @@ class PixivExtractor(Extractor):
def transform_tags(work):
work["tags"] = [tag["name"] for tag in work["tags"]]
- url_sanity = ("https://s.pximg.net/common/images"
- "/limit_sanity_level_360.png")
ratings = {0: "General", 1: "R-18", 2: "R-18G"}
- meta_user = self.config("metadata")
- meta_bookmark = self.config("metadata-bookmark")
metadata = self.metadata()
works = self.works()
@@ -60,18 +62,26 @@ class PixivExtractor(Extractor):
if not work["user"]["id"]:
continue
- meta_single_page = work["meta_single_page"]
- meta_pages = work["meta_pages"]
- del work["meta_single_page"]
- del work["image_urls"]
- del work["meta_pages"]
+ files = self._extract_files(work)
- if meta_user:
+ if self.meta_user:
work.update(self.api.user_detail(work["user"]["id"]))
- if meta_bookmark and work["is_bookmarked"]:
+ if self.meta_comments:
+ if work["total_comments"]:
+ work["comments"] = list(
+ self.api.illust_comments(work["id"]))
+ else:
+ work["comments"] = ()
+ if self.meta_bookmark and work["is_bookmarked"]:
detail = self.api.illust_bookmark_detail(work["id"])
work["tags_bookmark"] = [tag["name"] for tag in detail["tags"]
if tag["is_registered"]]
+ if self.sanity_workaround and not work.get("caption") and \
+ not work.get("_mypixiv"):
+ body = self._request_ajax("/illust/" + str(work["id"]))
+ if body:
+ work["caption"] = text.unescape(body["illustComment"])
+
if transform_tags:
transform_tags(work)
work["num"] = 0
@@ -81,69 +91,177 @@ class PixivExtractor(Extractor):
work.update(metadata)
yield Message.Directory, work
+ for work["num"], file in enumerate(files):
+ url = file["url"]
+ work.update(file)
+ work["date_url"] = self._date_from_url(url)
+ yield Message.Url, url, text.nameext_from_url(url, work)
- if work["type"] == "ugoira":
- if not self.load_ugoira:
- continue
+ def _extract_files(self, work):
+ meta_single_page = work["meta_single_page"]
+ meta_pages = work["meta_pages"]
+ del work["meta_single_page"]
+ del work["image_urls"]
+ del work["meta_pages"]
+ if work["type"] == "ugoira":
+ if self.load_ugoira:
try:
- ugoira = self.api.ugoira_metadata(work["id"])
+ return self._extract_ugoira(work)
except exception.StopExtraction as exc:
self.log.warning(
"Unable to retrieve Ugoira metatdata (%s - %s)",
- work.get("id"), exc.message)
- continue
-
- url = ugoira["zip_urls"]["medium"]
- work["frames"] = frames = ugoira["frames"]
- work["date_url"] = self._date_from_url(url)
- work["_http_adjust_extension"] = False
-
- if self.load_ugoira == "original":
- base, sep, _ = url.rpartition("_ugoira")
- base = base.replace(
- "/img-zip-ugoira/", "/img-original/", 1) + sep
-
- for ext in ("jpg", "png", "gif"):
- try:
- url = ("{}0.{}".format(base, ext))
- self.request(url, method="HEAD")
- break
- except exception.HttpError:
- pass
- else:
- self.log.warning(
- "Unable to find Ugoira frame URLs (%s)",
- work.get("id"))
- continue
-
- for num, frame in enumerate(frames):
- url = ("{}{}.{}".format(base, num, ext))
- work["num"] = work["_ugoira_frame_index"] = num
- work["suffix"] = "_p{:02}".format(num)
- text.nameext_from_url(url, work)
- yield Message.Url, url, work
-
+ work["id"], exc.message)
+
+ elif work["page_count"] == 1:
+ url = meta_single_page["original_image_url"]
+ if url == self.sanity_url:
+ if self.sanity_workaround:
+ self.log.warning("%s: 'sanity_level' warning", work["id"])
+ body = self._request_ajax("/illust/" + str(work["id"]))
+ return self._extract_ajax(work, body)
else:
- url = url.replace("_ugoira600x600", "_ugoira1920x1080")
- yield Message.Url, url, text.nameext_from_url(url, work)
-
- elif work["page_count"] == 1:
- url = meta_single_page["original_image_url"]
- if url == url_sanity:
self.log.warning(
- "Unable to download work %s ('sanity_level' warning)",
+ "%s: Unable to download work ('sanity_level' warning)",
work["id"])
- continue
- work["date_url"] = self._date_from_url(url)
- yield Message.Url, url, text.nameext_from_url(url, work)
+ elif url == self.mypixiv_url:
+ work["_mypixiv"] = True
+ self.log.warning("%s: 'My pixiv' locked", work["id"])
+ return ()
+ else:
+ return ({"url": url},)
+ else:
+ return [
+ {
+ "url" : img["image_urls"]["original"],
+ "suffix": "_p{:02}".format(num),
+ }
+ for num, img in enumerate(meta_pages)
+ ]
+
+ return ()
+
+ def _extract_ugoira(self, work):
+ ugoira = self.api.ugoira_metadata(work["id"])
+ url = ugoira["zip_urls"]["medium"]
+ work["_ugoira_frame_data"] = work["frames"] = frames = ugoira["frames"]
+ work["date_url"] = self._date_from_url(url)
+ work["_http_adjust_extension"] = False
+
+ if self.load_ugoira == "original":
+ work["_ugoira_original"] = True
+ base, sep, _ = url.rpartition("_ugoira")
+ base = base.replace("/img-zip-ugoira/", "/img-original/", 1) + sep
+
+ for ext in ("jpg", "png", "gif"):
+ try:
+ url = "{}0.{}".format(base, ext)
+ self.request(url, method="HEAD")
+ break
+ except exception.HttpError:
+ pass
else:
- for work["num"], img in enumerate(meta_pages):
- url = img["image_urls"]["original"]
- work["date_url"] = self._date_from_url(url)
- work["suffix"] = "_p{:02}".format(work["num"])
- yield Message.Url, url, text.nameext_from_url(url, work)
+ self.log.warning(
+ "Unable to find Ugoira frame URLs (%s)", work["id"])
+
+ return [
+ {
+ "url": "{}{}.{}".format(base, num, ext),
+ "suffix": "_p{:02}".format(num),
+ "_ugoira_frame_index": num,
+ }
+ for num in range(len(frames))
+ ]
+ else:
+ work["_ugoira_original"] = False
+ url = url.replace("_ugoira600x600", "_ugoira1920x1080", 1)
+ return ({"url": url},)
+
+ def _request_ajax(self, endpoint):
+ url = "{}/ajax{}".format(self.root, endpoint)
+ try:
+ return self.request(url, headers=self.headers_web).json()["body"]
+ except Exception:
+ return None
+
+ def _extract_ajax(self, work, body):
+ url = self._extract_ajax_url(body)
+ if not url:
+ return ()
+
+ for key_app, key_ajax in (
+ ("title" , "illustTitle"),
+ ("image_urls" , "urls"),
+ ("create_date" , "createDate"),
+ ("width" , "width"),
+ ("height" , "height"),
+ ("sanity_level" , "sl"),
+ ("total_view" , "viewCount"),
+ ("total_comments" , "commentCount"),
+ ("total_bookmarks" , "bookmarkCount"),
+ ("restrict" , "restrict"),
+ ("x_restrict" , "xRestrict"),
+ ("illust_ai_type" , "aiType"),
+ ("illust_book_style", "bookStyle"),
+ ):
+ work[key_app] = body[key_ajax]
+
+ work["user"] = {
+ "account" : body["userAccount"],
+ "id" : int(body["userId"]),
+ "is_followed": False,
+ "name" : body["userName"],
+ "profile_image_urls": {},
+ }
+
+ work["tags"] = tags = []
+ for tag in body["tags"]["tags"]:
+ name = tag["tag"]
+ try:
+ translated_name = tag["translation"]["en"]
+ except Exception:
+ translated_name = None
+ tags.append({"name": name, "translated_name": translated_name})
+
+ work["caption"] = text.unescape(body["illustComment"])
+ work["page_count"] = count = body["pageCount"]
+ if count == 1:
+ return ({"url": url},)
+
+ base, _, ext = url.rpartition("_p0.")
+ return [
+ {
+ "url" : "{}_p{}.{}".format(base, num, ext),
+ "suffix": "_p{:02}".format(num),
+ }
+ for num in range(count)
+ ]
+
+ def _extract_ajax_url(self, body):
+ try:
+ original = body["urls"]["original"]
+ if original:
+ return original
+ except KeyError:
+ pass
+
+ try:
+ square1200 = body["userIllusts"][body["id"]]["url"]
+ except KeyError:
+ return
+ parts = square1200.rpartition("_p0")[0].split("/")
+ del parts[3:5]
+ parts[3] = "img-original"
+ base = "/".join(parts)
+
+ for ext in ("jpg", "png", "gif"):
+ try:
+ url = "{}_p0.{}".format(base, ext)
+ self.request(url, method="HEAD")
+ return url
+ except exception.HttpError:
+ pass
@staticmethod
def _date_from_url(url, offset=timedelta(hours=9)):
@@ -175,6 +293,9 @@ class PixivExtractor(Extractor):
"x_restrict" : 0,
}
+ def _web_to_mobile(self, work):
+ return work
+
def works(self):
"""Return an iterable containing all relevant 'work' objects"""
@@ -255,12 +376,12 @@ class PixivAvatarExtractor(PixivExtractor):
pattern = USER_PATTERN + r"/avatar"
example = "https://www.pixiv.net/en/users/12345/avatar"
- def __init__(self, match):
- PixivExtractor.__init__(self, match)
- self.user_id = match.group(1)
+ def _init(self):
+ PixivExtractor._init(self)
+ self.sanity_workaround = self.meta_comments = False
def works(self):
- user = self.api.user_detail(self.user_id)["user"]
+ user = self.api.user_detail(self.groups[0])["user"]
url = user["profile_image_urls"]["medium"].replace("_170.", ".")
return (self._make_work("avatar", url, user),)
@@ -273,12 +394,12 @@ class PixivBackgroundExtractor(PixivExtractor):
pattern = USER_PATTERN + "/background"
example = "https://www.pixiv.net/en/users/12345/background"
- def __init__(self, match):
- PixivExtractor.__init__(self, match)
- self.user_id = match.group(1)
+ def _init(self):
+ PixivExtractor._init(self)
+ self.sanity_workaround = self.meta_comments = False
def works(self):
- detail = self.api.user_detail(self.user_id)
+ detail = self.api.user_detail(self.groups[0])
url = detail["profile"]["background_image_url"]
if not url:
return ()
@@ -335,6 +456,22 @@ class PixivWorkExtractor(PixivExtractor):
return works
+class PixivUnlistedExtractor(PixivExtractor):
+ """Extractor for a unlisted pixiv illustrations"""
+ subcategory = "unlisted"
+ pattern = BASE_PATTERN + r"/(?:en/)?artworks/unlisted/(\w+)"
+ example = "https://www.pixiv.net/en/artworks/unlisted/a1b2c3d4e5f6g7h8i9j0"
+
+ def _extract_files(self, work):
+ body = self._request_ajax("/illust/unlisted/" + work["id"])
+ work["id_unlisted"] = work["id"]
+ work["id"] = text.parse_int(body["illustId"])
+ return self._extract_ajax(work, body)
+
+ def works(self):
+ return ({"id": self.groups[0], "user": {"id": 1}},)
+
+
class PixivFavoriteExtractor(PixivExtractor):
"""Extractor for all favorites/bookmarks of a pixiv user"""
subcategory = "favorite"
@@ -626,8 +763,6 @@ class PixivNovelExtractor(PixivExtractor):
work["tags"] = [tag["name"] for tag in work["tags"]]
ratings = {0: "General", 1: "R-18", 2: "R-18G"}
- meta_user = self.config("metadata")
- meta_bookmark = self.config("metadata-bookmark")
embeds = self.config("embeds")
covers = self.config("covers")
@@ -645,9 +780,15 @@ class PixivNovelExtractor(PixivExtractor):
if self.max_posts:
novels = itertools.islice(novels, self.max_posts)
for novel in novels:
- if meta_user:
+ if self.meta_user:
novel.update(self.api.user_detail(novel["user"]["id"]))
- if meta_bookmark and novel["is_bookmarked"]:
+ if self.meta_comments:
+ if novel["total_comments"]:
+ novel["comments"] = list(
+ self.api.novel_comments(novel["id"]))
+ else:
+ novel["comments"] = ()
+ if self.meta_bookmark and novel["is_bookmarked"]:
detail = self.api.novel_bookmark_detail(novel["id"])
novel["tags_bookmark"] = [tag["name"] for tag in detail["tags"]
if tag["is_registered"]]
@@ -848,6 +989,7 @@ class PixivAppAPI():
self.username = extractor._get_auth_info()[0]
self.user = None
+ extractor.headers_web = extractor.session.headers.copy()
extractor.session.headers.update({
"App-OS" : "ios",
"App-OS-Version": "16.7.2",
@@ -913,6 +1055,10 @@ class PixivAppAPI():
return self._call(
"/v2/illust/bookmark/detail", params)["bookmark_detail"]
+ def illust_comments(self, illust_id):
+ params = {"illust_id": illust_id}
+ return self._pagination("/v3/illust/comments", params, "comments")
+
def illust_follow(self, restrict="all"):
params = {"restrict": restrict}
return self._pagination("/v2/illust/follow", params)
@@ -935,6 +1081,10 @@ class PixivAppAPI():
return self._call(
"/v2/novel/bookmark/detail", params)["bookmark_detail"]
+ def novel_comments(self, novel_id):
+ params = {"novel_id": novel_id}
+ return self._pagination("/v1/novel/comments", params, "comments")
+
def novel_detail(self, novel_id):
params = {"novel_id": novel_id}
return self._call("/v2/novel/detail", params)["novel"]