about | summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/pixiv.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/pixiv.py')
-rw-r--r-- gallery_dl/extractor/pixiv.py 77
1 files changed, 64 insertions, 13 deletions
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index c2d1243..8c6e6d8 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -38,6 +38,7 @@ class PixivExtractor(Extractor):
self.meta_user = self.config("metadata")
self.meta_bookmark = self.config("metadata-bookmark")
self.meta_comments = self.config("comments")
+ self.meta_captions = self.config("captions")
def items(self):
tags = self.config("tags", "japanese")
@@ -76,8 +77,8 @@ class PixivExtractor(Extractor):
detail = self.api.illust_bookmark_detail(work["id"])
work["tags_bookmark"] = [tag["name"] for tag in detail["tags"]
if tag["is_registered"]]
- if self.sanity_workaround and not work.get("caption") and \
- not work.get("_mypixiv"):
+ if self.meta_captions and not work.get("caption") and \
+ not work.get("_mypixiv") and not work.get("_ajax"):
body = self._request_ajax("/illust/" + str(work["id"]))
if body:
work["caption"] = text.unescape(body["illustComment"])
@@ -108,10 +109,10 @@ class PixivExtractor(Extractor):
if self.load_ugoira:
try:
return self._extract_ugoira(work)
- except exception.StopExtraction as exc:
+ except Exception as exc:
self.log.warning(
- "Unable to retrieve Ugoira metatdata (%s - %s)",
- work["id"], exc.message)
+ "%s: Unable to retrieve Ugoira metatdata (%s - %s)",
+ work["id"], exc.__class__.__name__, exc)
elif work["page_count"] == 1:
url = meta_single_page["original_image_url"]
@@ -186,6 +187,7 @@ class PixivExtractor(Extractor):
return None
def _extract_ajax(self, work, body):
+ work["_ajax"] = True
url = self._extract_ajax_url(body)
if not url:
return ()
@@ -243,12 +245,12 @@ class PixivExtractor(Extractor):
original = body["urls"]["original"]
if original:
return original
- except KeyError:
+ except Exception:
pass
try:
square1200 = body["userIllusts"][body["id"]]["url"]
- except KeyError:
+ except Exception:
return
parts = square1200.rpartition("_p0")[0].split("/")
del parts[3:5]
@@ -293,9 +295,6 @@ class PixivExtractor(Extractor):
"x_restrict" : 0,
}
- def _web_to_mobile(self, work):
- return work
-
def works(self):
"""Return an iterable containing all relevant 'work' objects"""
@@ -334,15 +333,17 @@ class PixivUserExtractor(PixivExtractor):
class PixivArtworksExtractor(PixivExtractor):
"""Extractor for artworks of a pixiv user"""
subcategory = "artworks"
+ _warning = True
pattern = (BASE_PATTERN + r"/(?:"
r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
r"(?:/([^/?#]+))?/?(?:$|[?#])"
r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
example = "https://www.pixiv.net/en/users/12345/artworks"
- def __init__(self, match):
- PixivExtractor.__init__(self, match)
- u1, t1, u2, t2 = match.groups()
+ def _init(self):
+ PixivExtractor._init(self)
+
+ u1, t1, u2, t2 = self.groups
if t1:
t1 = text.unquote(t1)
elif t2:
@@ -350,6 +351,14 @@ class PixivArtworksExtractor(PixivExtractor):
self.user_id = u1 or u2
self.tag = t1 or t2
+ if self.sanity_workaround:
+ self.cookies_domain = d = ".pixiv.net"
+ self._init_cookies()
+ if self._warning and not self.cookies.get("PHPSESSID", domain=d):
+ PixivArtworksExtractor._warning = False
+ self.log.warning("No 'PHPSESSID' cookie set. Can detect only "
+ "non R-18 'sanity_level' works.")
+
def metadata(self):
if self.config("metadata"):
self.api.user_detail(self.user_id)
@@ -358,6 +367,19 @@ class PixivArtworksExtractor(PixivExtractor):
def works(self):
works = self.api.user_illusts(self.user_id)
+ if self.sanity_workaround:
+ body = self._request_ajax(
+ "/user/{}/profile/all".format(self.user_id))
+ try:
+ ajax_ids = list(map(int, body["illusts"]))
+ ajax_ids.extend(map(int, body["manga"]))
+ ajax_ids.sort()
+ except Exception as exc:
+ self.log.warning("Unable to collect artwork IDs using AJAX "
+ "API (%s: %s)", exc.__class__.__name__, exc)
+ else:
+ works = self._extend_sanity(works, ajax_ids)
+
if self.tag:
tag = self.tag.lower()
works = (
@@ -367,6 +389,35 @@ class PixivArtworksExtractor(PixivExtractor):
return works
+ def _extend_sanity(self, works, ajax_ids):
+ user = {"id": 1}
+ index = len(ajax_ids) - 1
+
+ for work in works:
+ while index >= 0:
+ work_id = work["id"]
+ ajax_id = ajax_ids[index]
+
+ if ajax_id == work_id:
+ index -= 1
+ break
+
+ elif ajax_id > work_id:
+ index -= 1
+ self.log.debug("Inserting work %s", ajax_id)
+ yield self._make_work(ajax_id, self.sanity_url, user)
+
+ else: # ajax_id < work_id
+ break
+
+ yield work
+
+ while index >= 0:
+ ajax_id = ajax_ids[index]
+ self.log.debug("Inserting work %s", ajax_id)
+ yield self._make_work(ajax_id, self.sanity_url, user)
+ index -= 1
+
class PixivAvatarExtractor(PixivExtractor):
"""Extractor for pixiv avatars"""