diff options
Diffstat (limited to 'gallery_dl/extractor/pixiv.py')
| -rw-r--r-- | gallery_dl/extractor/pixiv.py | 77 |
1 files changed, 64 insertions, 13 deletions
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index c2d1243..8c6e6d8 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -38,6 +38,7 @@ class PixivExtractor(Extractor): self.meta_user = self.config("metadata") self.meta_bookmark = self.config("metadata-bookmark") self.meta_comments = self.config("comments") + self.meta_captions = self.config("captions") def items(self): tags = self.config("tags", "japanese") @@ -76,8 +77,8 @@ class PixivExtractor(Extractor): detail = self.api.illust_bookmark_detail(work["id"]) work["tags_bookmark"] = [tag["name"] for tag in detail["tags"] if tag["is_registered"]] - if self.sanity_workaround and not work.get("caption") and \ - not work.get("_mypixiv"): + if self.meta_captions and not work.get("caption") and \ + not work.get("_mypixiv") and not work.get("_ajax"): body = self._request_ajax("/illust/" + str(work["id"])) if body: work["caption"] = text.unescape(body["illustComment"]) @@ -108,10 +109,10 @@ class PixivExtractor(Extractor): if self.load_ugoira: try: return self._extract_ugoira(work) - except exception.StopExtraction as exc: + except Exception as exc: self.log.warning( - "Unable to retrieve Ugoira metatdata (%s - %s)", - work["id"], exc.message) + "%s: Unable to retrieve Ugoira metatdata (%s - %s)", + work["id"], exc.__class__.__name__, exc) elif work["page_count"] == 1: url = meta_single_page["original_image_url"] @@ -186,6 +187,7 @@ class PixivExtractor(Extractor): return None def _extract_ajax(self, work, body): + work["_ajax"] = True url = self._extract_ajax_url(body) if not url: return () @@ -243,12 +245,12 @@ class PixivExtractor(Extractor): original = body["urls"]["original"] if original: return original - except KeyError: + except Exception: pass try: square1200 = body["userIllusts"][body["id"]]["url"] - except KeyError: + except Exception: return parts = square1200.rpartition("_p0")[0].split("/") del parts[3:5] @@ -293,9 +295,6 @@ class PixivExtractor(Extractor): "x_restrict" : 0, } - def _web_to_mobile(self, work): - return work - def works(self): """Return an iterable containing all relevant 'work' objects""" @@ -334,15 +333,17 @@ class PixivUserExtractor(PixivExtractor): class PixivArtworksExtractor(PixivExtractor): """Extractor for artworks of a pixiv user""" subcategory = "artworks" + _warning = True pattern = (BASE_PATTERN + r"/(?:" r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)" r"(?:/([^/?#]+))?/?(?:$|[?#])" r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)") example = "https://www.pixiv.net/en/users/12345/artworks" - def __init__(self, match): - PixivExtractor.__init__(self, match) - u1, t1, u2, t2 = match.groups() + def _init(self): + PixivExtractor._init(self) + + u1, t1, u2, t2 = self.groups if t1: t1 = text.unquote(t1) elif t2: @@ -350,6 +351,14 @@ class PixivArtworksExtractor(PixivExtractor): self.user_id = u1 or u2 self.tag = t1 or t2 + if self.sanity_workaround: + self.cookies_domain = d = ".pixiv.net" + self._init_cookies() + if self._warning and not self.cookies.get("PHPSESSID", domain=d): + PixivArtworksExtractor._warning = False + self.log.warning("No 'PHPSESSID' cookie set. Can detect only " + "non R-18 'sanity_level' works.") + def metadata(self): if self.config("metadata"): self.api.user_detail(self.user_id) @@ -358,6 +367,19 @@ class PixivArtworksExtractor(PixivExtractor): def works(self): works = self.api.user_illusts(self.user_id) + if self.sanity_workaround: + body = self._request_ajax( + "/user/{}/profile/all".format(self.user_id)) + try: + ajax_ids = list(map(int, body["illusts"])) + ajax_ids.extend(map(int, body["manga"])) + ajax_ids.sort() + except Exception as exc: + self.log.warning("Unable to collect artwork IDs using AJAX " + "API (%s: %s)", exc.__class__.__name__, exc) + else: + works = self._extend_sanity(works, ajax_ids) + if self.tag: tag = self.tag.lower() works = ( @@ -367,6 +389,35 @@ class PixivArtworksExtractor(PixivExtractor): return works + def _extend_sanity(self, works, ajax_ids): + user = {"id": 1} + index = len(ajax_ids) - 1 + + for work in works: + while index >= 0: + work_id = work["id"] + ajax_id = ajax_ids[index] + + if ajax_id == work_id: + index -= 1 + break + + elif ajax_id > work_id: + index -= 1 + self.log.debug("Inserting work %s", ajax_id) + yield self._make_work(ajax_id, self.sanity_url, user) + + else: # ajax_id < work_id + break + + yield work + + while index >= 0: + ajax_id = ajax_ids[index] + self.log.debug("Inserting work %s", ajax_id) + yield self._make_work(ajax_id, self.sanity_url, user) + index -= 1 + class PixivAvatarExtractor(PixivExtractor): """Extractor for pixiv avatars""" |
