diff options
| author | 2024-12-02 00:31:59 -0500 | |
|---|---|---|
| committer | 2024-12-02 00:31:59 -0500 | |
| commit | 1981ccaaea6eab2cf32536ec5afe132a870914d8 (patch) | |
| tree | 013f1e17d922d3a6abf7f57aa6a175c2ce5d93bc /gallery_dl/extractor/pixiv.py | |
| parent | fc004701f923bb954a22c7fec2ae8d607e78cb2b (diff) | |
New upstream version 1.28.0. (upstream/1.28.0)
Diffstat (limited to 'gallery_dl/extractor/pixiv.py')
| -rw-r--r-- | gallery_dl/extractor/pixiv.py | 123 |
1 file changed, 66 insertions, 57 deletions
```diff
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 8c6e6d8..8ad061d 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -26,13 +26,14 @@ class PixivExtractor(Extractor):
     directory_fmt = ("{category}", "{user[id]} {user[account]}")
     filename_fmt = "{id}_p{num}.{extension}"
     archive_fmt = "{id}{suffix}.{extension}"
-    cookies_domain = None
+    cookies_domain = ".pixiv.net"
     sanity_url = "https://s.pximg.net/common/images/limit_sanity_level_360.png"
     mypixiv_url = "https://s.pximg.net/common/images/limit_mypixiv_360.png"
 
     def _init(self):
         self.api = PixivAppAPI(self)
         self.load_ugoira = self.config("ugoira", True)
+        self.load_ugoira_original = (self.load_ugoira == "original")
         self.max_posts = self.config("max-posts", 0)
         self.sanity_workaround = self.config("sanity", True)
         self.meta_user = self.config("metadata")
@@ -105,34 +106,7 @@ class PixivExtractor(Extractor):
         del work["image_urls"]
         del work["meta_pages"]
 
-        if work["type"] == "ugoira":
-            if self.load_ugoira:
-                try:
-                    return self._extract_ugoira(work)
-                except Exception as exc:
-                    self.log.warning(
-                        "%s: Unable to retrieve Ugoira metatdata (%s - %s)",
-                        work["id"], exc.__class__.__name__, exc)
-
-        elif work["page_count"] == 1:
-            url = meta_single_page["original_image_url"]
-            if url == self.sanity_url:
-                if self.sanity_workaround:
-                    self.log.warning("%s: 'sanity_level' warning", work["id"])
-                    body = self._request_ajax("/illust/" + str(work["id"]))
-                    return self._extract_ajax(work, body)
-                else:
-                    self.log.warning(
-                        "%s: Unable to download work ('sanity_level' warning)",
-                        work["id"])
-            elif url == self.mypixiv_url:
-                work["_mypixiv"] = True
-                self.log.warning("%s: 'My pixiv' locked", work["id"])
-                return ()
-            else:
-                return ({"url": url},)
-
-        else:
+        if meta_pages:
             return [
                 {
                     "url" : img["image_urls"]["original"],
@@ -141,30 +115,58 @@ class PixivExtractor(Extractor):
                 for num, img in enumerate(meta_pages)
             ]
 
+        url = meta_single_page["original_image_url"]
+        if url == self.sanity_url:
+            work["_ajax"] = True
+            self.log.warning("%s: 'limit_sanity_level' warning", work["id"])
+            if self.sanity_workaround:
+                body = self._request_ajax("/illust/" + str(work["id"]))
+                return self._extract_ajax(work, body)
+
+        elif url == self.mypixiv_url:
+            work["_mypixiv"] = True
+            self.log.warning("%s: 'My pixiv' locked", work["id"])
+
+        elif work["type"] != "ugoira":
+            return ({"url": url},)
+
+        elif self.load_ugoira:
+            try:
+                return self._extract_ugoira(work, url)
+            except Exception as exc:
+                self.log.warning(
+                    "%s: Unable to retrieve Ugoira metatdata (%s - %s)",
+                    work["id"], exc.__class__.__name__, exc)
+
         return ()
 
-    def _extract_ugoira(self, work):
+    def _extract_ugoira(self, work, img_url):
         ugoira = self.api.ugoira_metadata(work["id"])
-        url = ugoira["zip_urls"]["medium"]
         work["_ugoira_frame_data"] = work["frames"] = frames = ugoira["frames"]
-        work["date_url"] = self._date_from_url(url)
+        work["_ugoira_original"] = self.load_ugoira_original
         work["_http_adjust_extension"] = False
 
-        if self.load_ugoira == "original":
-            work["_ugoira_original"] = True
-            base, sep, _ = url.rpartition("_ugoira")
-            base = base.replace("/img-zip-ugoira/", "/img-original/", 1) + sep
+        if self.load_ugoira_original:
+            work["date_url"] = self._date_from_url(img_url)
 
-            for ext in ("jpg", "png", "gif"):
-                try:
-                    url = "{}0.{}".format(base, ext)
-                    self.request(url, method="HEAD")
-                    break
-                except exception.HttpError:
-                    pass
+            base, sep, ext = img_url.rpartition("_ugoira0.")
+            if sep:
+                base += "_ugoira"
             else:
-                self.log.warning(
-                    "Unable to find Ugoira frame URLs (%s)", work["id"])
+                base, sep, _ = img_url.rpartition("_ugoira")
+                base = base.replace(
+                    "/img-zip-ugoira/", "/img-original/", 1) + sep
+
+                for ext in ("jpg", "png", "gif"):
+                    try:
+                        url = "{}0.{}".format(base, ext)
+                        self.request(url, method="HEAD")
+                        break
+                    except exception.HttpError:
+                        pass
+                else:
+                    self.log.warning(
+                        "Unable to find Ugoira frame URLs (%s)", work["id"])
 
             return [
                 {
@@ -174,9 +176,11 @@ class PixivExtractor(Extractor):
                 }
                 for num in range(len(frames))
             ]
+
         else:
-            work["_ugoira_original"] = False
-            url = url.replace("_ugoira600x600", "_ugoira1920x1080", 1)
+            zip_url = ugoira["zip_urls"]["medium"]
+            work["date_url"] = self._date_from_url(zip_url)
+            url = zip_url.replace("_ugoira600x600", "_ugoira1920x1080", 1)
             return ({"url": url},)
 
     def _request_ajax(self, endpoint):
@@ -333,12 +337,12 @@ class PixivUserExtractor(PixivExtractor):
 class PixivArtworksExtractor(PixivExtractor):
     """Extractor for artworks of a pixiv user"""
     subcategory = "artworks"
-    _warning = True
     pattern = (BASE_PATTERN + r"/(?:"
                r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
                r"(?:/([^/?#]+))?/?(?:$|[?#])"
                r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
     example = "https://www.pixiv.net/en/users/12345/artworks"
+    _warn_phpsessid = True
 
     def _init(self):
         PixivExtractor._init(self)
@@ -352,12 +356,13 @@ class PixivArtworksExtractor(PixivExtractor):
         self.tag = t1 or t2
 
         if self.sanity_workaround:
-            self.cookies_domain = d = ".pixiv.net"
+            self.cookies_domain = domain = ".pixiv.net"
             self._init_cookies()
-            if self._warning and not self.cookies.get("PHPSESSID", domain=d):
-                PixivArtworksExtractor._warning = False
-                self.log.warning("No 'PHPSESSID' cookie set. Can detect only "
-                                 "non R-18 'sanity_level' works.")
+            if self._warn_phpsessid:
+                PixivArtworksExtractor._warn_phpsessid = False
+                if not self.cookies.get("PHPSESSID", domain=domain):
+                    self.log.warning("No 'PHPSESSID' cookie set. Can detect on"
+                                     "ly non R-18 'limit_sanity_level' works.")
 
     def metadata(self):
         if self.config("metadata"):
@@ -601,7 +606,10 @@ class PixivRankingExtractor(PixivExtractor):
         self.mode = self.date = None
 
     def works(self):
-        return self.api.illust_ranking(self.mode, self.date)
+        ranking = self.ranking
+        for ranking["rank"], work in enumerate(
+                self.api.illust_ranking(self.mode, self.date), 1):
+            yield work
 
     def metadata(self):
         query = text.parse_query(self.query)
@@ -640,10 +648,12 @@ class PixivRankingExtractor(PixivExtractor):
             date = (now - timedelta(days=1)).strftime("%Y-%m-%d")
         self.date = date
 
-        return {"ranking": {
+        self.ranking = ranking = {
             "mode": mode,
             "date": self.date,
-        }}
+            "rank": 0,
+        }
+        return {"ranking": ranking}
 
 
 class PixivSearchExtractor(PixivExtractor):
@@ -734,7 +744,6 @@ class PixivPixivisionExtractor(PixivExtractor):
     directory_fmt = ("{category}", "pixivision",
                      "{pixivision_id} {pixivision_title}")
     archive_fmt = "V{pixivision_id}_{id}{suffix}.{extension}"
-    cookies_domain = ".pixiv.net"
     pattern = r"(?:https?://)?(?:www\.)?pixivision\.net/(?:en/)?a/(\d+)"
     example = "https://www.pixivision.net/en/a/12345"
 
```
