about | summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/pixiv.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net>, 2024-12-02 00:31:59 -0500
committer: Unit 193 <unit193@unit193.net>, 2024-12-02 00:31:59 -0500
commit: 1981ccaaea6eab2cf32536ec5afe132a870914d8 (patch)
tree: 013f1e17d922d3a6abf7f57aa6a175c2ce5d93bc /gallery_dl/extractor/pixiv.py
parent: fc004701f923bb954a22c7fec2ae8d607e78cb2b (diff)
New upstream version 1.28.0. [upstream/1.28.0]
Diffstat (limited to 'gallery_dl/extractor/pixiv.py')
-rw-r--r-- gallery_dl/extractor/pixiv.py 123
1 file changed, 66 insertions, 57 deletions
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 8c6e6d8..8ad061d 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -26,13 +26,14 @@ class PixivExtractor(Extractor):
directory_fmt = ("{category}", "{user[id]} {user[account]}")
filename_fmt = "{id}_p{num}.{extension}"
archive_fmt = "{id}{suffix}.{extension}"
- cookies_domain = None
+ cookies_domain = ".pixiv.net"
sanity_url = "https://s.pximg.net/common/images/limit_sanity_level_360.png"
mypixiv_url = "https://s.pximg.net/common/images/limit_mypixiv_360.png"
def _init(self):
self.api = PixivAppAPI(self)
self.load_ugoira = self.config("ugoira", True)
+ self.load_ugoira_original = (self.load_ugoira == "original")
self.max_posts = self.config("max-posts", 0)
self.sanity_workaround = self.config("sanity", True)
self.meta_user = self.config("metadata")
@@ -105,34 +106,7 @@ class PixivExtractor(Extractor):
del work["image_urls"]
del work["meta_pages"]
- if work["type"] == "ugoira":
- if self.load_ugoira:
- try:
- return self._extract_ugoira(work)
- except Exception as exc:
- self.log.warning(
- "%s: Unable to retrieve Ugoira metatdata (%s - %s)",
- work["id"], exc.__class__.__name__, exc)
-
- elif work["page_count"] == 1:
- url = meta_single_page["original_image_url"]
- if url == self.sanity_url:
- if self.sanity_workaround:
- self.log.warning("%s: 'sanity_level' warning", work["id"])
- body = self._request_ajax("/illust/" + str(work["id"]))
- return self._extract_ajax(work, body)
- else:
- self.log.warning(
- "%s: Unable to download work ('sanity_level' warning)",
- work["id"])
- elif url == self.mypixiv_url:
- work["_mypixiv"] = True
- self.log.warning("%s: 'My pixiv' locked", work["id"])
- return ()
- else:
- return ({"url": url},)
-
- else:
+ if meta_pages:
return [
{
"url" : img["image_urls"]["original"],
@@ -141,30 +115,58 @@ class PixivExtractor(Extractor):
for num, img in enumerate(meta_pages)
]
+ url = meta_single_page["original_image_url"]
+ if url == self.sanity_url:
+ work["_ajax"] = True
+ self.log.warning("%s: 'limit_sanity_level' warning", work["id"])
+ if self.sanity_workaround:
+ body = self._request_ajax("/illust/" + str(work["id"]))
+ return self._extract_ajax(work, body)
+
+ elif url == self.mypixiv_url:
+ work["_mypixiv"] = True
+ self.log.warning("%s: 'My pixiv' locked", work["id"])
+
+ elif work["type"] != "ugoira":
+ return ({"url": url},)
+
+ elif self.load_ugoira:
+ try:
+ return self._extract_ugoira(work, url)
+ except Exception as exc:
+ self.log.warning(
+ "%s: Unable to retrieve Ugoira metatdata (%s - %s)",
+ work["id"], exc.__class__.__name__, exc)
+
return ()
- def _extract_ugoira(self, work):
+ def _extract_ugoira(self, work, img_url):
ugoira = self.api.ugoira_metadata(work["id"])
- url = ugoira["zip_urls"]["medium"]
work["_ugoira_frame_data"] = work["frames"] = frames = ugoira["frames"]
- work["date_url"] = self._date_from_url(url)
+ work["_ugoira_original"] = self.load_ugoira_original
work["_http_adjust_extension"] = False
- if self.load_ugoira == "original":
- work["_ugoira_original"] = True
- base, sep, _ = url.rpartition("_ugoira")
- base = base.replace("/img-zip-ugoira/", "/img-original/", 1) + sep
+ if self.load_ugoira_original:
+ work["date_url"] = self._date_from_url(img_url)
- for ext in ("jpg", "png", "gif"):
- try:
- url = "{}0.{}".format(base, ext)
- self.request(url, method="HEAD")
- break
- except exception.HttpError:
- pass
+ base, sep, ext = img_url.rpartition("_ugoira0.")
+ if sep:
+ base += "_ugoira"
else:
- self.log.warning(
- "Unable to find Ugoira frame URLs (%s)", work["id"])
+ base, sep, _ = img_url.rpartition("_ugoira")
+ base = base.replace(
+ "/img-zip-ugoira/", "/img-original/", 1) + sep
+
+ for ext in ("jpg", "png", "gif"):
+ try:
+ url = "{}0.{}".format(base, ext)
+ self.request(url, method="HEAD")
+ break
+ except exception.HttpError:
+ pass
+ else:
+ self.log.warning(
+ "Unable to find Ugoira frame URLs (%s)", work["id"])
return [
{
@@ -174,9 +176,11 @@ class PixivExtractor(Extractor):
}
for num in range(len(frames))
]
+
else:
- work["_ugoira_original"] = False
- url = url.replace("_ugoira600x600", "_ugoira1920x1080", 1)
+ zip_url = ugoira["zip_urls"]["medium"]
+ work["date_url"] = self._date_from_url(zip_url)
+ url = zip_url.replace("_ugoira600x600", "_ugoira1920x1080", 1)
return ({"url": url},)
def _request_ajax(self, endpoint):
@@ -333,12 +337,12 @@ class PixivUserExtractor(PixivExtractor):
class PixivArtworksExtractor(PixivExtractor):
"""Extractor for artworks of a pixiv user"""
subcategory = "artworks"
- _warning = True
pattern = (BASE_PATTERN + r"/(?:"
r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
r"(?:/([^/?#]+))?/?(?:$|[?#])"
r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
example = "https://www.pixiv.net/en/users/12345/artworks"
+ _warn_phpsessid = True
def _init(self):
PixivExtractor._init(self)
@@ -352,12 +356,13 @@ class PixivArtworksExtractor(PixivExtractor):
self.tag = t1 or t2
if self.sanity_workaround:
- self.cookies_domain = d = ".pixiv.net"
+ self.cookies_domain = domain = ".pixiv.net"
self._init_cookies()
- if self._warning and not self.cookies.get("PHPSESSID", domain=d):
- PixivArtworksExtractor._warning = False
- self.log.warning("No 'PHPSESSID' cookie set. Can detect only "
- "non R-18 'sanity_level' works.")
+ if self._warn_phpsessid:
+ PixivArtworksExtractor._warn_phpsessid = False
+ if not self.cookies.get("PHPSESSID", domain=domain):
+ self.log.warning("No 'PHPSESSID' cookie set. Can detect on"
+ "ly non R-18 'limit_sanity_level' works.")
def metadata(self):
if self.config("metadata"):
@@ -601,7 +606,10 @@ class PixivRankingExtractor(PixivExtractor):
self.mode = self.date = None
def works(self):
- return self.api.illust_ranking(self.mode, self.date)
+ ranking = self.ranking
+ for ranking["rank"], work in enumerate(
+ self.api.illust_ranking(self.mode, self.date), 1):
+ yield work
def metadata(self):
query = text.parse_query(self.query)
@@ -640,10 +648,12 @@ class PixivRankingExtractor(PixivExtractor):
date = (now - timedelta(days=1)).strftime("%Y-%m-%d")
self.date = date
- return {"ranking": {
+ self.ranking = ranking = {
"mode": mode,
"date": self.date,
- }}
+ "rank": 0,
+ }
+ return {"ranking": ranking}
class PixivSearchExtractor(PixivExtractor):
@@ -734,7 +744,6 @@ class PixivPixivisionExtractor(PixivExtractor):
directory_fmt = ("{category}", "pixivision",
"{pixivision_id} {pixivision_title}")
archive_fmt = "V{pixivision_id}_{id}{suffix}.{extension}"
- cookies_domain = ".pixiv.net"
pattern = r"(?:https?://)?(?:www\.)?pixivision\.net/(?:en/)?a/(\d+)"
example = "https://www.pixivision.net/en/a/12345"