summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/pixiv.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/pixiv.py')
-rw-r--r--gallery_dl/extractor/pixiv.py246
1 files changed, 147 insertions, 99 deletions
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index a33df42..9b35e42 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2021 Mike Fährmann
+# Copyright 2014-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,16 +10,16 @@
from .common import Extractor, Message
from .. import text, util, exception
-from ..cache import cache
+from ..cache import cache, memcache
from datetime import datetime, timedelta
import itertools
import hashlib
-import time
class PixivExtractor(Extractor):
"""Base class for pixiv extractors"""
category = "pixiv"
+ root = "https://www.pixiv.net"
directory_fmt = ("{category}", "{user[id]} {user[account]}")
filename_fmt = "{id}_p{num}.{extension}"
archive_fmt = "{id}{suffix}.{extension}"
@@ -73,7 +73,14 @@ class PixivExtractor(Extractor):
if work["type"] == "ugoira":
if not self.load_ugoira:
continue
- ugoira = self.api.ugoira_metadata(work["id"])
+
+ try:
+ ugoira = self.api.ugoira_metadata(work["id"])
+ except exception.StopExtraction as exc:
+ self.log.warning(
+ "Unable to retrieve Ugoira metatdata (%s - %s)",
+ work.get("id"), exc.message)
+ continue
url = ugoira["zip_urls"]["medium"].replace(
"_ugoira600x600", "_ugoira1920x1080")
@@ -91,22 +98,70 @@ class PixivExtractor(Extractor):
work["suffix"] = "_p{:02}".format(work["num"])
yield Message.Url, url, text.nameext_from_url(url, work)
+ @staticmethod
+ def _make_work(kind, url, user):
+ p = url.split("/")
+ return {
+ "create_date" : "{}-{}-{}T{}:{}:{}+09:00".format(
+ p[5], p[6], p[7], p[8], p[9], p[10]) if len(p) > 9 else None,
+ "height" : 0,
+ "id" : kind,
+ "image_urls" : None,
+ "meta_pages" : (),
+ "meta_single_page": {"original_image_url": url},
+ "page_count" : 1,
+ "sanity_level" : 0,
+ "tags" : (),
+ "title" : kind,
+ "type" : kind,
+ "user" : user,
+ "width" : 0,
+ "x_restrict" : 0,
+ }
+
def works(self):
- """Return an iterable containing all relevant 'work'-objects"""
+ """Return an iterable containing all relevant 'work' objects"""
def metadata(self):
- """Collect metadata for extractor-job"""
+ """Collect metadata for extractor job"""
return {}
class PixivUserExtractor(PixivExtractor):
- """Extractor for works of a pixiv user"""
+ """Extractor for a pixiv user profile"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
- r"(?:en/)?users/(\d+)(?:/(?:artworks|illustrations|manga)"
- r"(?:/([^/?#]+))?)?/?(?:$|[?#])"
- r"|member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"
- r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))")
+ r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id="
+ r")(\d+)(?:$|[?#])")
+ test = (
+ ("https://www.pixiv.net/en/users/173530"),
+ ("https://www.pixiv.net/u/173530"),
+ ("https://www.pixiv.net/member.php?id=173530"),
+ ("https://www.pixiv.net/mypage.php#id=173530"),
+ ("https://www.pixiv.net/#id=173530"),
+ )
+
+ def __init__(self, match):
+ PixivExtractor.__init__(self, match)
+ self.user_id = match.group(1)
+
+ def items(self):
+ base = "{}/users/{}/".format(self.root, self.user_id)
+ return self._dispatch_extractors((
+ (PixivAvatarExtractor , base + "avatar"),
+ (PixivBackgroundExtractor, base + "background"),
+ (PixivArtworksExtractor , base + "artworks"),
+ (PixivFavoriteExtractor , base + "bookmarks/artworks"),
+ ), ("artworks",))
+
+
+class PixivArtworksExtractor(PixivExtractor):
+ """Extractor for artworks of a pixiv user"""
+ subcategory = "artworks"
+ pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
+ r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
+ r"(?:/([^/?#]+))?/?(?:$|[?#])"
+ r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
test = (
("https://www.pixiv.net/en/users/173530/artworks", {
"url": "852c31ad83b6840bacbce824d85f2a997889efb7",
@@ -120,47 +175,30 @@ class PixivUserExtractor(PixivExtractor):
"&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
"url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
}),
- # avatar (#595, #623, #1124)
- ("https://www.pixiv.net/en/users/173530", {
- "options": (("avatar", True),),
- "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
- "range": "1",
- }),
- # background (#623, #1124, #2495)
- ("https://www.pixiv.net/en/users/194921", {
- "options": (("background", True),),
- "content": "aeda3536003ea3002f70657cb93c5053f26f5843",
- "range": "1",
- }),
# deleted account
("http://www.pixiv.net/member_illust.php?id=173531", {
"options": (("metadata", True),),
"exception": exception.NotFoundError,
}),
- ("https://www.pixiv.net/en/users/173530"),
("https://www.pixiv.net/en/users/173530/manga"),
("https://www.pixiv.net/en/users/173530/illustrations"),
("https://www.pixiv.net/member_illust.php?id=173530"),
- ("https://www.pixiv.net/u/173530"),
- ("https://www.pixiv.net/user/173530"),
- ("https://www.pixiv.net/mypage.php#id=173530"),
- ("https://www.pixiv.net/#id=173530"),
("https://touch.pixiv.net/member_illust.php?id=173530"),
)
def __init__(self, match):
PixivExtractor.__init__(self, match)
- u1, t1, u2, t2, u3 = match.groups()
+ u1, t1, u2, t2 = match.groups()
if t1:
t1 = text.unquote(t1)
elif t2:
t2 = text.parse_query(t2).get("tag")
- self.user_id = u1 or u2 or u3
+ self.user_id = u1 or u2
self.tag = t1 or t2
def metadata(self):
if self.config("metadata"):
- return {"user": self.api.user_detail(self.user_id)["user"]}
+ return self.api.user_detail(self.user_id)
return {}
def works(self):
@@ -173,54 +211,60 @@ class PixivUserExtractor(PixivExtractor):
if tag in [t["name"].lower() for t in work["tags"]]
)
- avatar = self.config("avatar")
- background = self.config("background")
- if avatar or background:
- work_list = []
- detail = self.api.user_detail(self.user_id)
- user = detail["user"]
-
- if avatar:
- url = user["profile_image_urls"]["medium"]
- work_list.append((self._make_work(
- "avatar", url.replace("_170.", "."), user),))
-
- if background:
- url = detail["profile"]["background_image_url"]
- if url:
- if "/c/" in url:
- parts = url.split("/")
- del parts[3:5]
- url = "/".join(parts)
- url = url.replace("_master1200.", ".")
- work = self._make_work("background", url, user)
- if url.endswith(".jpg"):
- work["_fallback"] = (url[:-4] + ".png",)
- work_list.append((work,))
-
- work_list.append(works)
- works = itertools.chain.from_iterable(work_list)
-
return works
- @staticmethod
- def _make_work(kind, url, user):
- return {
- "create_date" : None,
- "height" : 0,
- "id" : kind,
- "image_urls" : None,
- "meta_pages" : (),
- "meta_single_page": {"original_image_url": url},
- "page_count" : 1,
- "sanity_level" : 0,
- "tags" : (),
- "title" : kind,
- "type" : kind,
- "user" : user,
- "width" : 0,
- "x_restrict" : 0,
- }
+
+class PixivAvatarExtractor(PixivExtractor):
+ """Extractor for pixiv avatars"""
+ subcategory = "avatar"
+ filename_fmt = "avatar{date:?_//%Y-%m-%d}.{extension}"
+ archive_fmt = "avatar_{user[id]}_{date}"
+ pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net"
+ r"/(?:en/)?users/(\d+)/avatar")
+ test = ("https://www.pixiv.net/en/users/173530/avatar", {
+ "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
+ })
+
+ def __init__(self, match):
+ PixivExtractor.__init__(self, match)
+ self.user_id = match.group(1)
+
+ def works(self):
+ user = self.api.user_detail(self.user_id)["user"]
+ url = user["profile_image_urls"]["medium"].replace("_170.", ".")
+ return (self._make_work("avatar", url, user),)
+
+
+class PixivBackgroundExtractor(PixivExtractor):
+ """Extractor for pixiv background banners"""
+ subcategory = "background"
+ filename_fmt = "background{date?_//:%Y-%m-%d}.{extension}"
+ archive_fmt = "background_{user[id]}_{date}"
+ pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net"
+ r"/(?:en/)?users/(\d+)/background")
+ test = ("https://www.pixiv.net/en/users/194921/background", {
+ "pattern": r"https://i\.pximg\.net/background/img/2021/01/30/16/12/02"
+ r"/194921_af1f71e557a42f499213d4b9eaccc0f8\.jpg",
+ })
+
+ def __init__(self, match):
+ PixivExtractor.__init__(self, match)
+ self.user_id = match.group(1)
+
+ def works(self):
+ detail = self.api.user_detail(self.user_id)
+ url = detail["profile"]["background_image_url"]
+ if not url:
+ return ()
+ if "/c/" in url:
+ parts = url.split("/")
+ del parts[3:5]
+ url = "/".join(parts)
+ url = url.replace("_master1200.", ".")
+ work = self._make_work("background", url, detail["user"])
+ if url.endswith(".jpg"):
+ work["_fallback"] = (url[:-4] + ".png",)
+ return (work,)
class PixivMeExtractor(PixivExtractor):
@@ -312,10 +356,10 @@ class PixivFavoriteExtractor(PixivExtractor):
r"|bookmark\.php)(?:\?([^#]*))?")
test = (
("https://www.pixiv.net/en/users/173530/bookmarks/artworks", {
- "url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
+ "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
}),
("https://www.pixiv.net/bookmark.php?id=173530", {
- "url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
+ "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
}),
# bookmarks with specific tag
(("https://www.pixiv.net/en/users/3137110"
@@ -735,66 +779,70 @@ class PixivAppAPI():
def illust_detail(self, illust_id):
params = {"illust_id": illust_id}
- return self._call("v1/illust/detail", params)["illust"]
+ return self._call("/v1/illust/detail", params)["illust"]
def illust_follow(self, restrict="all"):
params = {"restrict": restrict}
- return self._pagination("v2/illust/follow", params)
+ return self._pagination("/v2/illust/follow", params)
def illust_ranking(self, mode="day", date=None):
params = {"mode": mode, "date": date}
- return self._pagination("v1/illust/ranking", params)
+ return self._pagination("/v1/illust/ranking", params)
def illust_related(self, illust_id):
params = {"illust_id": illust_id}
- return self._pagination("v2/illust/related", params)
+ return self._pagination("/v2/illust/related", params)
def search_illust(self, word, sort=None, target=None, duration=None,
date_start=None, date_end=None):
params = {"word": word, "search_target": target,
"sort": sort, "duration": duration,
"start_date": date_start, "end_date": date_end}
- return self._pagination("v1/search/illust", params)
+ return self._pagination("/v1/search/illust", params)
def user_bookmarks_illust(self, user_id, tag=None, restrict="public"):
params = {"user_id": user_id, "tag": tag, "restrict": restrict}
- return self._pagination("v1/user/bookmarks/illust", params)
+ return self._pagination("/v1/user/bookmarks/illust", params)
+ @memcache(keyarg=1)
def user_detail(self, user_id):
params = {"user_id": user_id}
- return self._call("v1/user/detail", params)
+ return self._call("/v1/user/detail", params)
def user_following(self, user_id, restrict="public"):
params = {"user_id": user_id, "restrict": restrict}
- return self._pagination("v1/user/following", params, "user_previews")
+ return self._pagination("/v1/user/following", params, "user_previews")
def user_illusts(self, user_id):
params = {"user_id": user_id}
- return self._pagination("v1/user/illusts", params)
+ return self._pagination("/v1/user/illusts", params)
def ugoira_metadata(self, illust_id):
params = {"illust_id": illust_id}
- return self._call("v1/ugoira/metadata", params)["ugoira_metadata"]
+ return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"]
def _call(self, endpoint, params=None):
- url = "https://app-api.pixiv.net/" + endpoint
+ url = "https://app-api.pixiv.net" + endpoint
+
+ while True:
+ self.login()
+ response = self.extractor.request(url, params=params, fatal=False)
+ data = response.json()
- self.login()
- response = self.extractor.request(url, params=params, fatal=False)
- data = response.json()
+ if "error" not in data:
+ return data
+
+ self.log.debug(data)
- if "error" in data:
if response.status_code == 404:
raise exception.NotFoundError()
error = data["error"]
if "rate limit" in (error.get("message") or "").lower():
- self.log.info("Waiting two minutes for API rate limit reset.")
- time.sleep(120)
- return self._call(endpoint, params)
- raise exception.StopExtraction("API request failed: %s", error)
+ self.extractor.wait(seconds=300)
+ continue
- return data
+ raise exception.StopExtraction("API request failed: %s", error)
def _pagination(self, endpoint, params, key="illusts"):
while True: