about | summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/pixiv.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/pixiv.py')
-rw-r--r-- gallery_dl/extractor/pixiv.py 81
1 files changed, 42 insertions, 39 deletions
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 6276a2a..eb1a7f2 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -9,14 +9,13 @@
"""Extractors for https://www.pixiv.net/"""
from .common import Extractor, Message, Dispatch
-from .. import text, util, exception
+from .. import text, util, dt, exception
from ..cache import cache, memcache
-from datetime import datetime, timedelta
import itertools
import hashlib
BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?ph?ixiv\.net"
-USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)"
+USER_PATTERN = rf"{BASE_PATTERN}/(?:en/)?users/(\d+)"
class PixivExtractor(Extractor):
@@ -44,7 +43,7 @@ class PixivExtractor(Extractor):
self.meta_captions = self.config("captions")
if self.sanity_workaround or self.meta_captions:
- self.meta_captions_sub = util.re(
+ self.meta_captions_sub = text.re(
r'<a href="/jump\.php\?([^"]+)').sub
def items(self):
@@ -96,12 +95,12 @@ class PixivExtractor(Extractor):
if transform_tags:
transform_tags(work)
work["num"] = 0
- work["date"] = text.parse_datetime(work["create_date"])
+ work["date"] = dt.parse_iso(work["create_date"])
work["rating"] = ratings.get(work["x_restrict"])
work["suffix"] = ""
work.update(metadata)
- yield Message.Directory, work
+ yield Message.Directory, "", work
for work["num"], file in enumerate(files):
url = file["url"]
work.update(file)
@@ -149,7 +148,7 @@ class PixivExtractor(Extractor):
self._extract_ajax(work, body)
return self._extract_ugoira(work, url)
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.warning(
"%s: Unable to extract Ugoira URL. Provide "
"logged-in cookies to access it", work["id"])
@@ -238,10 +237,13 @@ class PixivExtractor(Extractor):
return data["body"]
self.log.debug("Server response: %s", util.json_dumps(data))
- return self.log.error(
- "'%s'", data.get("message") or "General Error")
+ if (msg := data.get("message")) == "An unknown error occurred":
+ msg = "Invalid 'PHPSESSID' cookie"
+ else:
+ msg = f"'{msg or 'General Error'}'"
+ self.log.error("%s", msg)
except Exception:
- return None
+ pass
def _extract_ajax(self, work, body):
work["_ajax"] = True
@@ -274,6 +276,9 @@ class PixivExtractor(Extractor):
"profile_image_urls": {},
}
+ if "is_bookmarked" not in work:
+ work["is_bookmarked"] = True if body.get("bookmarkData") else False
+
work["tags"] = tags = []
for tag in body["tags"]["tags"]:
name = tag["tag"]
@@ -350,10 +355,10 @@ class PixivExtractor(Extractor):
if fmt in urls:
yield urls[fmt]
- def _date_from_url(self, url, offset=timedelta(hours=9)):
+ def _date_from_url(self, url, offset=dt.timedelta(hours=9)):
try:
_, _, _, _, _, y, m, d, H, M, S, _ = url.split("/")
- return datetime(
+ return dt.datetime(
int(y), int(m), int(d), int(H), int(M), int(S)) - offset
except Exception:
return None
@@ -388,7 +393,7 @@ class PixivExtractor(Extractor):
class PixivUserExtractor(Dispatch, PixivExtractor):
"""Extractor for a pixiv user profile"""
- pattern = (BASE_PATTERN + r"/(?:"
+ pattern = (rf"{BASE_PATTERN}/(?:"
r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id="
r")(\d+)(?:$|[?#])")
example = "https://www.pixiv.net/en/users/12345"
@@ -411,7 +416,7 @@ class PixivUserExtractor(Dispatch, PixivExtractor):
class PixivArtworksExtractor(PixivExtractor):
"""Extractor for artworks of a pixiv user"""
subcategory = "artworks"
- pattern = (BASE_PATTERN + r"/(?:"
+ pattern = (rf"{BASE_PATTERN}/(?:"
r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
r"(?:/([^/?#]+))?/?(?:$|[?#])"
r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
@@ -450,7 +455,7 @@ class PixivArtworksExtractor(PixivExtractor):
ajax_ids.extend(map(int, body["manga"]))
ajax_ids.sort()
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.warning("u%s: Failed to collect artwork IDs "
"using AJAX API", self.user_id)
else:
@@ -500,7 +505,7 @@ class PixivAvatarExtractor(PixivExtractor):
subcategory = "avatar"
filename_fmt = "avatar{date:?_//%Y-%m-%d}.{extension}"
archive_fmt = "avatar_{user[id]}_{date}"
- pattern = USER_PATTERN + r"/avatar"
+ pattern = rf"{USER_PATTERN}/avatar"
example = "https://www.pixiv.net/en/users/12345/avatar"
def _init(self):
@@ -518,7 +523,7 @@ class PixivBackgroundExtractor(PixivExtractor):
subcategory = "background"
filename_fmt = "background{date:?_//%Y-%m-%d}.{extension}"
archive_fmt = "background_{user[id]}_{date}"
- pattern = USER_PATTERN + "/background"
+ pattern = rf"{USER_PATTERN}/background"
example = "https://www.pixiv.net/en/users/12345/background"
def _init(self):
@@ -580,7 +585,7 @@ class PixivWorkExtractor(PixivExtractor):
class PixivUnlistedExtractor(PixivExtractor):
"""Extractor for a unlisted pixiv illustrations"""
subcategory = "unlisted"
- pattern = BASE_PATTERN + r"/(?:en/)?artworks/unlisted/(\w+)"
+ pattern = rf"{BASE_PATTERN}/(?:en/)?artworks/unlisted/(\w+)"
example = "https://www.pixiv.net/en/artworks/unlisted/a1b2c3d4e5f6g7h8i9j0"
def _extract_files(self, work):
@@ -599,7 +604,7 @@ class PixivFavoriteExtractor(PixivExtractor):
directory_fmt = ("{category}", "bookmarks",
"{user_bookmark[id]} {user_bookmark[account]}")
archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}"
- pattern = (BASE_PATTERN + r"/(?:(?:en/)?"
+ pattern = (rf"{BASE_PATTERN}/(?:(?:en/)?"
r"users/(\d+)/(bookmarks/artworks|following)(?:/([^/?#]+))?"
r"|bookmark\.php)(?:\?([^#]*))?")
example = "https://www.pixiv.net/en/users/12345/bookmarks/artworks"
@@ -662,7 +667,7 @@ class PixivRankingExtractor(PixivExtractor):
archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}"
directory_fmt = ("{category}", "rankings",
"{ranking[mode]}", "{ranking[date]}")
- pattern = BASE_PATTERN + r"/ranking\.php(?:\?([^#]*))?"
+ pattern = rf"{BASE_PATTERN}/ranking\.php(?:\?([^#]*))?"
example = "https://www.pixiv.net/ranking.php"
def __init__(self, match):
@@ -712,8 +717,7 @@ class PixivRankingExtractor(PixivExtractor):
self.log.warning("invalid date '%s'", date)
date = None
if not date:
- now = util.datetime_utcnow()
- date = (now - timedelta(days=1)).strftime("%Y-%m-%d")
+ date = (dt.now() - dt.timedelta(days=1)).strftime("%Y-%m-%d")
self.date = date
self.type = type = query.get("content")
@@ -732,7 +736,7 @@ class PixivSearchExtractor(PixivExtractor):
subcategory = "search"
archive_fmt = "s_{search[word]}_{id}{num}.{extension}"
directory_fmt = ("{category}", "search", "{search[word]}")
- pattern = (BASE_PATTERN + r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?"
+ pattern = (rf"{BASE_PATTERN}/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?"
r"|search\.php)(?:\?([^#]+))?")
example = "https://www.pixiv.net/en/tags/TAG"
@@ -798,7 +802,7 @@ class PixivFollowExtractor(PixivExtractor):
subcategory = "follow"
archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}"
directory_fmt = ("{category}", "following")
- pattern = BASE_PATTERN + r"/bookmark_new_illust\.php"
+ pattern = rf"{BASE_PATTERN}/bookmark_new_illust\.php"
example = "https://www.pixiv.net/bookmark_new_illust.php"
def works(self):
@@ -847,7 +851,7 @@ class PixivSeriesExtractor(PixivExtractor):
directory_fmt = ("{category}", "{user[id]} {user[account]}",
"{series[id]} {series[title]}")
filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}"
- pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)"
+ pattern = rf"{BASE_PATTERN}/user/(\d+)/series/(\d+)"
example = "https://www.pixiv.net/user/12345/series/12345"
def __init__(self, match):
@@ -888,11 +892,10 @@ class PixivSketchExtractor(Extractor):
for post in self.posts():
media = post["media"]
post["post_id"] = post["id"]
- post["date"] = text.parse_datetime(
- post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ post["date"] = dt.parse_iso(post["created_at"])
util.delete_items(post, ("id", "media", "_links"))
- yield Message.Directory, post
+ yield Message.Directory, "", post
post["_http_headers"] = headers
for photo in media:
@@ -969,11 +972,11 @@ class PixivNovelExtractor(PixivExtractor):
if transform_tags:
transform_tags(novel)
novel["num"] = 0
- novel["date"] = text.parse_datetime(novel["create_date"])
+ novel["date"] = dt.parse_iso(novel["create_date"])
novel["rating"] = ratings.get(novel["x_restrict"])
novel["suffix"] = ""
- yield Message.Directory, novel
+ yield Message.Directory, "", novel
try:
content = self.api.novel_webview(novel["id"])["text"]
@@ -1039,7 +1042,7 @@ class PixivNovelExtractor(PixivExtractor):
class PixivNovelNovelExtractor(PixivNovelExtractor):
"""Extractor for pixiv novels"""
subcategory = "novel"
- pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)"
+ pattern = rf"{BASE_PATTERN}/n(?:ovel/show\.php\?id=|/)(\d+)"
example = "https://www.pixiv.net/novel/show.php?id=12345"
def novels(self):
@@ -1053,7 +1056,7 @@ class PixivNovelNovelExtractor(PixivNovelExtractor):
class PixivNovelUserExtractor(PixivNovelExtractor):
"""Extractor for pixiv users' novels"""
subcategory = "user"
- pattern = USER_PATTERN + r"/novels"
+ pattern = rf"{USER_PATTERN}/novels"
example = "https://www.pixiv.net/en/users/12345/novels"
def novels(self):
@@ -1063,7 +1066,7 @@ class PixivNovelUserExtractor(PixivNovelExtractor):
class PixivNovelSeriesExtractor(PixivNovelExtractor):
"""Extractor for pixiv novel series"""
subcategory = "series"
- pattern = BASE_PATTERN + r"/novel/series/(\d+)"
+ pattern = rf"{BASE_PATTERN}/novel/series/(\d+)"
example = "https://www.pixiv.net/novel/series/12345"
def novels(self):
@@ -1073,7 +1076,7 @@ class PixivNovelSeriesExtractor(PixivNovelExtractor):
class PixivNovelBookmarkExtractor(PixivNovelExtractor):
"""Extractor for bookmarked pixiv novels"""
subcategory = "bookmark"
- pattern = (USER_PATTERN + r"/bookmarks/novels"
+ pattern = (rf"{USER_PATTERN}/bookmarks/novels"
r"(?:/([^/?#]+))?(?:/?\?([^#]+))?")
example = "https://www.pixiv.net/en/users/12345/bookmarks/novels"
@@ -1151,7 +1154,7 @@ class PixivAppAPI():
"get_secure_url": "1",
}
- time = util.datetime_utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00")
+ time = dt.now().strftime("%Y-%m-%dT%H:%M:%S+00:00")
headers = {
"X-Client-Time": time,
"X-Client-Hash": hashlib.md5(
@@ -1326,11 +1329,11 @@ class PixivAppAPI():
sort = params["sort"]
if sort == "date_desc":
date_key = "end_date"
- date_off = timedelta(days=1)
+ date_off = dt.timedelta(days=1)
date_cmp = lambda lhs, rhs: lhs >= rhs # noqa E731
elif sort == "date_asc":
date_key = "start_date"
- date_off = timedelta(days=-1)
+ date_off = dt.timedelta(days=-1)
date_cmp = lambda lhs, rhs: lhs <= rhs # noqa E731
else:
date_key = None
@@ -1357,8 +1360,8 @@ class PixivAppAPI():
if date_key and text.parse_int(params.get("offset")) >= 5000:
date_last = data["illusts"][-1]["create_date"]
- date_val = (text.parse_datetime(
- date_last) + date_off).strftime("%Y-%m-%d")
+ date_val = (dt.parse_iso(date_last) + date_off).strftime(
+ "%Y-%m-%d")
self.log.info("Reached 'offset' >= 5000; "
"Updating '%s' to '%s'", date_key, date_val)
params[date_key] = date_val