diff options
Diffstat (limited to 'gallery_dl/extractor/pixiv.py')
| -rw-r--r-- | gallery_dl/extractor/pixiv.py | 81 |
1 files changed, 42 insertions, 39 deletions
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 6276a2a..eb1a7f2 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -9,14 +9,13 @@ """Extractors for https://www.pixiv.net/""" from .common import Extractor, Message, Dispatch -from .. import text, util, exception +from .. import text, util, dt, exception from ..cache import cache, memcache -from datetime import datetime, timedelta import itertools import hashlib BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?ph?ixiv\.net" -USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)" +USER_PATTERN = rf"{BASE_PATTERN}/(?:en/)?users/(\d+)" class PixivExtractor(Extractor): @@ -44,7 +43,7 @@ class PixivExtractor(Extractor): self.meta_captions = self.config("captions") if self.sanity_workaround or self.meta_captions: - self.meta_captions_sub = util.re( + self.meta_captions_sub = text.re( r'<a href="/jump\.php\?([^"]+)').sub def items(self): @@ -96,12 +95,12 @@ class PixivExtractor(Extractor): if transform_tags: transform_tags(work) work["num"] = 0 - work["date"] = text.parse_datetime(work["create_date"]) + work["date"] = dt.parse_iso(work["create_date"]) work["rating"] = ratings.get(work["x_restrict"]) work["suffix"] = "" work.update(metadata) - yield Message.Directory, work + yield Message.Directory, "", work for work["num"], file in enumerate(files): url = file["url"] work.update(file) @@ -149,7 +148,7 @@ class PixivExtractor(Extractor): self._extract_ajax(work, body) return self._extract_ugoira(work, url) except Exception as exc: - self.log.debug("", exc_info=exc) + self.log.traceback(exc) self.log.warning( "%s: Unable to extract Ugoira URL. Provide " "logged-in cookies to access it", work["id"]) @@ -238,10 +237,13 @@ class PixivExtractor(Extractor): return data["body"] self.log.debug("Server response: %s", util.json_dumps(data)) - return self.log.error( - "'%s'", data.get("message") or "General Error") + if (msg := data.get("message")) == "An unknown error occurred": + msg = "Invalid 'PHPSESSID' cookie" + else: + msg = f"'{msg or 'General Error'}'" + self.log.error("%s", msg) except Exception: - return None + pass def _extract_ajax(self, work, body): work["_ajax"] = True @@ -274,6 +276,9 @@ class PixivExtractor(Extractor): "profile_image_urls": {}, } + if "is_bookmarked" not in work: + work["is_bookmarked"] = True if body.get("bookmarkData") else False + work["tags"] = tags = [] for tag in body["tags"]["tags"]: name = tag["tag"] @@ -350,10 +355,10 @@ class PixivExtractor(Extractor): if fmt in urls: yield urls[fmt] - def _date_from_url(self, url, offset=timedelta(hours=9)): + def _date_from_url(self, url, offset=dt.timedelta(hours=9)): try: _, _, _, _, _, y, m, d, H, M, S, _ = url.split("/") - return datetime( + return dt.datetime( int(y), int(m), int(d), int(H), int(M), int(S)) - offset except Exception: return None @@ -388,7 +393,7 @@ class PixivExtractor(Extractor): class PixivUserExtractor(Dispatch, PixivExtractor): """Extractor for a pixiv user profile""" - pattern = (BASE_PATTERN + r"/(?:" + pattern = (rf"{BASE_PATTERN}/(?:" r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id=" r")(\d+)(?:$|[?#])") example = "https://www.pixiv.net/en/users/12345" @@ -411,7 +416,7 @@ class PixivUserExtractor(Dispatch, PixivExtractor): class PixivArtworksExtractor(PixivExtractor): """Extractor for artworks of a pixiv user""" subcategory = "artworks" - pattern = (BASE_PATTERN + r"/(?:" + pattern = (rf"{BASE_PATTERN}/(?:" r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)" r"(?:/([^/?#]+))?/?(?:$|[?#])" r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)") @@ -450,7 +455,7 @@ class PixivArtworksExtractor(PixivExtractor): ajax_ids.extend(map(int, body["manga"])) ajax_ids.sort() except Exception as exc: - self.log.debug("", exc_info=exc) + self.log.traceback(exc) self.log.warning("u%s: Failed to collect artwork IDs " "using AJAX API", self.user_id) else: @@ -500,7 +505,7 @@ class PixivAvatarExtractor(PixivExtractor): subcategory = "avatar" filename_fmt = "avatar{date:?_//%Y-%m-%d}.{extension}" archive_fmt = "avatar_{user[id]}_{date}" - pattern = USER_PATTERN + r"/avatar" + pattern = rf"{USER_PATTERN}/avatar" example = "https://www.pixiv.net/en/users/12345/avatar" def _init(self): @@ -518,7 +523,7 @@ class PixivBackgroundExtractor(PixivExtractor): subcategory = "background" filename_fmt = "background{date:?_//%Y-%m-%d}.{extension}" archive_fmt = "background_{user[id]}_{date}" - pattern = USER_PATTERN + "/background" + pattern = rf"{USER_PATTERN}/background" example = "https://www.pixiv.net/en/users/12345/background" def _init(self): @@ -580,7 +585,7 @@ class PixivWorkExtractor(PixivExtractor): class PixivUnlistedExtractor(PixivExtractor): """Extractor for a unlisted pixiv illustrations""" subcategory = "unlisted" - pattern = BASE_PATTERN + r"/(?:en/)?artworks/unlisted/(\w+)" + pattern = rf"{BASE_PATTERN}/(?:en/)?artworks/unlisted/(\w+)" example = "https://www.pixiv.net/en/artworks/unlisted/a1b2c3d4e5f6g7h8i9j0" def _extract_files(self, work): @@ -599,7 +604,7 @@ class PixivFavoriteExtractor(PixivExtractor): directory_fmt = ("{category}", "bookmarks", "{user_bookmark[id]} {user_bookmark[account]}") archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}" - pattern = (BASE_PATTERN + r"/(?:(?:en/)?" + pattern = (rf"{BASE_PATTERN}/(?:(?:en/)?" r"users/(\d+)/(bookmarks/artworks|following)(?:/([^/?#]+))?" r"|bookmark\.php)(?:\?([^#]*))?") example = "https://www.pixiv.net/en/users/12345/bookmarks/artworks" @@ -662,7 +667,7 @@ class PixivRankingExtractor(PixivExtractor): archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}" directory_fmt = ("{category}", "rankings", "{ranking[mode]}", "{ranking[date]}") - pattern = BASE_PATTERN + r"/ranking\.php(?:\?([^#]*))?" + pattern = rf"{BASE_PATTERN}/ranking\.php(?:\?([^#]*))?" example = "https://www.pixiv.net/ranking.php" def __init__(self, match): @@ -712,8 +717,7 @@ class PixivRankingExtractor(PixivExtractor): self.log.warning("invalid date '%s'", date) date = None if not date: - now = util.datetime_utcnow() - date = (now - timedelta(days=1)).strftime("%Y-%m-%d") + date = (dt.now() - dt.timedelta(days=1)).strftime("%Y-%m-%d") self.date = date self.type = type = query.get("content") @@ -732,7 +736,7 @@ class PixivSearchExtractor(PixivExtractor): subcategory = "search" archive_fmt = "s_{search[word]}_{id}{num}.{extension}" directory_fmt = ("{category}", "search", "{search[word]}") - pattern = (BASE_PATTERN + r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?" + pattern = (rf"{BASE_PATTERN}/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?" r"|search\.php)(?:\?([^#]+))?") example = "https://www.pixiv.net/en/tags/TAG" @@ -798,7 +802,7 @@ class PixivFollowExtractor(PixivExtractor): subcategory = "follow" archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}" directory_fmt = ("{category}", "following") - pattern = BASE_PATTERN + r"/bookmark_new_illust\.php" + pattern = rf"{BASE_PATTERN}/bookmark_new_illust\.php" example = "https://www.pixiv.net/bookmark_new_illust.php" def works(self): @@ -847,7 +851,7 @@ class PixivSeriesExtractor(PixivExtractor): directory_fmt = ("{category}", "{user[id]} {user[account]}", "{series[id]} {series[title]}") filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}" - pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)" + pattern = rf"{BASE_PATTERN}/user/(\d+)/series/(\d+)" example = "https://www.pixiv.net/user/12345/series/12345" def __init__(self, match): @@ -888,11 +892,10 @@ class PixivSketchExtractor(Extractor): for post in self.posts(): media = post["media"] post["post_id"] = post["id"] - post["date"] = text.parse_datetime( - post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") + post["date"] = dt.parse_iso(post["created_at"]) util.delete_items(post, ("id", "media", "_links")) - yield Message.Directory, post + yield Message.Directory, "", post post["_http_headers"] = headers for photo in media: @@ -969,11 +972,11 @@ class PixivNovelExtractor(PixivExtractor): if transform_tags: transform_tags(novel) novel["num"] = 0 - novel["date"] = text.parse_datetime(novel["create_date"]) + novel["date"] = dt.parse_iso(novel["create_date"]) novel["rating"] = ratings.get(novel["x_restrict"]) novel["suffix"] = "" - yield Message.Directory, novel + yield Message.Directory, "", novel try: content = self.api.novel_webview(novel["id"])["text"] @@ -1039,7 +1042,7 @@ class PixivNovelExtractor(PixivExtractor): class PixivNovelNovelExtractor(PixivNovelExtractor): """Extractor for pixiv novels""" subcategory = "novel" - pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)" + pattern = rf"{BASE_PATTERN}/n(?:ovel/show\.php\?id=|/)(\d+)" example = "https://www.pixiv.net/novel/show.php?id=12345" def novels(self): @@ -1053,7 +1056,7 @@ class PixivNovelNovelExtractor(PixivNovelExtractor): class PixivNovelUserExtractor(PixivNovelExtractor): """Extractor for pixiv users' novels""" subcategory = "user" - pattern = USER_PATTERN + r"/novels" + pattern = rf"{USER_PATTERN}/novels" example = "https://www.pixiv.net/en/users/12345/novels" def novels(self): @@ -1063,7 +1066,7 @@ class PixivNovelUserExtractor(PixivNovelExtractor): class PixivNovelSeriesExtractor(PixivNovelExtractor): """Extractor for pixiv novel series""" subcategory = "series" - pattern = BASE_PATTERN + r"/novel/series/(\d+)" + pattern = rf"{BASE_PATTERN}/novel/series/(\d+)" example = "https://www.pixiv.net/novel/series/12345" def novels(self): @@ -1073,7 +1076,7 @@ class PixivNovelSeriesExtractor(PixivNovelExtractor): class PixivNovelBookmarkExtractor(PixivNovelExtractor): """Extractor for bookmarked pixiv novels""" subcategory = "bookmark" - pattern = (USER_PATTERN + r"/bookmarks/novels" + pattern = (rf"{USER_PATTERN}/bookmarks/novels" r"(?:/([^/?#]+))?(?:/?\?([^#]+))?") example = "https://www.pixiv.net/en/users/12345/bookmarks/novels" @@ -1151,7 +1154,7 @@ class PixivAppAPI(): "get_secure_url": "1", } - time = util.datetime_utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00") + time = dt.now().strftime("%Y-%m-%dT%H:%M:%S+00:00") headers = { "X-Client-Time": time, "X-Client-Hash": hashlib.md5( @@ -1326,11 +1329,11 @@ class PixivAppAPI(): sort = params["sort"] if sort == "date_desc": date_key = "end_date" - date_off = timedelta(days=1) + date_off = dt.timedelta(days=1) date_cmp = lambda lhs, rhs: lhs >= rhs # noqa E731 elif sort == "date_asc": date_key = "start_date" - date_off = timedelta(days=-1) + date_off = dt.timedelta(days=-1) date_cmp = lambda lhs, rhs: lhs <= rhs # noqa E731 else: date_key = None @@ -1357,8 +1360,8 @@ class PixivAppAPI(): if date_key and text.parse_int(params.get("offset")) >= 5000: date_last = data["illusts"][-1]["create_date"] - date_val = (text.parse_datetime( - date_last) + date_off).strftime("%Y-%m-%d") + date_val = (dt.parse_iso(date_last) + date_off).strftime( + "%Y-%m-%d") self.log.info("Reached 'offset' >= 5000; " "Updating '%s' to '%s'", date_key, date_val) params[date_key] = date_val |
