diff options
| author | 2022-05-26 23:57:04 -0400 | |
|---|---|---|
| committer | 2022-05-26 23:57:04 -0400 | |
| commit | ad61a6d8122973534ab63df48f6090954bc73db6 (patch) | |
| tree | aedce94427ac95fa180005f88fc94b5c8ef5a62a /gallery_dl/extractor/pixiv.py | |
| parent | c6b88a96bd191711fc540d7babab3d2e09c68da8 (diff) | |
New upstream version 1.22.0 (upstream/1.22.0)
Diffstat (limited to 'gallery_dl/extractor/pixiv.py')
| -rw-r--r-- | gallery_dl/extractor/pixiv.py | 246 |
1 file changed, 147 insertions, 99 deletions
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index a33df42..9b35e42 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2021 Mike Fährmann +# Copyright 2014-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,16 +10,16 @@ from .common import Extractor, Message from .. import text, util, exception -from ..cache import cache +from ..cache import cache, memcache from datetime import datetime, timedelta import itertools import hashlib -import time class PixivExtractor(Extractor): """Base class for pixiv extractors""" category = "pixiv" + root = "https://www.pixiv.net" directory_fmt = ("{category}", "{user[id]} {user[account]}") filename_fmt = "{id}_p{num}.{extension}" archive_fmt = "{id}{suffix}.{extension}" @@ -73,7 +73,14 @@ class PixivExtractor(Extractor): if work["type"] == "ugoira": if not self.load_ugoira: continue - ugoira = self.api.ugoira_metadata(work["id"]) + + try: + ugoira = self.api.ugoira_metadata(work["id"]) + except exception.StopExtraction as exc: + self.log.warning( + "Unable to retrieve Ugoira metatdata (%s - %s)", + work.get("id"), exc.message) + continue url = ugoira["zip_urls"]["medium"].replace( "_ugoira600x600", "_ugoira1920x1080") @@ -91,22 +98,70 @@ class PixivExtractor(Extractor): work["suffix"] = "_p{:02}".format(work["num"]) yield Message.Url, url, text.nameext_from_url(url, work) + @staticmethod + def _make_work(kind, url, user): + p = url.split("/") + return { + "create_date" : "{}-{}-{}T{}:{}:{}+09:00".format( + p[5], p[6], p[7], p[8], p[9], p[10]) if len(p) > 9 else None, + "height" : 0, + "id" : kind, + "image_urls" : None, + "meta_pages" : (), + "meta_single_page": {"original_image_url": url}, + "page_count" : 1, + "sanity_level" : 0, + "tags" : (), + "title" : kind, + "type" : kind, + "user" : user, + 
"width" : 0, + "x_restrict" : 0, + } + def works(self): - """Return an iterable containing all relevant 'work'-objects""" + """Return an iterable containing all relevant 'work' objects""" def metadata(self): - """Collect metadata for extractor-job""" + """Collect metadata for extractor job""" return {} class PixivUserExtractor(PixivExtractor): - """Extractor for works of a pixiv user""" + """Extractor for a pixiv user profile""" subcategory = "user" pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:" - r"(?:en/)?users/(\d+)(?:/(?:artworks|illustrations|manga)" - r"(?:/([^/?#]+))?)?/?(?:$|[?#])" - r"|member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?" - r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))") + r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id=" + r")(\d+)(?:$|[?#])") + test = ( + ("https://www.pixiv.net/en/users/173530"), + ("https://www.pixiv.net/u/173530"), + ("https://www.pixiv.net/member.php?id=173530"), + ("https://www.pixiv.net/mypage.php#id=173530"), + ("https://www.pixiv.net/#id=173530"), + ) + + def __init__(self, match): + PixivExtractor.__init__(self, match) + self.user_id = match.group(1) + + def items(self): + base = "{}/users/{}/".format(self.root, self.user_id) + return self._dispatch_extractors(( + (PixivAvatarExtractor , base + "avatar"), + (PixivBackgroundExtractor, base + "background"), + (PixivArtworksExtractor , base + "artworks"), + (PixivFavoriteExtractor , base + "bookmarks/artworks"), + ), ("artworks",)) + + +class PixivArtworksExtractor(PixivExtractor): + """Extractor for artworks of a pixiv user""" + subcategory = "artworks" + pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:" + r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)" + r"(?:/([^/?#]+))?/?(?:$|[?#])" + r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)") test = ( ("https://www.pixiv.net/en/users/173530/artworks", { "url": "852c31ad83b6840bacbce824d85f2a997889efb7", @@ -120,47 +175,30 @@ class PixivUserExtractor(PixivExtractor): 
"&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), { "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658", }), - # avatar (#595, #623, #1124) - ("https://www.pixiv.net/en/users/173530", { - "options": (("avatar", True),), - "content": "4e57544480cc2036ea9608103e8f024fa737fe66", - "range": "1", - }), - # background (#623, #1124, #2495) - ("https://www.pixiv.net/en/users/194921", { - "options": (("background", True),), - "content": "aeda3536003ea3002f70657cb93c5053f26f5843", - "range": "1", - }), # deleted account ("http://www.pixiv.net/member_illust.php?id=173531", { "options": (("metadata", True),), "exception": exception.NotFoundError, }), - ("https://www.pixiv.net/en/users/173530"), ("https://www.pixiv.net/en/users/173530/manga"), ("https://www.pixiv.net/en/users/173530/illustrations"), ("https://www.pixiv.net/member_illust.php?id=173530"), - ("https://www.pixiv.net/u/173530"), - ("https://www.pixiv.net/user/173530"), - ("https://www.pixiv.net/mypage.php#id=173530"), - ("https://www.pixiv.net/#id=173530"), ("https://touch.pixiv.net/member_illust.php?id=173530"), ) def __init__(self, match): PixivExtractor.__init__(self, match) - u1, t1, u2, t2, u3 = match.groups() + u1, t1, u2, t2 = match.groups() if t1: t1 = text.unquote(t1) elif t2: t2 = text.parse_query(t2).get("tag") - self.user_id = u1 or u2 or u3 + self.user_id = u1 or u2 self.tag = t1 or t2 def metadata(self): if self.config("metadata"): - return {"user": self.api.user_detail(self.user_id)["user"]} + return self.api.user_detail(self.user_id) return {} def works(self): @@ -173,54 +211,60 @@ class PixivUserExtractor(PixivExtractor): if tag in [t["name"].lower() for t in work["tags"]] ) - avatar = self.config("avatar") - background = self.config("background") - if avatar or background: - work_list = [] - detail = self.api.user_detail(self.user_id) - user = detail["user"] - - if avatar: - url = user["profile_image_urls"]["medium"] - work_list.append((self._make_work( - "avatar", url.replace("_170.", "."), user),)) - - if 
background: - url = detail["profile"]["background_image_url"] - if url: - if "/c/" in url: - parts = url.split("/") - del parts[3:5] - url = "/".join(parts) - url = url.replace("_master1200.", ".") - work = self._make_work("background", url, user) - if url.endswith(".jpg"): - work["_fallback"] = (url[:-4] + ".png",) - work_list.append((work,)) - - work_list.append(works) - works = itertools.chain.from_iterable(work_list) - return works - @staticmethod - def _make_work(kind, url, user): - return { - "create_date" : None, - "height" : 0, - "id" : kind, - "image_urls" : None, - "meta_pages" : (), - "meta_single_page": {"original_image_url": url}, - "page_count" : 1, - "sanity_level" : 0, - "tags" : (), - "title" : kind, - "type" : kind, - "user" : user, - "width" : 0, - "x_restrict" : 0, - } + +class PixivAvatarExtractor(PixivExtractor): + """Extractor for pixiv avatars""" + subcategory = "avatar" + filename_fmt = "avatar{date:?_//%Y-%m-%d}.{extension}" + archive_fmt = "avatar_{user[id]}_{date}" + pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net" + r"/(?:en/)?users/(\d+)/avatar") + test = ("https://www.pixiv.net/en/users/173530/avatar", { + "content": "4e57544480cc2036ea9608103e8f024fa737fe66", + }) + + def __init__(self, match): + PixivExtractor.__init__(self, match) + self.user_id = match.group(1) + + def works(self): + user = self.api.user_detail(self.user_id)["user"] + url = user["profile_image_urls"]["medium"].replace("_170.", ".") + return (self._make_work("avatar", url, user),) + + +class PixivBackgroundExtractor(PixivExtractor): + """Extractor for pixiv background banners""" + subcategory = "background" + filename_fmt = "background{date?_//:%Y-%m-%d}.{extension}" + archive_fmt = "background_{user[id]}_{date}" + pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net" + r"/(?:en/)?users/(\d+)/background") + test = ("https://www.pixiv.net/en/users/194921/background", { + "pattern": r"https://i\.pximg\.net/background/img/2021/01/30/16/12/02" + 
r"/194921_af1f71e557a42f499213d4b9eaccc0f8\.jpg", + }) + + def __init__(self, match): + PixivExtractor.__init__(self, match) + self.user_id = match.group(1) + + def works(self): + detail = self.api.user_detail(self.user_id) + url = detail["profile"]["background_image_url"] + if not url: + return () + if "/c/" in url: + parts = url.split("/") + del parts[3:5] + url = "/".join(parts) + url = url.replace("_master1200.", ".") + work = self._make_work("background", url, detail["user"]) + if url.endswith(".jpg"): + work["_fallback"] = (url[:-4] + ".png",) + return (work,) class PixivMeExtractor(PixivExtractor): @@ -312,10 +356,10 @@ class PixivFavoriteExtractor(PixivExtractor): r"|bookmark\.php)(?:\?([^#]*))?") test = ( ("https://www.pixiv.net/en/users/173530/bookmarks/artworks", { - "url": "e717eb511500f2fa3497aaee796a468ecf685cc4", + "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949", }), ("https://www.pixiv.net/bookmark.php?id=173530", { - "url": "e717eb511500f2fa3497aaee796a468ecf685cc4", + "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949", }), # bookmarks with specific tag (("https://www.pixiv.net/en/users/3137110" @@ -735,66 +779,70 @@ class PixivAppAPI(): def illust_detail(self, illust_id): params = {"illust_id": illust_id} - return self._call("v1/illust/detail", params)["illust"] + return self._call("/v1/illust/detail", params)["illust"] def illust_follow(self, restrict="all"): params = {"restrict": restrict} - return self._pagination("v2/illust/follow", params) + return self._pagination("/v2/illust/follow", params) def illust_ranking(self, mode="day", date=None): params = {"mode": mode, "date": date} - return self._pagination("v1/illust/ranking", params) + return self._pagination("/v1/illust/ranking", params) def illust_related(self, illust_id): params = {"illust_id": illust_id} - return self._pagination("v2/illust/related", params) + return self._pagination("/v2/illust/related", params) def search_illust(self, word, sort=None, target=None, duration=None, 
date_start=None, date_end=None): params = {"word": word, "search_target": target, "sort": sort, "duration": duration, "start_date": date_start, "end_date": date_end} - return self._pagination("v1/search/illust", params) + return self._pagination("/v1/search/illust", params) def user_bookmarks_illust(self, user_id, tag=None, restrict="public"): params = {"user_id": user_id, "tag": tag, "restrict": restrict} - return self._pagination("v1/user/bookmarks/illust", params) + return self._pagination("/v1/user/bookmarks/illust", params) + @memcache(keyarg=1) def user_detail(self, user_id): params = {"user_id": user_id} - return self._call("v1/user/detail", params) + return self._call("/v1/user/detail", params) def user_following(self, user_id, restrict="public"): params = {"user_id": user_id, "restrict": restrict} - return self._pagination("v1/user/following", params, "user_previews") + return self._pagination("/v1/user/following", params, "user_previews") def user_illusts(self, user_id): params = {"user_id": user_id} - return self._pagination("v1/user/illusts", params) + return self._pagination("/v1/user/illusts", params) def ugoira_metadata(self, illust_id): params = {"illust_id": illust_id} - return self._call("v1/ugoira/metadata", params)["ugoira_metadata"] + return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"] def _call(self, endpoint, params=None): - url = "https://app-api.pixiv.net/" + endpoint + url = "https://app-api.pixiv.net" + endpoint + + while True: + self.login() + response = self.extractor.request(url, params=params, fatal=False) + data = response.json() - self.login() - response = self.extractor.request(url, params=params, fatal=False) - data = response.json() + if "error" not in data: + return data + + self.log.debug(data) - if "error" in data: if response.status_code == 404: raise exception.NotFoundError() error = data["error"] if "rate limit" in (error.get("message") or "").lower(): - self.log.info("Waiting two minutes for API rate limit 
reset.") - time.sleep(120) - return self._call(endpoint, params) - raise exception.StopExtraction("API request failed: %s", error) + self.extractor.wait(seconds=300) + continue - return data + raise exception.StopExtraction("API request failed: %s", error) def _pagination(self, endpoint, params, key="illusts"): while True: |
