diff options
| author | 2023-06-19 01:14:28 -0400 | |
|---|---|---|
| committer | 2023-06-19 01:14:28 -0400 | |
| commit | 9fb906aeb3816abb42f459d1b67e35024e6f2348 (patch) | |
| tree | 30b039301c783475c0f4d46b0e0c5ec9851b2567 /gallery_dl/extractor/pixiv.py | |
| parent | 8950c0f2ef55ec2ed36b3fccc9fd85b64b877c3b (diff) | |
New upstream version 1.25.6. (tag: upstream/1.25.6)
Diffstat (limited to 'gallery_dl/extractor/pixiv.py')
| -rw-r--r-- | gallery_dl/extractor/pixiv.py | 99 |
1 files changed, 70 insertions, 29 deletions
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index cdaf595..861959e 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -15,6 +15,9 @@ from datetime import datetime, timedelta import itertools import hashlib +BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" +USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)" + class PixivExtractor(Extractor): """Base class for pixiv extractors""" @@ -150,7 +153,7 @@ class PixivExtractor(Extractor): class PixivUserExtractor(PixivExtractor): """Extractor for a pixiv user profile""" subcategory = "user" - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:" + pattern = (BASE_PATTERN + r"/(?:" r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id=" r")(\d+)(?:$|[?#])") test = ( @@ -168,18 +171,19 @@ class PixivUserExtractor(PixivExtractor): def items(self): base = "{}/users/{}/".format(self.root, self.user_id) return self._dispatch_extractors(( - (PixivAvatarExtractor , base + "avatar"), - (PixivBackgroundExtractor, base + "background"), - (PixivArtworksExtractor , base + "artworks"), - (PixivFavoriteExtractor , base + "bookmarks/artworks"), - (PixivNovelUserExtractor , base + "novels"), + (PixivAvatarExtractor , base + "avatar"), + (PixivBackgroundExtractor , base + "background"), + (PixivArtworksExtractor , base + "artworks"), + (PixivFavoriteExtractor , base + "bookmarks/artworks"), + (PixivNovelBookmarkExtractor, base + "bookmarks/novels"), + (PixivNovelUserExtractor , base + "novels"), ), ("artworks",)) class PixivArtworksExtractor(PixivExtractor): """Extractor for artworks of a pixiv user""" subcategory = "artworks" - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:" + pattern = (BASE_PATTERN + r"/(?:" r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)" r"(?:/([^/?#]+))?/?(?:$|[?#])" r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)") @@ -240,8 +244,7 @@ class PixivAvatarExtractor(PixivExtractor): subcategory = "avatar" filename_fmt 
= "avatar{date:?_//%Y-%m-%d}.{extension}" archive_fmt = "avatar_{user[id]}_{date}" - pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net" - r"/(?:en/)?users/(\d+)/avatar") + pattern = USER_PATTERN + r"/avatar" test = ("https://www.pixiv.net/en/users/173530/avatar", { "content": "4e57544480cc2036ea9608103e8f024fa737fe66", }) @@ -261,8 +264,7 @@ class PixivBackgroundExtractor(PixivExtractor): subcategory = "background" filename_fmt = "background{date:?_//%Y-%m-%d}.{extension}" archive_fmt = "background_{user[id]}_{date}" - pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net" - r"/(?:en/)?users/(\d+)/background") + pattern = USER_PATTERN + "/background" test = ("https://www.pixiv.net/en/users/194921/background", { "pattern": r"https://i\.pximg\.net/background/img/2021/01/30/16/12/02" r"/194921_af1f71e557a42f499213d4b9eaccc0f8\.jpg", @@ -376,12 +378,12 @@ class PixivWorkExtractor(PixivExtractor): class PixivFavoriteExtractor(PixivExtractor): - """Extractor for all favorites/bookmarks of a pixiv-user""" + """Extractor for all favorites/bookmarks of a pixiv user""" subcategory = "favorite" directory_fmt = ("{category}", "bookmarks", "{user_bookmark[id]} {user_bookmark[account]}") archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}" - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:(?:en/)?" + pattern = (BASE_PATTERN + r"/(?:(?:en/)?" r"users/(\d+)/(bookmarks/artworks|following)(?:/([^/?#]+))?" r"|bookmark\.php)(?:\?([^#]*))?") test = ( @@ -484,8 +486,7 @@ class PixivRankingExtractor(PixivExtractor): archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}" directory_fmt = ("{category}", "rankings", "{ranking[mode]}", "{ranking[date]}") - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" - r"/ranking\.php(?:\?([^#]*))?") + pattern = BASE_PATTERN + r"/ranking\.php(?:\?([^#]*))?" 
test = ( ("https://www.pixiv.net/ranking.php?mode=daily&date=20170818"), ("https://www.pixiv.net/ranking.php"), @@ -550,8 +551,7 @@ class PixivSearchExtractor(PixivExtractor): subcategory = "search" archive_fmt = "s_{search[word]}_{id}{num}.{extension}" directory_fmt = ("{category}", "search", "{search[word]}") - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" - r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?" + pattern = (BASE_PATTERN + r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?" r"|search\.php)(?:\?([^#]+))?") test = ( ("https://www.pixiv.net/en/tags/Original", { @@ -634,8 +634,7 @@ class PixivFollowExtractor(PixivExtractor): subcategory = "follow" archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}" directory_fmt = ("{category}", "following") - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" - r"/bookmark_new_illust\.php") + pattern = BASE_PATTERN + r"/bookmark_new_illust\.php" test = ( ("https://www.pixiv.net/bookmark_new_illust.php"), ("https://touch.pixiv.net/bookmark_new_illust.php"), @@ -697,8 +696,7 @@ class PixivSeriesExtractor(PixivExtractor): directory_fmt = ("{category}", "{user[id]} {user[account]}", "{series[id]} {series[title]}") filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}" - pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net" - r"/user/(\d+)/series/(\d+)") + pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)" test = ("https://www.pixiv.net/user/10509347/series/21859", { "range": "1-10", "count": 10, @@ -755,8 +753,7 @@ class PixivNovelExtractor(PixivExtractor): """Extractor for pixiv novels""" subcategory = "novel" request_interval = 1.0 - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" - r"/n(?:ovel/show\.php\?id=|/)(\d+)") + pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)" test = ( ("https://www.pixiv.net/novel/show.php?id=19612040", { "count": 1, @@ -799,6 +796,12 @@ class PixivNovelExtractor(PixivExtractor): "options": (("embeds", True),), "count": 3, }), + # full series + 
("https://www.pixiv.net/novel/show.php?id=19612040", { + "options": (("full-series", True),), + "count": 4, + }), + # short URL ("https://www.pixiv.net/n/19612040"), ) @@ -862,7 +865,7 @@ class PixivNovelExtractor(PixivExtractor): illusts = {} for marker in text.extract_iter(content, "[", "]"): - if marker.startswith("[jumpuri:"): + if marker.startswith("[jumpuri:If you would like to "): desktop = True elif marker.startswith("pixivimage:"): illusts[marker[11:].partition("-")[0]] = None @@ -895,14 +898,17 @@ class PixivNovelExtractor(PixivExtractor): yield Message.Queue, url, novel def novels(self): - return (self.api.novel_detail(self.novel_id),) + novel = self.api.novel_detail(self.novel_id) + if self.config("full-series") and novel["series"]: + self.subcategory = PixivNovelSeriesExtractor.subcategory + return self.api.novel_series(novel["series"]["id"]) + return (novel,) class PixivNovelUserExtractor(PixivNovelExtractor): """Extractor for pixiv users' novels""" subcategory = "novel-user" - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" - r"/(?:en/)?users/(\d+)/novels") + pattern = USER_PATTERN + r"/novels" test = ("https://www.pixiv.net/en/users/77055466/novels", { "pattern": "^text:", "range": "1-5", @@ -916,8 +922,7 @@ class PixivNovelUserExtractor(PixivNovelExtractor): class PixivNovelSeriesExtractor(PixivNovelExtractor): """Extractor for pixiv novel series""" subcategory = "novel-series" - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" - r"/novel/series/(\d+)") + pattern = BASE_PATTERN + r"/novel/series/(\d+)" test = ("https://www.pixiv.net/novel/series/10278364", { "count": 4, "content": "b06abed001b3f6ccfb1579699e9a238b46d38ea2", @@ -927,6 +932,37 @@ class PixivNovelSeriesExtractor(PixivNovelExtractor): return self.api.novel_series(self.novel_id) +class PixivNovelBookmarkExtractor(PixivNovelExtractor): + """Extractor for bookmarked pixiv novels""" + subcategory = "novel-bookmark" + pattern = (USER_PATTERN + r"/bookmarks/novels" + 
r"(?:/([^/?#]+))?(?:/?\?([^#]+))?") + test = ( + ("https://www.pixiv.net/en/users/77055466/bookmarks/novels", { + "count": 1, + "content": "7194e8faa876b2b536f185ee271a2b6e46c69089", + }), + ("https://www.pixiv.net/en/users/11/bookmarks/novels/TAG?rest=hide"), + ) + + def __init__(self, match): + PixivNovelExtractor.__init__(self, match) + self.user_id, self.tag, self.query = match.groups() + + def novels(self): + if self.tag: + tag = text.unquote(self.tag) + else: + tag = None + + if text.parse_query(self.query).get("rest") == "hide": + restrict = "private" + else: + restrict = "public" + + return self.api.user_bookmarks_novel(self.user_id, tag, restrict) + + class PixivSketchExtractor(Extractor): """Extractor for user pages on sketch.pixiv.net""" category = "pixiv" @@ -1113,6 +1149,11 @@ class PixivAppAPI(): params = {"user_id": user_id, "tag": tag, "restrict": restrict} return self._pagination("/v1/user/bookmarks/illust", params) + def user_bookmarks_novel(self, user_id, tag=None, restrict="public"): + """Return novels bookmarked by a user""" + params = {"user_id": user_id, "tag": tag, "restrict": restrict} + return self._pagination("/v1/user/bookmarks/novel", params, "novels") + def user_bookmark_tags_illust(self, user_id, restrict="public"): """Return bookmark tags defined by a user""" params = {"user_id": user_id, "restrict": restrict} |
