diff options
Diffstat (limited to 'gallery_dl/extractor/pixiv.py')
| -rw-r--r-- | gallery_dl/extractor/pixiv.py | 293 |
1 files changed, 35 insertions, 258 deletions
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 861959e..18a3ceb 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -26,10 +26,9 @@ class PixivExtractor(Extractor): directory_fmt = ("{category}", "{user[id]} {user[account]}") filename_fmt = "{id}_p{num}.{extension}" archive_fmt = "{id}{suffix}.{extension}" - cookiedomain = None + cookies_domain = None - def __init__(self, match): - Extractor.__init__(self, match) + def _init(self): self.api = PixivAppAPI(self) self.load_ugoira = self.config("ugoira", True) self.max_posts = self.config("max-posts", 0) @@ -47,6 +46,8 @@ class PixivExtractor(Extractor): def transform_tags(work): work["tags"] = [tag["name"] for tag in work["tags"]] + url_sanity = ("https://s.pximg.net/common/images" + "/limit_sanity_level_360.png") ratings = {0: "General", 1: "R-18", 2: "R-18G"} meta_user = self.config("metadata") meta_bookmark = self.config("metadata-bookmark") @@ -102,6 +103,10 @@ class PixivExtractor(Extractor): elif work["page_count"] == 1: url = meta_single_page["original_image_url"] + if url == url_sanity: + self.log.debug("Skipping 'sanity_level' warning (%s)", + work["id"]) + continue work["date_url"] = self._date_from_url(url) yield Message.Url, url, text.nameext_from_url(url, work) @@ -156,18 +161,15 @@ class PixivUserExtractor(PixivExtractor): pattern = (BASE_PATTERN + r"/(?:" r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id=" r")(\d+)(?:$|[?#])") - test = ( - ("https://www.pixiv.net/en/users/173530"), - ("https://www.pixiv.net/u/173530"), - ("https://www.pixiv.net/member.php?id=173530"), - ("https://www.pixiv.net/mypage.php#id=173530"), - ("https://www.pixiv.net/#id=173530"), - ) + example = "https://www.pixiv.net/en/users/12345" def __init__(self, match): PixivExtractor.__init__(self, match) self.user_id = match.group(1) + def initialize(self): + pass + def items(self): base = "{}/users/{}/".format(self.root, self.user_id) return self._dispatch_extractors(( @@ -187,29 +189,7 @@ class PixivArtworksExtractor(PixivExtractor): r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)" r"(?:/([^/?#]+))?/?(?:$|[?#])" r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)") - test = ( - ("https://www.pixiv.net/en/users/173530/artworks", { - "url": "852c31ad83b6840bacbce824d85f2a997889efb7", - }), - # illusts with specific tag - (("https://www.pixiv.net/en/users/173530/artworks" - "/%E6%89%8B%E3%81%B6%E3%82%8D"), { - "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658", - }), - (("https://www.pixiv.net/member_illust.php?id=173530" - "&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), { - "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658", - }), - # deleted account - ("http://www.pixiv.net/member_illust.php?id=173531", { - "options": (("metadata", True),), - "exception": exception.NotFoundError, - }), - ("https://www.pixiv.net/en/users/173530/manga"), - ("https://www.pixiv.net/en/users/173530/illustrations"), - ("https://www.pixiv.net/member_illust.php?id=173530"), - ("https://touch.pixiv.net/member_illust.php?id=173530"), - ) + example = "https://www.pixiv.net/en/users/12345/artworks" def __init__(self, match): PixivExtractor.__init__(self, match) @@ -245,9 +225,7 @@ class PixivAvatarExtractor(PixivExtractor): filename_fmt = "avatar{date:?_//%Y-%m-%d}.{extension}" archive_fmt = "avatar_{user[id]}_{date}" pattern = USER_PATTERN + r"/avatar" - test = ("https://www.pixiv.net/en/users/173530/avatar", { - "content": "4e57544480cc2036ea9608103e8f024fa737fe66", - }) + example = "https://www.pixiv.net/en/users/12345/avatar" def __init__(self, match): PixivExtractor.__init__(self, match) @@ -265,10 +243,7 @@ class PixivBackgroundExtractor(PixivExtractor): filename_fmt = "background{date:?_//%Y-%m-%d}.{extension}" archive_fmt = "background_{user[id]}_{date}" pattern = USER_PATTERN + "/background" - test = ("https://www.pixiv.net/en/users/194921/background", { - "pattern": r"https://i\.pximg\.net/background/img/2021/01/30/16/12/02" - r"/194921_af1f71e557a42f499213d4b9eaccc0f8\.jpg", - }) + example = "https://www.pixiv.net/en/users/12345/background" def __init__(self, match): PixivExtractor.__init__(self, match) @@ -295,14 +270,7 @@ class PixivMeExtractor(PixivExtractor): """Extractor for pixiv.me URLs""" subcategory = "me" pattern = r"(?:https?://)?pixiv\.me/([^/?#]+)" - test = ( - ("https://pixiv.me/del_shannon", { - "url": "29c295ce75150177e6b0a09089a949804c708fbf", - }), - ("https://pixiv.me/del_shanno", { - "exception": exception.NotFoundError, - }), - ) + example = "https://pixiv.me/USER" def __init__(self, match): PixivExtractor.__init__(self, match) @@ -325,45 +293,7 @@ class PixivWorkExtractor(PixivExtractor): r"|(?:i(?:\d+\.pixiv|\.pximg)\.net" r"/(?:(?:.*/)?img-[^/]+/img/\d{4}(?:/\d\d){5}|img\d+/img/[^/]+)" r"|img\d*\.pixiv\.net/img/[^/]+|(?:www\.)?pixiv\.net/i)/(\d+))") - test = ( - ("https://www.pixiv.net/artworks/966412", { - "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba", - "content": "69a8edfb717400d1c2e146ab2b30d2c235440c5a", - "keyword": { - "date" : "dt:2008-06-12 15:29:13", - "date_url": "dt:2008-06-12 15:29:13", - }, - }), - (("http://www.pixiv.net/member_illust.php" - "?mode=medium&illust_id=966411"), { - "exception": exception.NotFoundError, - }), - # ugoira - (("https://www.pixiv.net/member_illust.php" - "?mode=medium&illust_id=66806629"), { - "url": "7267695a985c4db8759bebcf8d21dbdd2d2317ef", - "keyword": { - "frames" : list, - "date" : "dt:2018-01-14 15:06:08", - "date_url": "dt:2018-01-15 04:24:48", - }, - }), - # related works (#1237) - ("https://www.pixiv.net/artworks/966412", { - "options": (("related", True),), - "range": "1-10", - "count": ">= 10", - }), - ("https://www.pixiv.net/en/artworks/966412"), - ("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=96641"), - ("http://i1.pixiv.net/c/600x600/img-master" - "/img/2008/06/13/00/29/13/966412_p0_master1200.jpg"), - ("https://i.pximg.net/img-original" - "/img/2017/04/25/07/33/29/62568267_p0.png"), - ("https://www.pixiv.net/i/966412"), - ("http://img.pixiv.net/img/soundcross/42626136.jpg"), - ("http://i2.pixiv.net/img76/img/snailrin/42672235.jpg"), - ) + example = "https://www.pixiv.net/artworks/12345" def __init__(self, match): PixivExtractor.__init__(self, match) @@ -386,47 +316,7 @@ class PixivFavoriteExtractor(PixivExtractor): pattern = (BASE_PATTERN + r"/(?:(?:en/)?" r"users/(\d+)/(bookmarks/artworks|following)(?:/([^/?#]+))?" r"|bookmark\.php)(?:\?([^#]*))?") - test = ( - ("https://www.pixiv.net/en/users/173530/bookmarks/artworks", { - "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949", - }), - ("https://www.pixiv.net/bookmark.php?id=173530", { - "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949", - }), - # bookmarks with specific tag - (("https://www.pixiv.net/en/users/3137110" - "/bookmarks/artworks/%E3%81%AF%E3%82%93%E3%82%82%E3%82%93"), { - "url": "379b28275f786d946e01f721e54afe346c148a8c", - }), - # bookmarks with specific tag (legacy url) - (("https://www.pixiv.net/bookmark.php?id=3137110" - "&tag=%E3%81%AF%E3%82%93%E3%82%82%E3%82%93&p=1"), { - "url": "379b28275f786d946e01f721e54afe346c148a8c", - }), - # own bookmarks - ("https://www.pixiv.net/bookmark.php", { - "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba", - "keyword": {"tags_bookmark": ["47", "hitman"]}, - "options": (("metadata-bookmark", True),), - }), - # own bookmarks with tag (#596) - ("https://www.pixiv.net/bookmark.php?tag=foobar", { - "count": 0, - }), - # followed users (#515) - ("https://www.pixiv.net/en/users/173530/following", { - "pattern": PixivUserExtractor.pattern, - "count": ">= 12", - }), - # followed users (legacy url) (#515) - ("https://www.pixiv.net/bookmark.php?id=173530&type=user", { - "pattern": PixivUserExtractor.pattern, - "count": ">= 12", - }), - # touch URLs - ("https://touch.pixiv.net/bookmark.php?id=173530"), - ("https://touch.pixiv.net/bookmark.php"), - ) + example = "https://www.pixiv.net/en/users/12345/bookmarks/artworks" def __init__(self, match): uid, kind, self.tag, query = match.groups() @@ -487,14 +377,7 @@ class PixivRankingExtractor(PixivExtractor): directory_fmt = ("{category}", "rankings", "{ranking[mode]}", "{ranking[date]}") pattern = BASE_PATTERN + r"/ranking\.php(?:\?([^#]*))?" - test = ( - ("https://www.pixiv.net/ranking.php?mode=daily&date=20170818"), - ("https://www.pixiv.net/ranking.php"), - ("https://touch.pixiv.net/ranking.php"), - ("https://www.pixiv.net/ranking.php?mode=unknown", { - "exception": exception.StopExtraction, - }), - ) + example = "https://www.pixiv.net/ranking.php" def __init__(self, match): PixivExtractor.__init__(self, match) @@ -553,24 +436,7 @@ class PixivSearchExtractor(PixivExtractor): directory_fmt = ("{category}", "search", "{search[word]}") pattern = (BASE_PATTERN + r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?" r"|search\.php)(?:\?([^#]+))?") - test = ( - ("https://www.pixiv.net/en/tags/Original", { - "range": "1-10", - "count": 10, - }), - ("https://pixiv.net/en/tags/foo/artworks?order=week&s_mode=s_tag", { - "exception": exception.StopExtraction, - }), - ("https://pixiv.net/en/tags/foo/artworks?order=date&s_mode=tag", { - "exception": exception.StopExtraction, - }), - ("https://www.pixiv.net/search.php?s_mode=s_tag&name=Original", { - "exception": exception.StopExtraction, - }), - ("https://www.pixiv.net/en/tags/foo/artworks?order=date&s_mode=s_tag"), - ("https://www.pixiv.net/search.php?s_mode=s_tag&word=Original"), - ("https://touch.pixiv.net/search.php?word=Original"), - ) + example = "https://www.pixiv.net/en/tags/TAG" def __init__(self, match): PixivExtractor.__init__(self, match) @@ -635,10 +501,7 @@ class PixivFollowExtractor(PixivExtractor): archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}" directory_fmt = ("{category}", "following") pattern = BASE_PATTERN + r"/bookmark_new_illust\.php" - test = ( - ("https://www.pixiv.net/bookmark_new_illust.php"), - ("https://touch.pixiv.net/bookmark_new_illust.php"), - ) + example = "https://www.pixiv.net/bookmark_new_illust.php" def works(self): return self.api.illust_follow() @@ -655,17 +518,7 @@ class PixivPixivisionExtractor(PixivExtractor): "{pixivision_id} {pixivision_title}") archive_fmt = "V{pixivision_id}_{id}{suffix}.{extension}" pattern = r"(?:https?://)?(?:www\.)?pixivision\.net/(?:en/)?a/(\d+)" - test = ( - ("https://www.pixivision.net/en/a/2791"), - ("https://pixivision.net/a/2791", { - "count": 7, - "keyword": { - "pixivision_id": "2791", - "pixivision_title": "What's your favorite music? Editor’s " - "picks featuring: “CD Covers”!", - }, - }), - ) + example = "https://www.pixivision.net/en/a/12345" def __init__(self, match): PixivExtractor.__init__(self, match) @@ -697,22 +550,7 @@ class PixivSeriesExtractor(PixivExtractor): "{series[id]} {series[title]}") filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}" pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)" - test = ("https://www.pixiv.net/user/10509347/series/21859", { - "range": "1-10", - "count": 10, - "keyword": { - "num_series": int, - "series": { - "canonical": "https://www.pixiv.net/user/10509347" - "/series/21859", - "description": str, - "ogp": dict, - "title": "先輩がうざい後輩の話", - "total": int, - "twitter": dict, - }, - }, - }) + example = "https://www.pixiv.net/user/12345/series/12345" def __init__(self, match): PixivExtractor.__init__(self, match) @@ -754,56 +592,7 @@ class PixivNovelExtractor(PixivExtractor): subcategory = "novel" request_interval = 1.0 pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)" - test = ( - ("https://www.pixiv.net/novel/show.php?id=19612040", { - "count": 1, - "content": "8c818474153cbd2f221ee08766e1d634c821d8b4", - "keyword": { - "caption": r"re:「無能な名無し」と呼ばれ虐げられて育った鈴\(すず\)は、", - "comment_access_control": 0, - "create_date": "2023-04-02T15:18:58+09:00", - "date": "dt:2023-04-02 06:18:58", - "id": 19612040, - "is_bookmarked": False, - "is_muted": False, - "is_mypixiv_only": False, - "is_original": True, - "is_x_restricted": False, - "novel_ai_type": 1, - "page_count": 1, - "rating": "General", - "restrict": 0, - "series": { - "id": 10278364, - "title": "龍の贄嫁〜無能な名無しと虐げられていましたが、" - "どうやら異母妹に霊力を搾取されていたようです〜", - }, - "tags": ["和風ファンタジー", "溺愛", "神様", "ヤンデレ", "執着", - "異能", "ざまぁ", "学園", "神嫁"], - "text_length": 5974, - "title": "異母妹から「無能な名無し」と虐げられていた私、" - "どうやら異母妹に霊力を搾取されていたようです(1)", - "user": { - "account": "yukinaga_chifuyu", - "id": 77055466, - }, - "visible": True, - "x_restrict": 0, - }, - }), - # embeds - ("https://www.pixiv.net/novel/show.php?id=16422450", { - "options": (("embeds", True),), - "count": 3, - }), - # full series - ("https://www.pixiv.net/novel/show.php?id=19612040", { - "options": (("full-series", True),), - "count": 4, - }), - # short URL - ("https://www.pixiv.net/n/19612040"), - ) + example = "https://www.pixiv.net/novel/show.php?id=12345" def __init__(self, match): PixivExtractor.__init__(self, match) @@ -856,8 +645,13 @@ class PixivNovelExtractor(PixivExtractor): yield Message.Directory, novel + try: + content = self.api.novel_text(novel["id"])["novel_text"] + except Exception: + self.log.warning("Unable to download novel %s", novel["id"]) + continue + novel["extension"] = "txt" - content = self.api.novel_text(novel["id"])["novel_text"] yield Message.Url, "text:" + content, novel if embeds: @@ -909,11 +703,7 @@ class PixivNovelUserExtractor(PixivNovelExtractor): """Extractor for pixiv users' novels""" subcategory = "novel-user" pattern = USER_PATTERN + r"/novels" - test = ("https://www.pixiv.net/en/users/77055466/novels", { - "pattern": "^text:", - "range": "1-5", - "count": 5, - }) + example = "https://www.pixiv.net/en/users/12345/novels" def novels(self): return self.api.user_novels(self.novel_id) @@ -923,10 +713,7 @@ class PixivNovelSeriesExtractor(PixivNovelExtractor): """Extractor for pixiv novel series""" subcategory = "novel-series" pattern = BASE_PATTERN + r"/novel/series/(\d+)" - test = ("https://www.pixiv.net/novel/series/10278364", { - "count": 4, - "content": "b06abed001b3f6ccfb1579699e9a238b46d38ea2", - }) + example = "https://www.pixiv.net/novel/series/12345" def novels(self): return self.api.novel_series(self.novel_id) @@ -937,13 +724,7 @@ class PixivNovelBookmarkExtractor(PixivNovelExtractor): subcategory = "novel-bookmark" pattern = (USER_PATTERN + r"/bookmarks/novels" r"(?:/([^/?#]+))?(?:/?\?([^#]+))?") - test = ( - ("https://www.pixiv.net/en/users/77055466/bookmarks/novels", { - "count": 1, - "content": "7194e8faa876b2b536f185ee271a2b6e46c69089", - }), - ("https://www.pixiv.net/en/users/11/bookmarks/novels/TAG?rest=hide"), - ) + example = "https://www.pixiv.net/en/users/12345/bookmarks/novels" def __init__(self, match): PixivNovelExtractor.__init__(self, match) @@ -971,13 +752,9 @@ class PixivSketchExtractor(Extractor): filename_fmt = "{post_id} {id}.{extension}" archive_fmt = "S{user[id]}_{id}" root = "https://sketch.pixiv.net" - cookiedomain = ".pixiv.net" + cookies_domain = ".pixiv.net" pattern = r"(?:https?://)?sketch\.pixiv\.net/@([^/?#]+)" - test = ("https://sketch.pixiv.net/@nicoby", { - "pattern": r"https://img\-sketch\.pixiv\.net/uploads/medium" - r"/file/\d+/\d+\.(jpg|png)", - "count": ">= 35", - }) + example = "https://sketch.pixiv.net/@USER" def __init__(self, match): Extractor.__init__(self, match) |
