diff options
| author | 2021-11-01 05:03:49 -0400 | |
|---|---|---|
| committer | 2021-11-01 05:03:49 -0400 | |
| commit | 4a965d875415907cc1a016b428ae305a964f9228 (patch) | |
| tree | 7cece9948a7ba390348e00c669f9cb1f7a9ba39a /gallery_dl/extractor/kemonoparty.py | |
| parent | 34ba2951b8c523713425c98addb9256ea05c946f (diff) | |
New upstream version 1.19.1. (tag: upstream/1.19.1)
Diffstat (limited to 'gallery_dl/extractor/kemonoparty.py')
| -rw-r--r-- | gallery_dl/extractor/kemonoparty.py | 135 |
1 file changed, 128 insertions, 7 deletions
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index c5f5ae7..d5aad67 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -14,7 +14,8 @@ from ..cache import cache import itertools import re -BASE_PATTERN = r"(?:https?://)?kemono\.party/([^/?#]+)/user/([^/?#]+)" +BASE_PATTERN = r"(?:https?://)?kemono\.party" +USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)" class KemonopartyExtractor(Extractor): @@ -29,7 +30,9 @@ class KemonopartyExtractor(Extractor): def items(self): self._prepare_ddosguard_cookies() - find_inline = re.compile(r'src="(/inline/[^"]+)').findall + find_inline = re.compile( + r'src="(?:https?://kemono\.party)?(/inline/[^"]+' + r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall skip_service = \ "patreon" if self.config("patreon-skip-file", True) else None @@ -101,7 +104,7 @@ class KemonopartyExtractor(Extractor): class KemonopartyUserExtractor(KemonopartyExtractor): """Extractor for all posts from a kemono.party user listing""" subcategory = "user" - pattern = BASE_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])" + pattern = USER_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])" test = ( ("https://kemono.party/fanbox/user/6993449", { "range": "1-25", @@ -138,11 +141,11 @@ class KemonopartyUserExtractor(KemonopartyExtractor): class KemonopartyPostExtractor(KemonopartyExtractor): """Extractor for a single kemono.party post""" subcategory = "post" - pattern = BASE_PATTERN + r"/post/([^/?#]+)" + pattern = USER_PATTERN + r"/post/([^/?#]+)" test = ( ("https://kemono.party/fanbox/user/6993449/post/506575", { - "pattern": r"https://kemono\.party/data/files/fanbox" - r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg", + "pattern": r"https://kemono.party/data/21/0f" + r"/210f35388e28bbcf756db18dd516e2d82ce75[0-9a-f]+\.jpg", "keyword": { "added": "Wed, 06 May 2020 20:28:02 GMT", "content": str, @@ -197,10 +200,128 @@ class KemonopartyPostExtractor(KemonopartyExtractor): return 
(posts[0],) if len(posts) > 1 else posts +class KemonopartyDiscordExtractor(KemonopartyExtractor): + """Extractor for kemono.party discord servers""" + subcategory = "discord" + directory_fmt = ("{category}", "discord", "{server}", + "{channel_name|channel}") + filename_fmt = "{id}_{num:>02}_{filename}.{extension}" + archive_fmt = "discord_{server}_{id}_{num}" + pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)" + test = ( + (("https://kemono.party/discord" + "/server/488668827274444803#finish-work"), { + "count": 4, + "keyword": {"channel_name": "finish-work"}, + }), + (("https://kemono.party/discord" + "/server/256559665620451329/channel/462437519519383555#"), { + "pattern": r"https://kemono\.party/data/attachments/discord" + r"/256559665620451329/\d+/\d+/.+", + "count": ">= 2", + }), + # 'inline' files + (("https://kemono.party/discord" + "/server/315262215055736843/channel/315262215055736843#general"), { + "pattern": r"https://cdn\.discordapp\.com/attachments/\d+/\d+/.+$", + "range": "1-5", + "options": (("image-filter", "type == 'inline'"),), + }), + ) + + def __init__(self, match): + KemonopartyExtractor.__init__(self, match) + self.server, self.channel, self.channel_name = match.groups() + + def items(self): + self._prepare_ddosguard_cookies() + + find_inline = re.compile( + r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)" + r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall + + posts = self.posts() + max_posts = self.config("max-posts") + if max_posts: + posts = itertools.islice(posts, max_posts) + + for post in posts: + files = [] + append = files.append + for attachment in post["attachments"]: + attachment["type"] = "attachment" + append(attachment) + for path in find_inline(post["content"] or ""): + append({"path": "https://cdn.discordapp.com" + path, + "name": path, "type": "inline"}) + + post["channel_name"] = self.channel_name + post["date"] = text.parse_datetime( + post["published"], "%a, %d %b %Y %H:%M:%S %Z") + 
yield Message.Directory, post + + for post["num"], file in enumerate(files, 1): + post["type"] = file["type"] + url = file["path"] + if url[0] == "/": + url = self.root + "/data" + url + elif url.startswith("https://kemono.party"): + url = self.root + "/data" + url[20:] + + text.nameext_from_url(file["name"], post) + yield Message.Url, url, post + + def posts(self): + if self.channel is None: + url = "{}/api/discord/channels/lookup?q={}".format( + self.root, self.server) + for channel in self.request(url).json(): + if channel["name"] == self.channel_name: + self.channel = channel["id"] + break + else: + raise exception.NotFoundError("channel") + + url = "{}/api/discord/channel/{}".format(self.root, self.channel) + params = {"skip": 0} + + while True: + posts = self.request(url, params=params).json() + yield from posts + + if len(posts) < 25: + break + params["skip"] += 25 + + +class KemonopartyDiscordServerExtractor(KemonopartyExtractor): + subcategory = "discord-server" + pattern = BASE_PATTERN + r"/discord/server/(\d+)$" + test = ("https://kemono.party/discord/server/488668827274444803", { + "pattern": KemonopartyDiscordExtractor.pattern, + "count": 13, + }) + + def __init__(self, match): + KemonopartyExtractor.__init__(self, match) + self.server = match.group(1) + + def items(self): + url = "{}/api/discord/channels/lookup?q={}".format( + self.root, self.server) + channels = self.request(url).json() + + for channel in channels: + url = "{}/discord/server/{}/channel/{}#{}".format( + self.root, self.server, channel["id"], channel["name"]) + channel["_extractor"] = KemonopartyDiscordExtractor + yield Message.Queue, url, channel + + class KemonopartyFavoriteExtractor(KemonopartyExtractor): """Extractor for kemono.party favorites""" subcategory = "favorite" - pattern = r"(?:https?://)?kemono\.party/favorites" + pattern = BASE_PATTERN + r"/favorites" test = ("https://kemono.party/favorites", { "pattern": KemonopartyUserExtractor.pattern, "url": 
"f4b5b796979bcba824af84206578c79101c7f0e1", |
