diff options
Diffstat (limited to 'gallery_dl/extractor/kemonoparty.py')
| -rw-r--r-- | gallery_dl/extractor/kemonoparty.py | 134 |
1 files changed, 91 insertions, 43 deletions
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 894c671..1596cfb 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -10,7 +10,7 @@ from .common import Extractor, Message from .. import text, exception -from ..cache import cache +from ..cache import cache, memcache import itertools import re @@ -70,8 +70,7 @@ class KemonopartyExtractor(Extractor): self.root, post["service"], post["user"], post["id"]) post["_http_headers"] = headers post["date"] = text.parse_datetime( - post["published"] or post["added"], - "%a, %d %b %Y %H:%M:%S %Z") + post["published"] or post["added"], "%Y-%m-%dT%H:%M:%S") if username: post["username"] = username if comments: @@ -197,14 +196,25 @@ class KemonopartyExtractor(Extractor): dms = [] for dm in text.extract_iter(page, "<article", "</article>"): + footer = text.extr(dm, "<footer", "</footer>") dms.append({ - "body": text.unescape(text.extract( + "body": text.unescape(text.extr( dm, "<pre>", "</pre></", - )[0].strip()), - "date": text.extr(dm, 'datetime="', '"'), + ).strip()), + "date": text.extr(footer, 'Published: ', '\n'), }) return dms + @memcache(keyarg=1) + def _discord_channels(self, server): + url = "{}/api/v1/discord/channel/lookup/{}".format( + self.root, server) + return self.request(url).json() + + @memcache(keyarg=1) + def _post_revisions(self, url): + return self.request(url + "/revisions").json() + def _validate(response): return (response.headers["content-length"] != "9" or @@ -214,48 +224,82 @@ def _validate(response): class KemonopartyUserExtractor(KemonopartyExtractor): """Extractor for all posts from a kemono.party user listing""" subcategory = "user" - pattern = USER_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])" + pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|[?#])" example = "https://kemono.party/SERVICE/user/12345" def __init__(self, match): - _, _, service, user_id, offset = match.groups() + _, _, service, user_id, self.query = match.groups() self.subcategory = service KemonopartyExtractor.__init__(self, match) - self.api_url = "{}/api/{}/user/{}".format(self.root, service, user_id) + self.api_url = "{}/api/v1/{}/user/{}".format( + self.root, service, user_id) self.user_url = "{}/{}/user/{}".format(self.root, service, user_id) - self.offset = text.parse_int(offset) def posts(self): url = self.api_url - params = {"o": self.offset} + params = text.parse_query(self.query) + params["o"] = text.parse_int(params.get("o")) + revisions = self.config("revisions") while True: posts = self.request(url, params=params).json() - yield from posts - cnt = len(posts) - if cnt < 25: - return - params["o"] += cnt + if revisions: + for post in posts: + post["revision_id"] = 0 + yield post + post_url = "{}/post/{}".format(self.api_url, post["id"]) + try: + revs = self._post_revisions(post_url) + except exception.HttpError: + pass + else: + yield from revs + else: + yield from posts + + if len(posts) < 50: + break + params["o"] += 50 class KemonopartyPostExtractor(KemonopartyExtractor): """Extractor for a single kemono.party post""" subcategory = "post" - pattern = USER_PATTERN + r"/post/([^/?#]+)" + pattern = USER_PATTERN + r"/post/([^/?#]+)(/revisions?(?:/(\d*))?)?" example = "https://kemono.party/SERVICE/user/12345/post/12345" def __init__(self, match): - _, _, service, user_id, post_id = match.groups() + _, _, service, user_id, post_id, self.revision, self.revision_id = \ + match.groups() self.subcategory = service KemonopartyExtractor.__init__(self, match) - self.api_url = "{}/api/{}/user/{}/post/{}".format( + self.api_url = "{}/api/v1/{}/user/{}/post/{}".format( self.root, service, user_id, post_id) self.user_url = "{}/{}/user/{}".format(self.root, service, user_id) def posts(self): - posts = self.request(self.api_url).json() - return (posts[0],) if len(posts) > 1 else posts + if not self.revision: + post = self.request(self.api_url).json() + if self.config("revisions"): + post["revision_id"] = 0 + try: + revs = self._post_revisions(self.api_url) + except exception.HttpError: + pass + else: + return itertools.chain((post,), revs) + return (post,) + + revs = self._post_revisions(self.api_url) + if not self.revision_id: + return revs + + for rev in revs: + if str(rev["revision_id"]) == self.revision_id: + return (rev,) + + raise exception.NotFoundError("revision") class KemonopartyDiscordExtractor(KemonopartyExtractor): @@ -270,11 +314,29 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): def __init__(self, match): KemonopartyExtractor.__init__(self, match) - _, _, self.server, self.channel, self.channel_name = match.groups() + _, _, self.server, self.channel_id, self.channel = match.groups() + self.channel_name = "" def items(self): self._prepare_ddosguard_cookies() + if self.channel_id: + self.channel_name = self.channel + else: + if self.channel.isdecimal() and len(self.channel) >= 16: + key = "id" + else: + key = "name" + + for channel in self._discord_channels(self.server): + if channel[key] == self.channel: + break + else: + raise exception.NotFoundError("channel") + + self.channel_id = channel["id"] + self.channel_name = channel["name"] + find_inline = re.compile( r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)" r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall @@ -299,7 +361,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): post["channel_name"] = self.channel_name post["date"] = text.parse_datetime( - post["published"], "%a, %d %b %Y %H:%M:%S %Z") + post["published"], "%Y-%m-%dT%H:%M:%S.%f") post["count"] = len(files) yield Message.Directory, post @@ -319,27 +381,17 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): yield Message.Url, url, post def posts(self): - if self.channel is None: - url = "{}/api/discord/channels/lookup?q={}".format( - self.root, self.server) - for channel in self.request(url).json(): - if channel["name"] == self.channel_name: - self.channel = channel["id"] - break - else: - raise exception.NotFoundError("channel") - - url = "{}/api/discord/channel/{}".format(self.root, self.channel) - params = {"skip": 0} + url = "{}/api/v1/discord/channel/{}".format( + self.root, self.channel_id) + params = {"o": 0} while True: posts = self.request(url, params=params).json() yield from posts - cnt = len(posts) - if cnt < 25: + if len(posts) < 150: break - params["skip"] += cnt + params["o"] += 150 class KemonopartyDiscordServerExtractor(KemonopartyExtractor): @@ -352,11 +404,7 @@ class KemonopartyDiscordServerExtractor(KemonopartyExtractor): self.server = match.group(3) def items(self): - url = "{}/api/discord/channels/lookup?q={}".format( - self.root, self.server) - channels = self.request(url).json() - - for channel in channels: + for channel in self._discord_channels(self.server): url = "{}/discord/server/{}/channel/{}#{}".format( self.root, self.server, channel["id"], channel["name"]) channel["_extractor"] = KemonopartyDiscordExtractor |
