diff options
Diffstat (limited to 'gallery_dl/extractor/kemonoparty.py')
| -rw-r--r-- | gallery_dl/extractor/kemonoparty.py | 352 |
1 files changed, 188 insertions, 164 deletions
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 6f2d5f3..3d04f75 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -10,7 +10,7 @@ from .common import Extractor, Message from .. import text, util, exception -from ..cache import cache, memcache +from ..cache import cache import itertools import json import re @@ -38,6 +38,7 @@ class KemonopartyExtractor(Extractor): Extractor.__init__(self, match) def _init(self): + self.api = KemonoAPI(self) self.revisions = self.config("revisions") if self.revisions: self.revisions_unique = (self.revisions == "unique") @@ -53,48 +54,60 @@ class KemonopartyExtractor(Extractor): sort_keys=True, separators=(",", ":")).encode def items(self): + service = self.groups[2] + creator_id = self.groups[3] + find_hash = re.compile(HASH_PATTERN).match generators = self._build_file_generators(self.config("files")) - duplicates = self.config("duplicates") - comments = self.config("comments") - username = dms = announcements = None + announcements = True if self.config("announcements") else None + comments = True if self.config("comments") else False + duplicates = True if self.config("duplicates") else False + dms = True if self.config("dms") else None + profile = username = None # prevent files from being sent with gzip compression headers = {"Accept-Encoding": "identity"} if self.config("metadata"): - username = text.unescape(text.extract( - self.request(self.user_url).text, - '<meta name="artist_name" content="', '"')[0]) - if self.config("dms"): - dms = True - if self.config("announcements"): - announcements = True + profile = self.api.creator_profile(service, creator_id) + username = profile["name"] posts = self.posts() max_posts = self.config("max-posts") if max_posts: posts = itertools.islice(posts, max_posts) + if self.revisions: + posts = self._revisions(posts) for post in posts: - headers["Referer"] = "{}/{}/user/{}/post/{}".format( self.root, post["service"], post["user"], post["id"]) post["_http_headers"] = headers post["date"] = self._parse_datetime( post.get("published") or post.get("added") or "") - if username: + if profile is not None: post["username"] = username + post["user_profile"] = profile if comments: - post["comments"] = self._extract_comments(post) + try: + post["comments"] = self.api.creator_post_comments( + service, creator_id, post["id"]) + except exception.HttpError: + post["comments"] = () if dms is not None: if dms is True: - dms = self._extract_cards(post, "dms") + dms = self.api.creator_dms( + post["service"], post["user"]) + try: + dms = dms["props"]["dms"] + except Exception: + dms = () post["dms"] = dms if announcements is not None: if announcements is True: - announcements = self._extract_cards(post, "announcements") + announcements = self.api.creator_announcements( + post["service"], post["user"]) post["announcements"] = announcements files = [] @@ -145,20 +158,23 @@ class KemonopartyExtractor(Extractor): self.cookies_update(self._login_impl( (username, self.cookies_domain), password)) - @cache(maxage=28*86400, keyarg=1) + @cache(maxage=3650*86400, keyarg=1) def _login_impl(self, username, password): username = username[0] self.log.info("Logging in as %s", username) - url = self.root + "/account/login" + url = self.root + "/api/v1/authentication/login" data = {"username": username, "password": password} - response = self.request(url, method="POST", data=data) - if response.url.endswith("/account/login") and \ - "Username or password is incorrect" in response.text: - raise exception.AuthenticationError() + response = self.request(url, method="POST", json=data, fatal=False) + if response.status_code >= 400: + try: + msg = '"' + response.json()["error"] + '"' + except Exception: + msg = '"0/1 Username or password is incorrect"' + raise exception.AuthenticationError(msg) - return {c.name: c.value for c in response.history[0].cookies} + return {c.name: c.value for c in response.cookies} def _file(self, post): file = post["file"] @@ -188,56 +204,21 @@ class KemonopartyExtractor(Extractor): filetypes = filetypes.split(",") return [genmap[ft] for ft in filetypes] - def _extract_comments(self, post): - url = "{}/{}/user/{}/post/{}".format( - self.root, post["service"], post["user"], post["id"]) - page = self.request(url).text - - comments = [] - for comment in text.extract_iter(page, "<article", "</article>"): - extr = text.extract_from(comment) - cid = extr('id="', '"') - comments.append({ - "id" : cid, - "user": extr('href="#' + cid + '"', '</').strip(" \n\r>"), - "body": extr( - '<section class="comment__body">', '</section>').strip(), - "date": extr('datetime="', '"'), - }) - return comments - - def _extract_cards(self, post, type): - url = "{}/{}/user/{}/{}".format( - self.root, post["service"], post["user"], type) - page = self.request(url).text - - cards = [] - for card in text.extract_iter(page, "<article", "</article>"): - footer = text.extr(card, "<footer", "</footer>") - cards.append({ - "body": text.unescape(text.extr( - card, "<pre>", "</pre></", - ).strip()), - "date": text.extr(footer, ': ', '\n'), - }) - return cards - def _parse_datetime(self, date_string): if len(date_string) > 19: date_string = date_string[:19] return text.parse_datetime(date_string, "%Y-%m-%dT%H:%M:%S") - @memcache(keyarg=1) - def _discord_channels(self, server): - url = "{}/api/v1/discord/channel/lookup/{}".format( - self.root, server) - return self.request(url).json() + def _revisions(self, posts): + return itertools.chain.from_iterable( + self._revisions_post(post) for post in posts) - def _revisions_post(self, post, url): + def _revisions_post(self, post): post["revision_id"] = 0 try: - revs = self.request(url + "/revisions").json() + revs = self.api.creator_post_revisions( + post["service"], post["user"], post["id"]) except exception.HttpError: post["revision_hash"] = self._revision_hash(post) post["revision_index"] = 1 @@ -268,8 +249,8 @@ class KemonopartyExtractor(Extractor): return revs - def _revisions_all(self, url): - revs = self.request(url + "/revisions").json() + def _revisions_all(self, service, creator_id, post_id): + revs = self.api.creator_post_revisions(service, creator_id, post_id) cnt = idx = len(revs) for rev in revs: @@ -305,50 +286,30 @@ def _validate(response): class KemonopartyUserExtractor(KemonopartyExtractor): """Extractor for all posts from a kemono.su user listing""" subcategory = "user" - pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|[?#])" + pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|\?|#)" example = "https://kemono.su/SERVICE/user/12345" def __init__(self, match): - _, _, service, user_id, self.query = match.groups() - self.subcategory = service + self.subcategory = match.group(3) KemonopartyExtractor.__init__(self, match) - self.api_url = "{}/api/v1/{}/user/{}".format( - self.root, service, user_id) - self.user_url = "{}/{}/user/{}".format(self.root, service, user_id) def posts(self): - url = self.api_url - params = text.parse_query(self.query) - params["o"] = text.parse_int(params.get("o")) - - while True: - posts = self.request(url, params=params).json() - - if self.revisions: - for post in posts: - post_url = "{}/api/v1/{}/user/{}/post/{}".format( - self.root, post["service"], post["user"], post["id"]) - yield from self._revisions_post(post, post_url) - else: - yield from posts - - if len(posts) < 50: - break - params["o"] += 50 + _, _, service, creator_id, query = self.groups + params = text.parse_query(query) + return self.api.creator_posts( + service, creator_id, params.get("o"), params.get("q")) class KemonopartyPostsExtractor(KemonopartyExtractor): """Extractor for kemono.su post listings""" subcategory = "posts" - pattern = BASE_PATTERN + r"/posts(?:/?\?([^#]+))?" + pattern = BASE_PATTERN + r"/posts()()(?:/?\?([^#]+))?" example = "https://kemono.su/posts" - def __init__(self, match): - KemonopartyExtractor.__init__(self, match) - self.query = match.group(3) - self.api_url = self.root + "/api/v1/posts" - - posts = KemonopartyUserExtractor.posts + def posts(self): + params = text.parse_query(self.groups[4]) + return self.api.posts( + params.get("o"), params.get("q"), params.get("tag")) class KemonopartyPostExtractor(KemonopartyExtractor): @@ -358,27 +319,23 @@ class KemonopartyPostExtractor(KemonopartyExtractor): example = "https://kemono.su/SERVICE/user/12345/post/12345" def __init__(self, match): - _, _, service, user_id, post_id, self.revision, self.revision_id = \ - match.groups() - self.subcategory = service + self.subcategory = match.group(3) KemonopartyExtractor.__init__(self, match) - self.api_url = "{}/api/v1/{}/user/{}/post/{}".format( - self.root, service, user_id, post_id) - self.user_url = "{}/{}/user/{}".format(self.root, service, user_id) def posts(self): - if not self.revision: - post = self.request(self.api_url).json() - if self.revisions: - return self._revisions_post(post, self.api_url) - return (post,) + _, _, service, creator_id, post_id, revision, revision_id = self.groups + post = self.api.creator_post(service, creator_id, post_id) + if not revision: + return (post["post"],) - revs = self._revisions_all(self.api_url) - if not self.revision_id: + self.revisions = False + + revs = self._revisions_all(service, creator_id, post_id) + if not revision_id: return revs for rev in revs: - if str(rev["revision_id"]) == self.revision_id: + if str(rev["revision_id"]) == revision_id: return (rev,) raise exception.NotFoundError("revision") @@ -391,40 +348,37 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): "{channel_name|channel}") filename_fmt = "{id}_{num:>02}_{filename}.{extension}" archive_fmt = "discord_{server}_{id}_{num}" - pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)" - example = "https://kemono.su/discord/server/12345#CHANNEL" - - def __init__(self, match): - KemonopartyExtractor.__init__(self, match) - _, _, self.server, self.channel_id, self.channel = match.groups() - self.channel_name = "" + pattern = (BASE_PATTERN + r"/discord/server/(\d+)" + r"(?:/(?:channel/)?(\d+)(?:#(.+))?|#(.+))") + example = "https://kemono.su/discord/server/12345/12345" def items(self): self._prepare_ddosguard_cookies() + _, _, server_id, channel_id, channel_name, channel = self.groups - if self.channel_id: - self.channel_name = self.channel - else: - if self.channel.isdecimal() and len(self.channel) >= 16: + if channel_id is None: + if channel.isdecimal() and len(channel) >= 16: key = "id" else: key = "name" - for channel in self._discord_channels(self.server): - if channel[key] == self.channel: + for ch in self.api.discord_server(server_id): + if ch[key] == channel: break else: raise exception.NotFoundError("channel") - self.channel_id = channel["id"] - self.channel_name = channel["name"] + channel_id = ch["id"] + channel_name = ch["name"] + elif channel_name is None: + channel_name = "" find_inline = re.compile( r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)" r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall find_hash = re.compile(HASH_PATTERN).match - posts = self.posts() + posts = self.api.discord_channel(channel_id) max_posts = self.config("max-posts") if max_posts: posts = itertools.islice(posts, max_posts) @@ -441,7 +395,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): append({"path": "https://cdn.discordapp.com" + path, "name": path, "type": "inline", "hash": ""}) - post["channel_name"] = self.channel_name + post["channel_name"] = channel_name post["date"] = self._parse_datetime(post["published"]) post["count"] = len(files) yield Message.Directory, post @@ -461,33 +415,17 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): url = self.root + "/data" + url[20:] yield Message.Url, url, post - def posts(self): - url = "{}/api/v1/discord/channel/{}".format( - self.root, self.channel_id) - params = {"o": 0} - - while True: - posts = self.request(url, params=params).json() - yield from posts - - if len(posts) < 150: - break - params["o"] += 150 - class KemonopartyDiscordServerExtractor(KemonopartyExtractor): subcategory = "discord-server" pattern = BASE_PATTERN + r"/discord/server/(\d+)$" example = "https://kemono.su/discord/server/12345" - def __init__(self, match): - KemonopartyExtractor.__init__(self, match) - self.server = match.group(3) - def items(self): - for channel in self._discord_channels(self.server): - url = "{}/discord/server/{}/channel/{}#{}".format( - self.root, self.server, channel["id"], channel["name"]) + server_id = self.groups[2] + for channel in self.api.discord_server(server_id): + url = "{}/discord/server/{}/{}#{}".format( + self.root, server_id, channel["id"], channel["name"]) channel["_extractor"] = KemonopartyDiscordExtractor yield Message.Queue, url, channel @@ -495,26 +433,21 @@ class KemonopartyDiscordServerExtractor(KemonopartyExtractor): class KemonopartyFavoriteExtractor(KemonopartyExtractor): """Extractor for kemono.su favorites""" subcategory = "favorite" - pattern = BASE_PATTERN + r"/favorites(?:/?\?([^#]+))?" + pattern = BASE_PATTERN + r"/favorites()()(?:/?\?([^#]+))?" example = "https://kemono.su/favorites" - def __init__(self, match): - KemonopartyExtractor.__init__(self, match) - self.params = text.parse_query(match.group(3)) - self.favorites = (self.params.get("type") or - self.config("favorites") or - "artist") - def items(self): self._prepare_ddosguard_cookies() self.login() - sort = self.params.get("sort") - order = self.params.get("order") or "desc" + params = text.parse_query(self.groups[4]) + type = params.get("type") or self.config("favorites") or "artist" - if self.favorites == "artist": - users = self.request( - self.root + "/api/v1/account/favorites?type=artist").json() + sort = params.get("sort") + order = params.get("order") or "desc" + + if type == "artist": + users = self.api.account_favorites("artist") if not sort: sort = "updated" @@ -527,9 +460,8 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor): self.root, user["service"], user["id"]) yield Message.Queue, url, user - elif self.favorites == "post": - posts = self.request( - self.root + "/api/v1/account/favorites?type=post").json() + elif type == "post": + posts = self.api.account_favorites("post") if not sort: sort = "faved_seq" @@ -541,3 +473,95 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor): url = "{}/{}/user/{}/post/{}".format( self.root, post["service"], post["user"], post["id"]) yield Message.Queue, url, post + + +class KemonoAPI(): + """Interface for the Kemono API v1.1.0 + + https://kemono.su/documentation/api + """ + + def __init__(self, extractor): + self.extractor = extractor + self.root = extractor.root + "/api/v1" + + def posts(self, offset=0, query=None, tags=None): + endpoint = "/posts" + params = {"q": query, "o": offset, "tags": tags} + return self._pagination(endpoint, params, 50, "posts") + + def creator_posts(self, service, creator_id, offset=0, query=None): + endpoint = "/{}/user/{}".format(service, creator_id) + params = {"q": query, "o": offset} + return self._pagination(endpoint, params, 50) + + def creator_announcements(self, service, creator_id): + endpoint = "/{}/user/{}/announcements".format(service, creator_id) + return self._call(endpoint) + + def creator_dms(self, service, creator_id): + endpoint = "/{}/user/{}/dms".format(service, creator_id) + return self._call(endpoint) + + def creator_fancards(self, service, creator_id): + endpoint = "/{}/user/{}/fancards".format(service, creator_id) + return self._call(endpoint) + + def creator_post(self, service, creator_id, post_id): + endpoint = "/{}/user/{}/post/{}".format(service, creator_id, post_id) + return self._call(endpoint) + + def creator_post_comments(self, service, creator_id, post_id): + endpoint = "/{}/user/{}/post/{}/comments".format( + service, creator_id, post_id) + return self._call(endpoint) + + def creator_post_revisions(self, service, creator_id, post_id): + endpoint = "/{}/user/{}/post/{}/revisions".format( + service, creator_id, post_id) + return self._call(endpoint) + + def creator_profile(self, service, creator_id): + endpoint = "/{}/user/{}/profile".format(service, creator_id) + return self._call(endpoint) + + def creator_links(self, service, creator_id): + endpoint = "/{}/user/{}/links".format(service, creator_id) + return self._call(endpoint) + + def creator_tags(self, service, creator_id): + endpoint = "/{}/user/{}/tags".format(service, creator_id) + return self._call(endpoint) + + def discord_channel(self, channel_id): + endpoint = "/discord/channel/{}".format(channel_id) + return self._pagination(endpoint, {}, 150) + + def discord_server(self, server_id): + endpoint = "/discord/channel/lookup/{}".format(server_id) + return self._call(endpoint) + + def account_favorites(self, type): + endpoint = "/account/favorites" + params = {"type": type} + return self._call(endpoint, params) + + def _call(self, endpoint, params=None): + url = self.root + endpoint + response = self.extractor.request(url, params=params) + return response.json() + + def _pagination(self, endpoint, params, batch=50, key=False): + params["o"] = text.parse_int(params.get("o")) % 50 + + while True: + data = self._call(endpoint, params) + + if key: + yield from data[key] + else: + yield from data + + if len(data) < batch: + return + params["o"] += batch |
