summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/kemonoparty.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/kemonoparty.py')
-rw-r--r--gallery_dl/extractor/kemonoparty.py352
1 files changed, 188 insertions, 164 deletions
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 6f2d5f3..3d04f75 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -10,7 +10,7 @@
from .common import Extractor, Message
from .. import text, util, exception
-from ..cache import cache, memcache
+from ..cache import cache
import itertools
import json
import re
@@ -38,6 +38,7 @@ class KemonopartyExtractor(Extractor):
Extractor.__init__(self, match)
def _init(self):
+ self.api = KemonoAPI(self)
self.revisions = self.config("revisions")
if self.revisions:
self.revisions_unique = (self.revisions == "unique")
@@ -53,48 +54,60 @@ class KemonopartyExtractor(Extractor):
sort_keys=True, separators=(",", ":")).encode
def items(self):
+ service = self.groups[2]
+ creator_id = self.groups[3]
+
find_hash = re.compile(HASH_PATTERN).match
generators = self._build_file_generators(self.config("files"))
- duplicates = self.config("duplicates")
- comments = self.config("comments")
- username = dms = announcements = None
+ announcements = True if self.config("announcements") else None
+ comments = True if self.config("comments") else False
+ duplicates = True if self.config("duplicates") else False
+ dms = True if self.config("dms") else None
+ profile = username = None
# prevent files from being sent with gzip compression
headers = {"Accept-Encoding": "identity"}
if self.config("metadata"):
- username = text.unescape(text.extract(
- self.request(self.user_url).text,
- '<meta name="artist_name" content="', '"')[0])
- if self.config("dms"):
- dms = True
- if self.config("announcements"):
- announcements = True
+ profile = self.api.creator_profile(service, creator_id)
+ username = profile["name"]
posts = self.posts()
max_posts = self.config("max-posts")
if max_posts:
posts = itertools.islice(posts, max_posts)
+ if self.revisions:
+ posts = self._revisions(posts)
for post in posts:
-
headers["Referer"] = "{}/{}/user/{}/post/{}".format(
self.root, post["service"], post["user"], post["id"])
post["_http_headers"] = headers
post["date"] = self._parse_datetime(
post.get("published") or post.get("added") or "")
- if username:
+ if profile is not None:
post["username"] = username
+ post["user_profile"] = profile
if comments:
- post["comments"] = self._extract_comments(post)
+ try:
+ post["comments"] = self.api.creator_post_comments(
+ service, creator_id, post["id"])
+ except exception.HttpError:
+ post["comments"] = ()
if dms is not None:
if dms is True:
- dms = self._extract_cards(post, "dms")
+ dms = self.api.creator_dms(
+ post["service"], post["user"])
+ try:
+ dms = dms["props"]["dms"]
+ except Exception:
+ dms = ()
post["dms"] = dms
if announcements is not None:
if announcements is True:
- announcements = self._extract_cards(post, "announcements")
+ announcements = self.api.creator_announcements(
+ post["service"], post["user"])
post["announcements"] = announcements
files = []
@@ -145,20 +158,23 @@ class KemonopartyExtractor(Extractor):
self.cookies_update(self._login_impl(
(username, self.cookies_domain), password))
- @cache(maxage=28*86400, keyarg=1)
+ @cache(maxage=3650*86400, keyarg=1)
def _login_impl(self, username, password):
username = username[0]
self.log.info("Logging in as %s", username)
- url = self.root + "/account/login"
+ url = self.root + "/api/v1/authentication/login"
data = {"username": username, "password": password}
- response = self.request(url, method="POST", data=data)
- if response.url.endswith("/account/login") and \
- "Username or password is incorrect" in response.text:
- raise exception.AuthenticationError()
+ response = self.request(url, method="POST", json=data, fatal=False)
+ if response.status_code >= 400:
+ try:
+ msg = '"' + response.json()["error"] + '"'
+ except Exception:
+ msg = '"0/1 Username or password is incorrect"'
+ raise exception.AuthenticationError(msg)
- return {c.name: c.value for c in response.history[0].cookies}
+ return {c.name: c.value for c in response.cookies}
def _file(self, post):
file = post["file"]
@@ -188,56 +204,21 @@ class KemonopartyExtractor(Extractor):
filetypes = filetypes.split(",")
return [genmap[ft] for ft in filetypes]
- def _extract_comments(self, post):
- url = "{}/{}/user/{}/post/{}".format(
- self.root, post["service"], post["user"], post["id"])
- page = self.request(url).text
-
- comments = []
- for comment in text.extract_iter(page, "<article", "</article>"):
- extr = text.extract_from(comment)
- cid = extr('id="', '"')
- comments.append({
- "id" : cid,
- "user": extr('href="#' + cid + '"', '</').strip(" \n\r>"),
- "body": extr(
- '<section class="comment__body">', '</section>').strip(),
- "date": extr('datetime="', '"'),
- })
- return comments
-
- def _extract_cards(self, post, type):
- url = "{}/{}/user/{}/{}".format(
- self.root, post["service"], post["user"], type)
- page = self.request(url).text
-
- cards = []
- for card in text.extract_iter(page, "<article", "</article>"):
- footer = text.extr(card, "<footer", "</footer>")
- cards.append({
- "body": text.unescape(text.extr(
- card, "<pre>", "</pre></",
- ).strip()),
- "date": text.extr(footer, ': ', '\n'),
- })
- return cards
-
def _parse_datetime(self, date_string):
if len(date_string) > 19:
date_string = date_string[:19]
return text.parse_datetime(date_string, "%Y-%m-%dT%H:%M:%S")
- @memcache(keyarg=1)
- def _discord_channels(self, server):
- url = "{}/api/v1/discord/channel/lookup/{}".format(
- self.root, server)
- return self.request(url).json()
+ def _revisions(self, posts):
+ return itertools.chain.from_iterable(
+ self._revisions_post(post) for post in posts)
- def _revisions_post(self, post, url):
+ def _revisions_post(self, post):
post["revision_id"] = 0
try:
- revs = self.request(url + "/revisions").json()
+ revs = self.api.creator_post_revisions(
+ post["service"], post["user"], post["id"])
except exception.HttpError:
post["revision_hash"] = self._revision_hash(post)
post["revision_index"] = 1
@@ -268,8 +249,8 @@ class KemonopartyExtractor(Extractor):
return revs
- def _revisions_all(self, url):
- revs = self.request(url + "/revisions").json()
+ def _revisions_all(self, service, creator_id, post_id):
+ revs = self.api.creator_post_revisions(service, creator_id, post_id)
cnt = idx = len(revs)
for rev in revs:
@@ -305,50 +286,30 @@ def _validate(response):
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.su user listing"""
subcategory = "user"
- pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|[?#])"
+ pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|\?|#)"
example = "https://kemono.su/SERVICE/user/12345"
def __init__(self, match):
- _, _, service, user_id, self.query = match.groups()
- self.subcategory = service
+ self.subcategory = match.group(3)
KemonopartyExtractor.__init__(self, match)
- self.api_url = "{}/api/v1/{}/user/{}".format(
- self.root, service, user_id)
- self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
def posts(self):
- url = self.api_url
- params = text.parse_query(self.query)
- params["o"] = text.parse_int(params.get("o"))
-
- while True:
- posts = self.request(url, params=params).json()
-
- if self.revisions:
- for post in posts:
- post_url = "{}/api/v1/{}/user/{}/post/{}".format(
- self.root, post["service"], post["user"], post["id"])
- yield from self._revisions_post(post, post_url)
- else:
- yield from posts
-
- if len(posts) < 50:
- break
- params["o"] += 50
+ _, _, service, creator_id, query = self.groups
+ params = text.parse_query(query)
+ return self.api.creator_posts(
+ service, creator_id, params.get("o"), params.get("q"))
class KemonopartyPostsExtractor(KemonopartyExtractor):
"""Extractor for kemono.su post listings"""
subcategory = "posts"
- pattern = BASE_PATTERN + r"/posts(?:/?\?([^#]+))?"
+ pattern = BASE_PATTERN + r"/posts()()(?:/?\?([^#]+))?"
example = "https://kemono.su/posts"
- def __init__(self, match):
- KemonopartyExtractor.__init__(self, match)
- self.query = match.group(3)
- self.api_url = self.root + "/api/v1/posts"
-
- posts = KemonopartyUserExtractor.posts
+ def posts(self):
+ params = text.parse_query(self.groups[4])
+ return self.api.posts(
+ params.get("o"), params.get("q"), params.get("tag"))
class KemonopartyPostExtractor(KemonopartyExtractor):
@@ -358,27 +319,23 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
example = "https://kemono.su/SERVICE/user/12345/post/12345"
def __init__(self, match):
- _, _, service, user_id, post_id, self.revision, self.revision_id = \
- match.groups()
- self.subcategory = service
+ self.subcategory = match.group(3)
KemonopartyExtractor.__init__(self, match)
- self.api_url = "{}/api/v1/{}/user/{}/post/{}".format(
- self.root, service, user_id, post_id)
- self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
def posts(self):
- if not self.revision:
- post = self.request(self.api_url).json()
- if self.revisions:
- return self._revisions_post(post, self.api_url)
- return (post,)
+ _, _, service, creator_id, post_id, revision, revision_id = self.groups
+ post = self.api.creator_post(service, creator_id, post_id)
+ if not revision:
+ return (post["post"],)
- revs = self._revisions_all(self.api_url)
- if not self.revision_id:
+ self.revisions = False
+
+ revs = self._revisions_all(service, creator_id, post_id)
+ if not revision_id:
return revs
for rev in revs:
- if str(rev["revision_id"]) == self.revision_id:
+ if str(rev["revision_id"]) == revision_id:
return (rev,)
raise exception.NotFoundError("revision")
@@ -391,40 +348,37 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
"{channel_name|channel}")
filename_fmt = "{id}_{num:>02}_{filename}.{extension}"
archive_fmt = "discord_{server}_{id}_{num}"
- pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)"
- example = "https://kemono.su/discord/server/12345#CHANNEL"
-
- def __init__(self, match):
- KemonopartyExtractor.__init__(self, match)
- _, _, self.server, self.channel_id, self.channel = match.groups()
- self.channel_name = ""
+ pattern = (BASE_PATTERN + r"/discord/server/(\d+)"
+ r"(?:/(?:channel/)?(\d+)(?:#(.+))?|#(.+))")
+ example = "https://kemono.su/discord/server/12345/12345"
def items(self):
self._prepare_ddosguard_cookies()
+ _, _, server_id, channel_id, channel_name, channel = self.groups
- if self.channel_id:
- self.channel_name = self.channel
- else:
- if self.channel.isdecimal() and len(self.channel) >= 16:
+ if channel_id is None:
+ if channel.isdecimal() and len(channel) >= 16:
key = "id"
else:
key = "name"
- for channel in self._discord_channels(self.server):
- if channel[key] == self.channel:
+ for ch in self.api.discord_server(server_id):
+ if ch[key] == channel:
break
else:
raise exception.NotFoundError("channel")
- self.channel_id = channel["id"]
- self.channel_name = channel["name"]
+ channel_id = ch["id"]
+ channel_name = ch["name"]
+ elif channel_name is None:
+ channel_name = ""
find_inline = re.compile(
r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
find_hash = re.compile(HASH_PATTERN).match
- posts = self.posts()
+ posts = self.api.discord_channel(channel_id)
max_posts = self.config("max-posts")
if max_posts:
posts = itertools.islice(posts, max_posts)
@@ -441,7 +395,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
append({"path": "https://cdn.discordapp.com" + path,
"name": path, "type": "inline", "hash": ""})
- post["channel_name"] = self.channel_name
+ post["channel_name"] = channel_name
post["date"] = self._parse_datetime(post["published"])
post["count"] = len(files)
yield Message.Directory, post
@@ -461,33 +415,17 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
url = self.root + "/data" + url[20:]
yield Message.Url, url, post
- def posts(self):
- url = "{}/api/v1/discord/channel/{}".format(
- self.root, self.channel_id)
- params = {"o": 0}
-
- while True:
- posts = self.request(url, params=params).json()
- yield from posts
-
- if len(posts) < 150:
- break
- params["o"] += 150
-
class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
subcategory = "discord-server"
pattern = BASE_PATTERN + r"/discord/server/(\d+)$"
example = "https://kemono.su/discord/server/12345"
- def __init__(self, match):
- KemonopartyExtractor.__init__(self, match)
- self.server = match.group(3)
-
def items(self):
- for channel in self._discord_channels(self.server):
- url = "{}/discord/server/{}/channel/{}#{}".format(
- self.root, self.server, channel["id"], channel["name"])
+ server_id = self.groups[2]
+ for channel in self.api.discord_server(server_id):
+ url = "{}/discord/server/{}/{}#{}".format(
+ self.root, server_id, channel["id"], channel["name"])
channel["_extractor"] = KemonopartyDiscordExtractor
yield Message.Queue, url, channel
@@ -495,26 +433,21 @@ class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
class KemonopartyFavoriteExtractor(KemonopartyExtractor):
"""Extractor for kemono.su favorites"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/favorites(?:/?\?([^#]+))?"
+ pattern = BASE_PATTERN + r"/favorites()()(?:/?\?([^#]+))?"
example = "https://kemono.su/favorites"
- def __init__(self, match):
- KemonopartyExtractor.__init__(self, match)
- self.params = text.parse_query(match.group(3))
- self.favorites = (self.params.get("type") or
- self.config("favorites") or
- "artist")
-
def items(self):
self._prepare_ddosguard_cookies()
self.login()
- sort = self.params.get("sort")
- order = self.params.get("order") or "desc"
+ params = text.parse_query(self.groups[4])
+ type = params.get("type") or self.config("favorites") or "artist"
- if self.favorites == "artist":
- users = self.request(
- self.root + "/api/v1/account/favorites?type=artist").json()
+ sort = params.get("sort")
+ order = params.get("order") or "desc"
+
+ if type == "artist":
+ users = self.api.account_favorites("artist")
if not sort:
sort = "updated"
@@ -527,9 +460,8 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
self.root, user["service"], user["id"])
yield Message.Queue, url, user
- elif self.favorites == "post":
- posts = self.request(
- self.root + "/api/v1/account/favorites?type=post").json()
+ elif type == "post":
+ posts = self.api.account_favorites("post")
if not sort:
sort = "faved_seq"
@@ -541,3 +473,95 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
url = "{}/{}/user/{}/post/{}".format(
self.root, post["service"], post["user"], post["id"])
yield Message.Queue, url, post
+
+
+class KemonoAPI():
+ """Interface for the Kemono API v1.1.0
+
+ https://kemono.su/documentation/api
+ """
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.root = extractor.root + "/api/v1"
+
+ def posts(self, offset=0, query=None, tags=None):
+ endpoint = "/posts"
+ params = {"q": query, "o": offset, "tags": tags}
+ return self._pagination(endpoint, params, 50, "posts")
+
+ def creator_posts(self, service, creator_id, offset=0, query=None):
+ endpoint = "/{}/user/{}".format(service, creator_id)
+ params = {"q": query, "o": offset}
+ return self._pagination(endpoint, params, 50)
+
+ def creator_announcements(self, service, creator_id):
+ endpoint = "/{}/user/{}/announcements".format(service, creator_id)
+ return self._call(endpoint)
+
+ def creator_dms(self, service, creator_id):
+ endpoint = "/{}/user/{}/dms".format(service, creator_id)
+ return self._call(endpoint)
+
+ def creator_fancards(self, service, creator_id):
+ endpoint = "/{}/user/{}/fancards".format(service, creator_id)
+ return self._call(endpoint)
+
+ def creator_post(self, service, creator_id, post_id):
+ endpoint = "/{}/user/{}/post/{}".format(service, creator_id, post_id)
+ return self._call(endpoint)
+
+ def creator_post_comments(self, service, creator_id, post_id):
+ endpoint = "/{}/user/{}/post/{}/comments".format(
+ service, creator_id, post_id)
+ return self._call(endpoint)
+
+ def creator_post_revisions(self, service, creator_id, post_id):
+ endpoint = "/{}/user/{}/post/{}/revisions".format(
+ service, creator_id, post_id)
+ return self._call(endpoint)
+
+ def creator_profile(self, service, creator_id):
+ endpoint = "/{}/user/{}/profile".format(service, creator_id)
+ return self._call(endpoint)
+
+ def creator_links(self, service, creator_id):
+ endpoint = "/{}/user/{}/links".format(service, creator_id)
+ return self._call(endpoint)
+
+ def creator_tags(self, service, creator_id):
+ endpoint = "/{}/user/{}/tags".format(service, creator_id)
+ return self._call(endpoint)
+
+ def discord_channel(self, channel_id):
+ endpoint = "/discord/channel/{}".format(channel_id)
+ return self._pagination(endpoint, {}, 150)
+
+ def discord_server(self, server_id):
+ endpoint = "/discord/channel/lookup/{}".format(server_id)
+ return self._call(endpoint)
+
+ def account_favorites(self, type):
+ endpoint = "/account/favorites"
+ params = {"type": type}
+ return self._call(endpoint, params)
+
+ def _call(self, endpoint, params=None):
+ url = self.root + endpoint
+ response = self.extractor.request(url, params=params)
+ return response.json()
+
+ def _pagination(self, endpoint, params, batch=50, key=False):
+ params["o"] = text.parse_int(params.get("o")) % 50
+
+ while True:
+ data = self._call(endpoint, params)
+
+ if key:
+ yield from data[key]
+ else:
+ yield from data
+
+ if len(data) < batch:
+ return
+ params["o"] += batch