summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/kemonoparty.py
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@unit193.net>2023-10-22 01:00:14 -0400
committerLibravatarUnit 193 <unit193@unit193.net>2023-10-22 01:00:14 -0400
commite052f3b9e1d9703a5a466daeaf37bacf476c2daf (patch)
treefc608c7d452695706fb13e2b0b34671f569f3ab0 /gallery_dl/extractor/kemonoparty.py
parentb8758ecd073910ce3220b2e68399147b425c37b8 (diff)
New upstream version 1.26.1.upstream/1.26.1
Diffstat (limited to 'gallery_dl/extractor/kemonoparty.py')
-rw-r--r--gallery_dl/extractor/kemonoparty.py134
1 files changed, 91 insertions, 43 deletions
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 894c671..1596cfb 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -10,7 +10,7 @@
from .common import Extractor, Message
from .. import text, exception
-from ..cache import cache
+from ..cache import cache, memcache
import itertools
import re
@@ -70,8 +70,7 @@ class KemonopartyExtractor(Extractor):
self.root, post["service"], post["user"], post["id"])
post["_http_headers"] = headers
post["date"] = text.parse_datetime(
- post["published"] or post["added"],
- "%a, %d %b %Y %H:%M:%S %Z")
+ post["published"] or post["added"], "%Y-%m-%dT%H:%M:%S")
if username:
post["username"] = username
if comments:
@@ -197,14 +196,25 @@ class KemonopartyExtractor(Extractor):
dms = []
for dm in text.extract_iter(page, "<article", "</article>"):
+ footer = text.extr(dm, "<footer", "</footer>")
dms.append({
- "body": text.unescape(text.extract(
+ "body": text.unescape(text.extr(
dm, "<pre>", "</pre></",
- )[0].strip()),
- "date": text.extr(dm, 'datetime="', '"'),
+ ).strip()),
+ "date": text.extr(footer, 'Published: ', '\n'),
})
return dms
+ @memcache(keyarg=1)
+ def _discord_channels(self, server):
+ url = "{}/api/v1/discord/channel/lookup/{}".format(
+ self.root, server)
+ return self.request(url).json()
+
+ @memcache(keyarg=1)
+ def _post_revisions(self, url):
+ return self.request(url + "/revisions").json()
+
def _validate(response):
return (response.headers["content-length"] != "9" or
@@ -214,48 +224,82 @@ def _validate(response):
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
subcategory = "user"
- pattern = USER_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
+ pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|[?#])"
example = "https://kemono.party/SERVICE/user/12345"
def __init__(self, match):
- _, _, service, user_id, offset = match.groups()
+ _, _, service, user_id, self.query = match.groups()
self.subcategory = service
KemonopartyExtractor.__init__(self, match)
- self.api_url = "{}/api/{}/user/{}".format(self.root, service, user_id)
+ self.api_url = "{}/api/v1/{}/user/{}".format(
+ self.root, service, user_id)
self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
- self.offset = text.parse_int(offset)
def posts(self):
url = self.api_url
- params = {"o": self.offset}
+ params = text.parse_query(self.query)
+ params["o"] = text.parse_int(params.get("o"))
+ revisions = self.config("revisions")
while True:
posts = self.request(url, params=params).json()
- yield from posts
- cnt = len(posts)
- if cnt < 25:
- return
- params["o"] += cnt
+ if revisions:
+ for post in posts:
+ post["revision_id"] = 0
+ yield post
+ post_url = "{}/post/{}".format(self.api_url, post["id"])
+ try:
+ revs = self._post_revisions(post_url)
+ except exception.HttpError:
+ pass
+ else:
+ yield from revs
+ else:
+ yield from posts
+
+ if len(posts) < 50:
+ break
+ params["o"] += 50
class KemonopartyPostExtractor(KemonopartyExtractor):
"""Extractor for a single kemono.party post"""
subcategory = "post"
- pattern = USER_PATTERN + r"/post/([^/?#]+)"
+ pattern = USER_PATTERN + r"/post/([^/?#]+)(/revisions?(?:/(\d*))?)?"
example = "https://kemono.party/SERVICE/user/12345/post/12345"
def __init__(self, match):
- _, _, service, user_id, post_id = match.groups()
+ _, _, service, user_id, post_id, self.revision, self.revision_id = \
+ match.groups()
self.subcategory = service
KemonopartyExtractor.__init__(self, match)
- self.api_url = "{}/api/{}/user/{}/post/{}".format(
+ self.api_url = "{}/api/v1/{}/user/{}/post/{}".format(
self.root, service, user_id, post_id)
self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
def posts(self):
- posts = self.request(self.api_url).json()
- return (posts[0],) if len(posts) > 1 else posts
+ if not self.revision:
+ post = self.request(self.api_url).json()
+ if self.config("revisions"):
+ post["revision_id"] = 0
+ try:
+ revs = self._post_revisions(self.api_url)
+ except exception.HttpError:
+ pass
+ else:
+ return itertools.chain((post,), revs)
+ return (post,)
+
+ revs = self._post_revisions(self.api_url)
+ if not self.revision_id:
+ return revs
+
+ for rev in revs:
+ if str(rev["revision_id"]) == self.revision_id:
+ return (rev,)
+
+ raise exception.NotFoundError("revision")
class KemonopartyDiscordExtractor(KemonopartyExtractor):
@@ -270,11 +314,29 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
- _, _, self.server, self.channel, self.channel_name = match.groups()
+ _, _, self.server, self.channel_id, self.channel = match.groups()
+ self.channel_name = ""
def items(self):
self._prepare_ddosguard_cookies()
+ if self.channel_id:
+ self.channel_name = self.channel
+ else:
+ if self.channel.isdecimal() and len(self.channel) >= 16:
+ key = "id"
+ else:
+ key = "name"
+
+ for channel in self._discord_channels(self.server):
+ if channel[key] == self.channel:
+ break
+ else:
+ raise exception.NotFoundError("channel")
+
+ self.channel_id = channel["id"]
+ self.channel_name = channel["name"]
+
find_inline = re.compile(
r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
@@ -299,7 +361,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
post["channel_name"] = self.channel_name
post["date"] = text.parse_datetime(
- post["published"], "%a, %d %b %Y %H:%M:%S %Z")
+ post["published"], "%Y-%m-%dT%H:%M:%S.%f")
post["count"] = len(files)
yield Message.Directory, post
@@ -319,27 +381,17 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
yield Message.Url, url, post
def posts(self):
- if self.channel is None:
- url = "{}/api/discord/channels/lookup?q={}".format(
- self.root, self.server)
- for channel in self.request(url).json():
- if channel["name"] == self.channel_name:
- self.channel = channel["id"]
- break
- else:
- raise exception.NotFoundError("channel")
-
- url = "{}/api/discord/channel/{}".format(self.root, self.channel)
- params = {"skip": 0}
+ url = "{}/api/v1/discord/channel/{}".format(
+ self.root, self.channel_id)
+ params = {"o": 0}
while True:
posts = self.request(url, params=params).json()
yield from posts
- cnt = len(posts)
- if cnt < 25:
+ if len(posts) < 150:
break
- params["skip"] += cnt
+ params["o"] += 150
class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
@@ -352,11 +404,7 @@ class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
self.server = match.group(3)
def items(self):
- url = "{}/api/discord/channels/lookup?q={}".format(
- self.root, self.server)
- channels = self.request(url).json()
-
- for channel in channels:
+ for channel in self._discord_channels(self.server):
url = "{}/discord/server/{}/channel/{}#{}".format(
self.root, self.server, channel["id"], channel["name"])
channel["_extractor"] = KemonopartyDiscordExtractor