summary refs log tree commit diff stats
path: root/gallery_dl/extractor/kemonoparty.py
diff options
context:
space:
mode:
author Libravatar Unit 193 <unit193@unit193.net> 2021-11-01 05:03:49 -0400
committer Libravatar Unit 193 <unit193@unit193.net> 2021-11-01 05:03:49 -0400
commit 4a965d875415907cc1a016b428ae305a964f9228 (patch)
tree 7cece9948a7ba390348e00c669f9cb1f7a9ba39a /gallery_dl/extractor/kemonoparty.py
parent 34ba2951b8c523713425c98addb9256ea05c946f (diff)
New upstream version 1.19.1. upstream/1.19.1
Diffstat (limited to 'gallery_dl/extractor/kemonoparty.py')
-rw-r--r-- gallery_dl/extractor/kemonoparty.py 135
1 files changed, 128 insertions, 7 deletions
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index c5f5ae7..d5aad67 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -14,7 +14,8 @@ from ..cache import cache
import itertools
import re
-BASE_PATTERN = r"(?:https?://)?kemono\.party/([^/?#]+)/user/([^/?#]+)"
+BASE_PATTERN = r"(?:https?://)?kemono\.party"
+USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
class KemonopartyExtractor(Extractor):
@@ -29,7 +30,9 @@ class KemonopartyExtractor(Extractor):
def items(self):
self._prepare_ddosguard_cookies()
- find_inline = re.compile(r'src="(/inline/[^"]+)').findall
+ find_inline = re.compile(
+ r'src="(?:https?://kemono\.party)?(/inline/[^"]+'
+ r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
skip_service = \
"patreon" if self.config("patreon-skip-file", True) else None
@@ -101,7 +104,7 @@ class KemonopartyExtractor(Extractor):
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
+ pattern = USER_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
test = (
("https://kemono.party/fanbox/user/6993449", {
"range": "1-25",
@@ -138,11 +141,11 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
class KemonopartyPostExtractor(KemonopartyExtractor):
"""Extractor for a single kemono.party post"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/post/([^/?#]+)"
+ pattern = USER_PATTERN + r"/post/([^/?#]+)"
test = (
("https://kemono.party/fanbox/user/6993449/post/506575", {
- "pattern": r"https://kemono\.party/data/files/fanbox"
- r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
+ "pattern": r"https://kemono.party/data/21/0f"
+ r"/210f35388e28bbcf756db18dd516e2d82ce75[0-9a-f]+\.jpg",
"keyword": {
"added": "Wed, 06 May 2020 20:28:02 GMT",
"content": str,
@@ -197,10 +200,128 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
return (posts[0],) if len(posts) > 1 else posts
+class KemonopartyDiscordExtractor(KemonopartyExtractor):
+ """Extractor for kemono.party discord servers"""
+ subcategory = "discord"
+ directory_fmt = ("{category}", "discord", "{server}",
+ "{channel_name|channel}")
+ filename_fmt = "{id}_{num:>02}_{filename}.{extension}"
+ archive_fmt = "discord_{server}_{id}_{num}"
+ pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)"
+ test = (
+ (("https://kemono.party/discord"
+ "/server/488668827274444803#finish-work"), {
+ "count": 4,
+ "keyword": {"channel_name": "finish-work"},
+ }),
+ (("https://kemono.party/discord"
+ "/server/256559665620451329/channel/462437519519383555#"), {
+ "pattern": r"https://kemono\.party/data/attachments/discord"
+ r"/256559665620451329/\d+/\d+/.+",
+ "count": ">= 2",
+ }),
+ # 'inline' files
+ (("https://kemono.party/discord"
+ "/server/315262215055736843/channel/315262215055736843#general"), {
+ "pattern": r"https://cdn\.discordapp\.com/attachments/\d+/\d+/.+$",
+ "range": "1-5",
+ "options": (("image-filter", "type == 'inline'"),),
+ }),
+ )
+
+ def __init__(self, match):
+ KemonopartyExtractor.__init__(self, match)
+ self.server, self.channel, self.channel_name = match.groups()
+
+ def items(self):
+ self._prepare_ddosguard_cookies()
+
+ find_inline = re.compile(
+ r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
+ r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
+
+ posts = self.posts()
+ max_posts = self.config("max-posts")
+ if max_posts:
+ posts = itertools.islice(posts, max_posts)
+
+ for post in posts:
+ files = []
+ append = files.append
+ for attachment in post["attachments"]:
+ attachment["type"] = "attachment"
+ append(attachment)
+ for path in find_inline(post["content"] or ""):
+ append({"path": "https://cdn.discordapp.com" + path,
+ "name": path, "type": "inline"})
+
+ post["channel_name"] = self.channel_name
+ post["date"] = text.parse_datetime(
+ post["published"], "%a, %d %b %Y %H:%M:%S %Z")
+ yield Message.Directory, post
+
+ for post["num"], file in enumerate(files, 1):
+ post["type"] = file["type"]
+ url = file["path"]
+ if url[0] == "/":
+ url = self.root + "/data" + url
+ elif url.startswith("https://kemono.party"):
+ url = self.root + "/data" + url[20:]
+
+ text.nameext_from_url(file["name"], post)
+ yield Message.Url, url, post
+
+ def posts(self):
+ if self.channel is None:
+ url = "{}/api/discord/channels/lookup?q={}".format(
+ self.root, self.server)
+ for channel in self.request(url).json():
+ if channel["name"] == self.channel_name:
+ self.channel = channel["id"]
+ break
+ else:
+ raise exception.NotFoundError("channel")
+
+ url = "{}/api/discord/channel/{}".format(self.root, self.channel)
+ params = {"skip": 0}
+
+ while True:
+ posts = self.request(url, params=params).json()
+ yield from posts
+
+ if len(posts) < 25:
+ break
+ params["skip"] += 25
+
+
+class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
+ subcategory = "discord-server"
+ pattern = BASE_PATTERN + r"/discord/server/(\d+)$"
+ test = ("https://kemono.party/discord/server/488668827274444803", {
+ "pattern": KemonopartyDiscordExtractor.pattern,
+ "count": 13,
+ })
+
+ def __init__(self, match):
+ KemonopartyExtractor.__init__(self, match)
+ self.server = match.group(1)
+
+ def items(self):
+ url = "{}/api/discord/channels/lookup?q={}".format(
+ self.root, self.server)
+ channels = self.request(url).json()
+
+ for channel in channels:
+ url = "{}/discord/server/{}/channel/{}#{}".format(
+ self.root, self.server, channel["id"], channel["name"])
+ channel["_extractor"] = KemonopartyDiscordExtractor
+ yield Message.Queue, url, channel
+
+
class KemonopartyFavoriteExtractor(KemonopartyExtractor):
"""Extractor for kemono.party favorites"""
subcategory = "favorite"
- pattern = r"(?:https?://)?kemono\.party/favorites"
+ pattern = BASE_PATTERN + r"/favorites"
test = ("https://kemono.party/favorites", {
"pattern": KemonopartyUserExtractor.pattern,
"url": "f4b5b796979bcba824af84206578c79101c7f0e1",