summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/kemonoparty.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net> 2021-12-01 14:44:00 -0500
committer: Unit 193 <unit193@unit193.net> 2021-12-01 14:44:00 -0500
commit: a5aecc343fd2886e7ae09bb3e2afeec38f175755 (patch)
tree: 06a284b3d73700bd38116423e2480afa516255c2 /gallery_dl/extractor/kemonoparty.py
parent: fc8c5e642017e2b4e5299e2093e72b316479690d (diff)
New upstream version 1.19.3. (upstream/1.19.3)
Diffstat (limited to 'gallery_dl/extractor/kemonoparty.py')
-rw-r--r-- gallery_dl/extractor/kemonoparty.py 125
1 files changed, 96 insertions, 29 deletions
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 2e1d0b2..6483278 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -14,7 +14,7 @@ from ..cache import cache
import itertools
import re
-BASE_PATTERN = r"(?:https?://)?kemono\.party"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?kemono\.party"
USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
@@ -30,19 +30,20 @@ class KemonopartyExtractor(Extractor):
def items(self):
self._prepare_ddosguard_cookies()
- find_inline = re.compile(
+ self._find_inline = re.compile(
r'src="(?:https?://kemono\.party)?(/inline/[^"]+'
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
- skip_service = \
- "patreon" if self.config("patreon-skip-file", True) else None
+ find_hash = re.compile("/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})").match
+ generators = self._build_file_generators(self.config("files"))
comments = self.config("comments")
+ username = dms = None
if self.config("metadata"):
username = text.unescape(text.extract(
self.request(self.user_url).text,
'<meta name="artist_name" content="', '"')[0])
- else:
- username = None
+ if self.config("dms"):
+ dms = True
posts = self.posts()
max_posts = self.config("max-posts")
@@ -51,31 +52,38 @@ class KemonopartyExtractor(Extractor):
for post in posts:
- files = []
- append = files.append
- file = post["file"]
-
- if file:
- file["type"] = "file"
- if post["service"] != skip_service or not post["attachments"]:
- append(file)
- for attachment in post["attachments"]:
- attachment["type"] = "attachment"
- append(attachment)
- for path in find_inline(post["content"] or ""):
- append({"path": path, "name": path, "type": "inline"})
-
post["date"] = text.parse_datetime(
- post["published"], "%a, %d %b %Y %H:%M:%S %Z")
+ post["published"] or post["added"],
+ "%a, %d %b %Y %H:%M:%S %Z")
if username:
post["username"] = username
if comments:
post["comments"] = self._extract_comments(post)
+ if dms is not None:
+ if dms is True:
+ dms = self._extract_dms(post)
+ post["dms"] = dms
yield Message.Directory, post
- for post["num"], file in enumerate(files, 1):
- post["type"] = file["type"]
+ hashes = set()
+ post["num"] = 0
+ for file in itertools.chain.from_iterable(
+ g(post) for g in generators):
url = file["path"]
+
+ match = find_hash(url)
+ if match:
+ post["hash"] = hash = match.group(1)
+ if hash in hashes:
+ self.log.debug("Skipping %s (duplicate)", url)
+ continue
+ hashes.add(hash)
+ else:
+ post["hash"] = ""
+
+ post["type"] = file["type"]
+ post["num"] += 1
+
if url[0] == "/":
url = self.root + "/data" + url
elif url.startswith("https://kemono.party"):
@@ -103,6 +111,34 @@ class KemonopartyExtractor(Extractor):
return {c.name: c.value for c in response.history[0].cookies}
+ def _file(self, post):
+ file = post["file"]
+ if not file:
+ return ()
+ file["type"] = "file"
+ return (file,)
+
+ def _attachments(self, post):
+ for attachment in post["attachments"]:
+ attachment["type"] = "attachment"
+ return post["attachments"]
+
+ def _inline(self, post):
+ for path in self._find_inline(post["content"] or ""):
+ yield {"path": path, "name": path, "type": "inline"}
+
+ def _build_file_generators(self, filetypes):
+ if filetypes is None:
+ return (self._file, self._attachments, self._inline)
+ genmap = {
+ "file" : self._file,
+ "attachments": self._attachments,
+ "inline" : self._inline,
+ }
+ if isinstance(filetypes, str):
+ filetypes = filetypes.split(",")
+ return [genmap[ft] for ft in filetypes]
+
def _extract_comments(self, post):
url = "{}/{}/user/{}/post/{}".format(
self.root, post["service"], post["user"], post["id"])
@@ -121,6 +157,21 @@ class KemonopartyExtractor(Extractor):
})
return comments
+ def _extract_dms(self, post):
+ url = "{}/{}/user/{}/dms".format(
+ self.root, post["service"], post["user"])
+ page = self.request(url).text
+
+ dms = []
+ for dm in text.extract_iter(page, "<article", "</article>"):
+ dms.append({
+ "body": text.unescape(text.extract(
+ dm, '<div class="dm-card__content">', '</div>',
+ )[0].strip()),
+ "date": text.extract(dm, 'datetime="', '"')[0],
+ })
+ return dms
+
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
@@ -175,6 +226,8 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
"embed": dict,
"extension": "jpeg",
"filename": "P058kDFYus7DbqAkGlfWTlOr",
+ "hash": "210f35388e28bbcf756db18dd516e2d8"
+ "2ce758e0d32881eeee76d43e1716d382",
"id": "506575",
"num": 1,
"published": "Sun, 11 Aug 2019 02:09:04 GMT",
@@ -188,25 +241,39 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
}),
# inline image (#1286)
("https://kemono.party/fanbox/user/7356311/post/802343", {
- "pattern": r"https://kemono\.party/data/inline/fanbox"
- r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
+ "pattern": r"https://kemono\.party/data/47/b5/47b5c014ecdcfabdf2c8"
+ r"5eec53f1133a76336997ae8596f332e97d956a460ad2\.jpg",
+ "keyword": {"hash": "47b5c014ecdcfabdf2c85eec53f1133a"
+ "76336997ae8596f332e97d956a460ad2"},
}),
# kemono.party -> data.kemono.party
("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
- "pattern": r"https://kemono\.party/data/(file|attachment)s"
- r"/gumroad/trylsc/IURjT/",
+ "pattern": r"https://kemono\.party/data/("
+ r"files/gumroad/trylsc/IURjT/reward8\.jpg|"
+ r"c6/04/c6048f5067fd9dbfa7a8be565ac194efdfb6e4.+\.zip)",
}),
# username (#1548, #1652)
("https://kemono.party/gumroad/user/3252870377455/post/aJnAH", {
"options": (("metadata", True),),
"keyword": {"username": "Kudalyn's Creations"},
}),
- # skip patreon main file (#1667, #1689)
+ # skip patreon duplicates
("https://kemono.party/patreon/user/4158582/post/32099982", {
"count": 2,
- "keyword": {"type": "attachment"},
+ }),
+ # DMs (#2008)
+ ("https://kemono.party/patreon/user/34134344/post/38129255", {
+ "options": (("dms", True),),
+ "keyword": {"dms": [{
+ "body": r"re:Hi! Thank you very much for supporting the work I"
+ r" did in May. Here's your reward pack! I hope you fin"
+ r"d something you enjoy in it. :\)\n\nhttps://www.medi"
+ r"afire.com/file/\w+/Set13_tier_2.zip/file",
+ "date": "2021-07-31 02:47:51.327865",
+ }]},
}),
("https://kemono.party/subscribestar/user/alcorart/post/184330"),
+ ("https://www.kemono.party/subscribestar/user/alcorart/post/184330"),
)
def __init__(self, match):