summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/kemonoparty.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/kemonoparty.py')
-rw-r--r-- gallery_dl/extractor/kemonoparty.py 23
1 files changed, 16 insertions, 7 deletions
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 63e3084..33e8370 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -16,6 +16,7 @@ import re
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.party"
USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
+HASH_PATTERN = r"/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})"
class KemonopartyExtractor(Extractor):
@@ -41,7 +42,7 @@ class KemonopartyExtractor(Extractor):
self._find_inline = re.compile(
r'src="(?:https?://(?:kemono|coomer)\.party)?(/inline/[^"]+'
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
- find_hash = re.compile("/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})").match
+ find_hash = re.compile(HASH_PATTERN).match
generators = self._build_file_generators(self.config("files"))
duplicates = self.config("duplicates")
comments = self.config("comments")
@@ -89,10 +90,11 @@ class KemonopartyExtractor(Extractor):
match = find_hash(url)
if match:
file["hash"] = hash = match.group(1)
- if hash in hashes and not duplicates:
- self.log.debug("Skipping %s (duplicate)", url)
- continue
- hashes.add(hash)
+ if not duplicates:
+ if hash in hashes:
+ self.log.debug("Skipping %s (duplicate)", url)
+ continue
+ hashes.add(hash)
else:
file["hash"] = ""
@@ -362,14 +364,17 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
"pattern": r"https://kemono\.party/data/("
r"e3/77/e377e3525164559484ace2e64425b0cec1db08.*\.png|"
r"51/45/51453640a5e0a4d23fbf57fb85390f9c5ec154.*\.gif)",
+ "keyword": {"hash": "re:e377e3525164559484ace2e64425b0cec1db08"
+ "|51453640a5e0a4d23fbf57fb85390f9c5ec154"},
"count": ">= 2",
}),
# 'inline' files
(("https://kemono.party/discord"
"/server/315262215055736843/channel/315262215055736843#general"), {
"pattern": r"https://cdn\.discordapp\.com/attachments/\d+/\d+/.+$",
- "range": "1-5",
"options": (("image-filter", "type == 'inline'"),),
+ "keyword": {"hash": ""},
+ "range": "1-5",
}),
)
@@ -383,6 +388,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
find_inline = re.compile(
r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
+ find_hash = re.compile(HASH_PATTERN).match
posts = self.posts()
max_posts = self.config("max-posts")
@@ -393,11 +399,13 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
files = []
append = files.append
for attachment in post["attachments"]:
+ match = find_hash(attachment["path"])
+ attachment["hash"] = match.group(1) if match else ""
attachment["type"] = "attachment"
append(attachment)
for path in find_inline(post["content"] or ""):
append({"path": "https://cdn.discordapp.com" + path,
- "name": path, "type": "inline"})
+ "name": path, "type": "inline", "hash": ""})
post["channel_name"] = self.channel_name
post["date"] = text.parse_datetime(
@@ -406,6 +414,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
yield Message.Directory, post
for post["num"], file in enumerate(files, 1):
+ post["hash"] = file["hash"]
post["type"] = file["type"]
url = file["path"]