diff options
Diffstat (limited to 'gallery_dl/extractor/kemonoparty.py')
| -rw-r--r-- | gallery_dl/extractor/kemonoparty.py | 83 |
1 file changed, 53 insertions, 30 deletions
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index a5b5e00..377e00b 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -10,17 +10,22 @@
 
 from .common import Extractor, Message
 from .. import text
+import re
+
+BASE_PATTERN = r"(?:https?://)?kemono\.party/([^/?#]+)/user/([^/?#]+)"
 
 
 class KemonopartyExtractor(Extractor):
     """Base class for kemonoparty extractors"""
     category = "kemonoparty"
     root = "https://kemono.party"
-    directory_fmt = ("{category}", "{user}")
+    directory_fmt = ("{category}", "{service}", "{user}")
     filename_fmt = "{id}_{title}_{filename}.{extension}"
-    archive_fmt = "{user}_{id}_{filename}.{extension}"
+    archive_fmt = "{service}_{user}_{id}_{filename}.{extension}"
 
     def items(self):
+        find_inline = re.compile(r'src="(/inline/[^"]+)').findall
+
         for post in self.posts():
 
             files = []
@@ -28,23 +33,32 @@ class KemonopartyExtractor(Extractor):
                 files.append(post["file"])
             if post["attachments"]:
                 files.extend(post["attachments"])
+            for path in find_inline(post["content"] or ""):
+                files.append({"path": path, "name": path})
+
             post["date"] = text.parse_datetime(
                 post["published"], "%a, %d %b %Y %H:%M:%S %Z")
             yield Message.Directory, post
 
             for post["num"], file in enumerate(files, 1):
+                url = file["path"]
+                if url[0] == "/":
+                    url = self.root + url
                 text.nameext_from_url(file["name"], post)
-                yield Message.Url, self.root + file["path"], post
+                yield Message.Url, url, post
 
 
 class KemonopartyUserExtractor(KemonopartyExtractor):
     """Extractor for all posts from a kemono.party user listing"""
     subcategory = "user"
-    pattern = r"(?:https?://)?kemono\.party/([^/?#]+)/user/(\d+)/?(?:$|[?#])"
-    test = ("https://kemono.party/fanbox/user/6993449", {
-        "range": "1-25",
-        "count": 25,
-    })
+    pattern = BASE_PATTERN + r"/?(?:$|[?#])"
+    test = (
+        ("https://kemono.party/fanbox/user/6993449", {
+            "range": "1-25",
+            "count": 25,
+        }),
+        ("https://kemono.party/subscribestar/user/alcorart"),
+    )
 
     def __init__(self, match):
         KemonopartyExtractor.__init__(self, match)
@@ -67,28 +81,37 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
 class KemonopartyPostExtractor(KemonopartyExtractor):
     """Extractor for a single kemono.party post"""
     subcategory = "post"
-    pattern = r"(?:https?://)?kemono\.party/([^/?#]+)/user/(\d+)/post/(\d+)"
-    test = ("https://kemono.party/fanbox/user/6993449/post/506575", {
-        "pattern": r"https://kemono\.party/files/fanbox"
-                   r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
-        "keyword": {
-            "added": "Wed, 06 May 2020 20:28:02 GMT",
-            "content": str,
-            "date": "dt:2019-08-11 02:09:04",
-            "edited": None,
-            "embed": dict,
-            "extension": "jpeg",
-            "filename": "P058kDFYus7DbqAkGlfWTlOr",
-            "id": "506575",
-            "num": 1,
-            "published": "Sun, 11 Aug 2019 02:09:04 GMT",
-            "service": "fanbox",
-            "shared_file": False,
-            "subcategory": "post",
-            "title": "c96取り置き",
-            "user": "6993449",
-        },
-    })
+    pattern = BASE_PATTERN + r"/post/([^/?#]+)"
+    test = (
+        ("https://kemono.party/fanbox/user/6993449/post/506575", {
+            "pattern": r"https://kemono\.party/files/fanbox"
+                       r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
+            "keyword": {
+                "added": "Wed, 06 May 2020 20:28:02 GMT",
+                "content": str,
+                "date": "dt:2019-08-11 02:09:04",
+                "edited": None,
+                "embed": dict,
+                "extension": "jpeg",
+                "filename": "P058kDFYus7DbqAkGlfWTlOr",
+                "id": "506575",
+                "num": 1,
+                "published": "Sun, 11 Aug 2019 02:09:04 GMT",
+                "service": "fanbox",
+                "shared_file": False,
+                "subcategory": "post",
+                "title": "c96取り置き",
+                "user": "6993449",
+            },
+        }),
+        # inline image (#1286)
+        ("https://kemono.party/fanbox/user/7356311/post/802343", {
+            "pattern": r"https://kemono\.party/inline/fanbox"
+                       r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
+        }),
+        ("https://kemono.party/subscribestar/user/alcorart/post/184330"),
+        ("https://kemono.party/gumroad/user/trylsc/post/IURjT"),
+    )
 
     def __init__(self, match):
         KemonopartyExtractor.__init__(self, match)
