diff options
Diffstat (limited to 'gallery_dl/extractor/seisoparty.py')
| -rw-r--r-- | gallery_dl/extractor/seisoparty.py | 142 |
1 files changed, 142 insertions, 0 deletions
# diff --git a/gallery_dl/extractor/seisoparty.py
#            b/gallery_dl/extractor/seisoparty.py
# new file mode 100644
# index 0000000..b736b4b
# --- /dev/null
# +++ b/gallery_dl/extractor/seisoparty.py
# @@ -0,0 +1,142 @@

# -*- coding: utf-8 -*-

# Copyright 2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://seiso.party/"""

from .common import Extractor, Message
from .. import text
import re


class SeisopartyExtractor(Extractor):
    """Common functionality shared by all seisoparty extractors

    Subclasses are expected to set 'service' and 'user_id' and to
    provide a posts() method returning an iterable of post dicts
    as built by _parse_post().
    """
    category = "seisoparty"
    root = "https://seiso.party"
    directory_fmt = ("{category}", "{service}", "{username}")
    filename_fmt = "{id}_{title}_{num:>02}_{filename}.{extension}"
    archive_fmt = "{service}_{user}_{id}_{num}"
    cookiedomain = ".seiso.party"
    # class-wide flag, cleared after the first cookie check in items()
    _warning = True

    def __init__(self, match):
        super().__init__(match)
        self.user_name = None
        # bound 'findall' returning all CDN file URLs of a page
        file_pattern = re.compile(
            r'href="(https://cdn(?:-\d)?\.seiso\.party/files/[^"]+)')
        self._find_files = file_pattern.findall

    def items(self):
        """Yield a Directory message and one Url message per file
        for every post returned by posts()"""
        if self._warning:
            have_cookies = self._check_cookies(("__ddg1", "__ddg2"))
            if not have_cookies:
                self.log.warning("no DDoS-GUARD cookies set (__ddg1, __ddg2)")
            # only perform this check once for all seisoparty extractors
            SeisopartyExtractor._warning = False

        for post in self.posts():
            # 'files' is only a list of URLs and not part of the metadata
            file_urls = post.pop("files")
            yield Message.Directory, post

            for num, url in enumerate(file_urls, 1):
                post["num"] = num
                yield Message.Url, url, text.nameext_from_url(url, post)

    def _parse_post(self, page, post_id):
        """Build a metadata dict from the HTML 'page' of post 'post_id'"""
        extract = text.extract_from(page)

        # 'extract' is called in the same order as the fields are
        # looked up in the original page: date, title, content
        date = text.parse_datetime(
            extract('<div class="margin-bottom-15 minor-text">', '<'),
            "%Y-%m-%d %H:%M:%S %Z")
        title = text.unescape(extract('class="post-title">', '<'))
        content = text.unescape(extract("\n<p>\n", "\n</p>\n").strip())

        return {
            "service": self.service,
            "user": self.user_id,
            "username": self.user_name,
            "id": post_id,
            "date": date,
            "title": title,
            "content": content,
            "files": self._find_files(page),
        }
class SeisopartyUserExtractor(SeisopartyExtractor):
    """Extractor for all posts from a seiso.party user listing"""
    subcategory = "user"
    pattern = r"(?:https?://)?seiso\.party/artists/([^/?#]+)/([^/?#]+)"
    test = (
        ("https://seiso.party/artists/fanbox/21", {
            "pattern": r"https://cdn\.seiso\.party/files/fanbox/\d+/",
            "count": ">=15",
            "keyword": {
                "content": str,
                "date": "type:datetime",
                "id": r"re:\d+",
                "num": int,
                "service": "fanbox",
                "title": str,
                "user": "21",
                "username": "雨",
            },
        }),
    )

    def __init__(self, match):
        SeisopartyExtractor.__init__(self, match)
        # the two pattern groups: service name and user ID
        self.service, self.user_id = match.groups()

    def posts(self):
        """Yield a metadata dict for each post of the requested user

        Starts at the first post link found after the 'id="content"'
        marker of the artist page and then follows '<post-url>/next'
        until that request no longer leads to a different URL.
        Yields nothing if the artist page contains no such link.
        """
        url = "{}/artists/{}/{}".format(self.root, self.service, self.user_id)
        page = self.request(url).text

        name, pos = text.extract(page, '<span class="title">', '<')
        # unescape HTML entities, same as for 'title' and 'content';
        # 'name' is None when the marker could not be found
        self.user_name = text.unescape(name) if name else name

        # str.find() instead of str.index(), so that a page without
        # content section ends the extraction instead of raising
        start = page.find('id="content"', pos)
        path = None
        if start >= 0:
            path = text.extract(page, 'href="', '"', start)[0]
        if not path:
            self.log.debug("no post links found for %s/%s",
                           self.service, self.user_id)
            return

        url = self.root + path
        response = self.request(url)
        headers = {"Referer": url}

        while True:
            # the last path segment of a post URL is used as post ID
            yield self._parse_post(response.text, url.rpartition("/")[2])
            response = self.request(url + "/next", headers=headers)
            if url == response.url:
                # '/next' led back to the current post: nothing left
                return
            url = headers["Referer"] = response.url


class SeisopartyPostExtractor(SeisopartyExtractor):
    """Extractor for a single seiso.party post"""
    subcategory = "post"
    pattern = r"(?:https?://)?seiso\.party/post/([^/?#]+)/([^/?#]+)/([^/?#]+)"
    test = (
        ("https://seiso.party/post/fanbox/21/371", {
            "url": "75f13b92de0ce399b6163c3de18f1f36011c2366",
            "count": 2,
            "keyword": {
                "content": "この前描いためぐるちゃんのPSDファイルです。\n"
                           "どうぞよろしくお願いします。",
                "date": "dt:2021-05-06 12:38:31",
                "extension": "re:psd|jpg",
                "filename": "re:backcourt|ffb2ccb7a3586d05f9a4620329dd131e",
                "id": "371",
                "num": int,
                "service": "fanbox",
                "title": "MEGURU.PSD",
                "user": "21",
                "username": "雨",
            },
        }),
        ("https://seiso.party/post/patreon/429/95949", {
            "pattern": r"https://cdn-2\.seiso\.party/files/patreon/95949/",
            "count": 2,
        }),
    )

    def __init__(self, match):
        SeisopartyExtractor.__init__(self, match)
        # the three pattern groups: service name, user ID, and post ID
        self.service, self.user_id, self.post_id = match.groups()

    def posts(self):
        """Return a 1-tuple with the metadata dict of the requested post

        The artist page is requested first, since the user name is
        read from there and not from the post page.
        """
        url = "{}/artists/{}/{}".format(self.root, self.service, self.user_id)
        page = self.request(url).text

        name = text.extract(page, '<span class="title">', '<')[0]
        # unescape HTML entities, same as for 'title' and 'content';
        # 'name' is None when the marker could not be found
        self.user_name = text.unescape(name) if name else name

        url = "{}/post/{}/{}/{}".format(
            self.root, self.service, self.user_id, self.post_id)
        return (self._parse_post(self.request(url).text, self.post_id),)
