diff options
Diffstat (limited to 'gallery_dl/extractor/poipiku.py')
| -rw-r--r-- | gallery_dl/extractor/poipiku.py | 203 |
1 files changed, 135 insertions, 68 deletions
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py index 957e316..32ca528 100644 --- a/gallery_dl/extractor/poipiku.py +++ b/gallery_dl/extractor/poipiku.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2022-2023 Mike Fährmann +# Copyright 2022-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -21,102 +21,172 @@ class PoipikuExtractor(Extractor): directory_fmt = ("{category}", "{user_id} {user_name}") filename_fmt = "{post_id}_{num}.{extension}" archive_fmt = "{post_id}_{num}" + cookies_domain = "poipiku.com" + cookies_warning = True request_interval = (0.5, 1.5) def _init(self): self.cookies.set( - "LANG", "en", domain="poipiku.com") + "LANG", "en", domain=self.cookies_domain) self.cookies.set( - "POIPIKU_CONTENTS_VIEW_MODE", "1", domain="poipiku.com") + "POIPIKU_CONTENTS_VIEW_MODE", "1", domain=self.cookies_domain) + self.headers = { + "Accept" : "application/json, text/javascript, */*; q=0.01", + "X-Requested-With": "XMLHttpRequest", + "Origin" : self.root, + "Referer": None, + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "same-origin", + } + self.password = self.config("password", "") def items(self): - password = self.config("password", "") + if self.cookies_check(("POIPIKU_LK",)): + extract_files = self._extract_files_auth + logged_in = True + else: + extract_files = self._extract_files_noauth + logged_in = False + if self.cookies_warning: + self.log.warning("no 'POIPIKU_LK' cookie set") + PoipikuExtractor.cookies_warning = False for post_url in self.posts(): - parts = post_url.split("/") if post_url[0] == "/": - post_url = self.root + post_url + post_url = f"{self.root}{post_url}" page = self.request(post_url).text extr = text.extract_from(page) - + parts = post_url.rsplit("/", 2) post = { "post_category": extr("<title>[", "]"), "count" : text.parse_int(extr("(", " ")), - "post_id" : parts[-1].partition(".")[0], - "user_id" : parts[-2], + "post_id" : parts[2].partition(".")[0], + "user_id" : parts[1], "user_name" : text.unescape(extr( '<h2 class="UserInfoUserName">', '</').rpartition(">")[2]), "description": text.unescape(extr( 'class="IllustItemDesc" >', '</h1>')), + "warning" : False, + "password" : False, + "requires" : None, + "original" : logged_in, "_http_headers": {"Referer": post_url}, } + thumb = self._extract_thumb(post, extr) + self.headers["Referer"] = post_url + + if post["requires"] and not post["password"] and extr( + "PasswordIcon", ">"): + post["password"] = True + yield Message.Directory, post - post["num"] = warning = 0 - - while True: - thumb = extr('class="IllustItemThumbImg" src="', '"') - if not thumb: - break - elif thumb.startswith(("//img.poipiku.com/img/", "/img/")): - if "/warning" in thumb: - warning = True - self.log.debug("%s: %s", post["post_id"], thumb) - continue - post["num"] += 1 - url = text.ensure_http_scheme(thumb[:-8]).replace( - "//img.", "//img-org.", 1) + for post["num"], url in enumerate(extract_files( + post, thumb, extr), 1): yield Message.Url, url, text.nameext_from_url(url, post) - if not warning and not extr('ShowAppendFile', '<'): - continue + def _extract_thumb(self, post, extr): + thumb = "" - url = self.root + "/f/ShowAppendFileF.jsp" - headers = { - "Accept" : "application/json, text/javascript, */*; q=0.01", - "X-Requested-With": "XMLHttpRequest", - "Origin" : self.root, - "Referer": post_url, - } - data = { - "UID": post["user_id"], - "IID": post["post_id"], - "PAS": password, - "MD" : "0", - "TWF": "-1", - } - resp = self.request_json( - url, method="POST", headers=headers, data=data) - - page = resp["html"] - if (resp.get("result_num") or 0) < 0: - self.log.warning("%s: '%s'", - post["post_id"], page.replace("<br/>", " ")) - - for thumb in text.extract_iter( - page, 'class="IllustItemThumbImg" src="', '"'): - post["num"] += 1 - url = text.ensure_http_scheme(thumb[:-8]).replace( - "//img.", "//img-org.", 1) - yield Message.Url, url, text.nameext_from_url(url, post) + while True: + img = extr('class="IllustItemThumbImg" src="', '"') + + if not img: + return thumb + elif img.startswith("https://cdn.poipiku.com/img/"): + self.log.debug("%s: %s", post["post_id"], img) + type = text.rextr(img, "/", ".") + if type == "warning": + post["warning"] = True + elif type == "publish_pass": + post["password"] = True + elif type == "publish_login": + post["requires"] = "login" + elif type == "publish_follower": + post["requires"] = "follow" + elif type == "publish_t_rt": + post["requires"] = "retweet" + elif img.startswith(( + "https://img.poipiku.com/img/", + "//img.poipiku.com/img/", + "/img/", + )): + self.log.debug("%s: %s", post["post_id"], img) + if "/warning" in img: + post["warning"] = True + else: + thumb = img + + def _extract_files_auth(self, post, thumb, extr): + data = self._show_illust_detail(post) + + if data.get("error_code"): + data = self._show_append_file(post) + html = data["html"] + self.log.warning("%s: '%s'", + post["post_id"], html.replace("<br/>", " ")) + return () + return text.extract_iter(data["html"], 'src="', '"') + + def _extract_files_noauth(self, post, thumb, extr): + if thumb: + if not extr('ShowAppendFile', '<'): + return (thumb,) + files = [thumb] + else: + files = [] + + data = self._show_append_file(post) + html = data["html"] + if (data.get("result_num") or 0) < 0: + self.log.warning("%s: '%s'", + post["post_id"], html.replace("<br/>", " ")) + + files.extend(text.extract_iter( + html, 'class="IllustItemThumbImg" src="', '"')) + return files + + def _show_illust_detail(self, post): + url = f"{self.root}/f/ShowIllustDetailF.jsp" + data = { + "ID" : post["user_id"], + "TD" : post["post_id"], + "AD" : "-1", + "PAS": self.password, + } + return self.request_json( + url, method="POST", headers=self.headers, data=data, + interval=False) + + def _show_append_file(self, post): + url = f"{self.root}/f/ShowAppendFileF.jsp" + data = { + "UID": post["user_id"], + "IID": post["post_id"], + "PAS": self.password, + "MD" : "0", + "TWF": "-1", + } + return self.request_json( + url, method="POST", headers=self.headers, data=data, + interval=False) class PoipikuUserExtractor(PoipikuExtractor): """Extractor for posts from a poipiku user""" subcategory = "user" - pattern = (BASE_PATTERN + r"/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?" - r"(\d+)/?(?:$|[?&#])") + pattern = (rf"{BASE_PATTERN}/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?" + rf"(\d+)/?(?:$|[?&#])") example = "https://poipiku.com/12345/" - def __init__(self, match): - PoipikuExtractor.__init__(self, match) - self._page, self.user_id = match.groups() - def posts(self): - url = self.root + "/IllustListPcV.jsp" + pnum, user_id = self.groups + + url = f"{self.root}/IllustListPcV.jsp" params = { - "PG" : text.parse_int(self._page, 0), - "ID" : self.user_id, + "PG" : text.parse_int(pnum, 0), + "ID" : user_id, "KWD": "", } @@ -137,12 +207,9 @@ class PoipikuUserExtractor(PoipikuExtractor): class PoipikuPostExtractor(PoipikuExtractor): """Extractor for a poipiku post""" subcategory = "post" - pattern = BASE_PATTERN + r"/(\d+)/(\d+)" + pattern = rf"{BASE_PATTERN}/(\d+)/(\d+)" example = "https://poipiku.com/12345/12345.html" - def __init__(self, match): - PoipikuExtractor.__init__(self, match) - self.user_id, self.post_id = match.groups() - def posts(self): - return (f"/{self.user_id}/{self.post_id}.html",) + user_id, post_id = self.groups + return (f"/{user_id}/{post_id}.html",) |
