# -*- coding: utf-8 -*- # Copyright 2022-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://poipiku.com/""" from .common import Extractor, Message from .. import text BASE_PATTERN = r"(?:https?://)?poipiku\.com" class PoipikuExtractor(Extractor): """Base class for poipiku extractors""" category = "poipiku" root = "https://poipiku.com" directory_fmt = ("{category}", "{user_id} {user_name}") filename_fmt = "{post_id}_{num}.{extension}" archive_fmt = "{post_id}_{num}" cookies_domain = "poipiku.com" cookies_warning = True request_interval = (0.5, 1.5) def _init(self): self.cookies.set( "LANG", "en", domain=self.cookies_domain) self.cookies.set( "POIPIKU_CONTENTS_VIEW_MODE", "1", domain=self.cookies_domain) self.headers = { "Accept" : "application/json, text/javascript, */*; q=0.01", "X-Requested-With": "XMLHttpRequest", "Origin" : self.root, "Referer": None, "Sec-Fetch-Dest": "empty", "Sec-Fetch-Mode": "cors", "Sec-Fetch-Site": "same-origin", } self.password = self.config("password", "") def items(self): if self.cookies_check(("POIPIKU_LK",)): extract_files = self._extract_files_auth logged_in = True else: extract_files = self._extract_files_noauth logged_in = False if self.cookies_warning: self.log.warning("no 'POIPIKU_LK' cookie set") PoipikuExtractor.cookies_warning = False for post_url in self.posts(): if post_url[0] == "/": post_url = self.root + post_url page = self.request(post_url).text extr = text.extract_from(page) parts = post_url.rsplit("/", 2) post = { "post_category": extr("[", "]"), "count" : text.parse_int(extr("(", " ")), "post_id" : parts[2].partition(".")[0], "user_id" : parts[1], "user_name" : text.unescape(extr( '<h2 class="UserInfoUserName">', '</').rpartition(">")[2]), "description": text.unescape(extr( 'class="IllustItemDesc" >', '</h1>')), "warning" : False, "password" : False, "requires" : None, "original" : logged_in, "_http_headers": {"Referer": post_url}, } thumb = self._extract_thumb(post, extr) self.headers["Referer"] = post_url if post["requires"] and not post["password"] and extr( "PasswordIcon", ">"): post["password"] = True yield Message.Directory, "", post for post["num"], url in enumerate(extract_files( post, thumb, extr), 1): yield Message.Url, url, text.nameext_from_url(url, post) def _extract_thumb(self, post, extr): thumb = "" while True: img = extr('class="IllustItemThumbImg" src="', '"') if not img: return thumb elif img.startswith("https://cdn.poipiku.com/img/"): self.log.debug("%s: %s", post["post_id"], img) type = text.rextr(img, "/", ".") if type == "warning": post["warning"] = True elif type == "publish_pass": post["password"] = True elif type == "publish_login": post["requires"] = "login" elif type == "publish_follower": post["requires"] = "follow" elif type == "publish_t_rt": post["requires"] = "retweet" elif img.startswith(( "https://img.poipiku.com/img/", "//img.poipiku.com/img/", "/img/", )): self.log.debug("%s: %s", post["post_id"], img) if "/warning" in img: post["warning"] = True else: thumb = img def _extract_files_auth(self, post, thumb, extr): data = self._show_illust_detail(post) if data.get("error_code"): data = self._show_append_file(post) html = data["html"] self.log.warning("%s: '%s'", post["post_id"], html.replace("<br/>", " ")) return () return text.extract_iter(data["html"], 'src="', '"') def _extract_files_noauth(self, post, thumb, extr): if thumb: if not extr('ShowAppendFile', '<'): return (thumb,) files = [thumb] else: files = [] data = self._show_append_file(post) html = data["html"] if (data.get("result_num") or 0) < 0: self.log.warning("%s: '%s'", post["post_id"], html.replace("<br/>", " ")) files.extend(text.extract_iter( html, 'class="IllustItemThumbImg" src="', '"')) return files def _show_illust_detail(self, post): url = self.root + "/f/ShowIllustDetailF.jsp" data = { "ID" : post["user_id"], "TD" : post["post_id"], "AD" : "-1", "PAS": self.password, } return self.request_json( url, method="POST", headers=self.headers, data=data, interval=False) def _show_append_file(self, post): url = self.root + "/f/ShowAppendFileF.jsp" data = { "UID": post["user_id"], "IID": post["post_id"], "PAS": self.password, "MD" : "0", "TWF": "-1", } return self.request_json( url, method="POST", headers=self.headers, data=data, interval=False) class PoipikuUserExtractor(PoipikuExtractor): """Extractor for posts from a poipiku user""" subcategory = "user" pattern = (BASE_PATTERN + r"/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?" r"(\d+)/?(?:$|[?&#])") example = "https://poipiku.com/12345/" def posts(self): pnum, user_id = self.groups url = self.root + "/IllustListPcV.jsp" params = { "PG" : text.parse_int(pnum, 0), "ID" : user_id, "KWD": "", } while True: page = self.request(url, params=params).text cnt = 0 for path in text.extract_iter( page, 'class="IllustInfo" href="', '"'): yield path cnt += 1 if cnt < 48: return params["PG"] += 1 class PoipikuPostExtractor(PoipikuExtractor): """Extractor for a poipiku post""" subcategory = "post" pattern = BASE_PATTERN + r"/(\d+)/(\d+)" example = "https://poipiku.com/12345/12345.html" def posts(self): user_id, post_id = self.groups return (f"/{user_id}/{post_id}.html",)