summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/poipiku.py
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@unit193.net>2025-10-14 00:23:10 -0400
committerLibravatarUnit 193 <unit193@unit193.net>2025-10-14 00:23:10 -0400
commit33f8a8a37a9cba738ef25fb99955f0730da9eb48 (patch)
treeb51fb48b160f5e5e034e6b4542e6f00703bae7ec /gallery_dl/extractor/poipiku.py
parentbbe7fac03d881662a458e7fbf870c9d71f5257f4 (diff)
New upstream version 1.30.10.upstream/1.30.10
Diffstat (limited to 'gallery_dl/extractor/poipiku.py')
-rw-r--r--gallery_dl/extractor/poipiku.py203
1 files changed, 135 insertions, 68 deletions
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index 957e316..32ca528 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022-2023 Mike Fährmann
+# Copyright 2022-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,102 +21,172 @@ class PoipikuExtractor(Extractor):
directory_fmt = ("{category}", "{user_id} {user_name}")
filename_fmt = "{post_id}_{num}.{extension}"
archive_fmt = "{post_id}_{num}"
+ cookies_domain = "poipiku.com"
+ cookies_warning = True
request_interval = (0.5, 1.5)
def _init(self):
self.cookies.set(
- "LANG", "en", domain="poipiku.com")
+ "LANG", "en", domain=self.cookies_domain)
self.cookies.set(
- "POIPIKU_CONTENTS_VIEW_MODE", "1", domain="poipiku.com")
+ "POIPIKU_CONTENTS_VIEW_MODE", "1", domain=self.cookies_domain)
+ self.headers = {
+ "Accept" : "application/json, text/javascript, */*; q=0.01",
+ "X-Requested-With": "XMLHttpRequest",
+ "Origin" : self.root,
+ "Referer": None,
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "cors",
+ "Sec-Fetch-Site": "same-origin",
+ }
+ self.password = self.config("password", "")
def items(self):
- password = self.config("password", "")
+ if self.cookies_check(("POIPIKU_LK",)):
+ extract_files = self._extract_files_auth
+ logged_in = True
+ else:
+ extract_files = self._extract_files_noauth
+ logged_in = False
+ if self.cookies_warning:
+ self.log.warning("no 'POIPIKU_LK' cookie set")
+ PoipikuExtractor.cookies_warning = False
for post_url in self.posts():
- parts = post_url.split("/")
if post_url[0] == "/":
- post_url = self.root + post_url
+ post_url = f"{self.root}{post_url}"
page = self.request(post_url).text
extr = text.extract_from(page)
-
+ parts = post_url.rsplit("/", 2)
post = {
"post_category": extr("<title>[", "]"),
"count" : text.parse_int(extr("(", " ")),
- "post_id" : parts[-1].partition(".")[0],
- "user_id" : parts[-2],
+ "post_id" : parts[2].partition(".")[0],
+ "user_id" : parts[1],
"user_name" : text.unescape(extr(
'<h2 class="UserInfoUserName">', '</').rpartition(">")[2]),
"description": text.unescape(extr(
'class="IllustItemDesc" >', '</h1>')),
+ "warning" : False,
+ "password" : False,
+ "requires" : None,
+ "original" : logged_in,
"_http_headers": {"Referer": post_url},
}
+ thumb = self._extract_thumb(post, extr)
+ self.headers["Referer"] = post_url
+
+ if post["requires"] and not post["password"] and extr(
+ "PasswordIcon", ">"):
+ post["password"] = True
+
yield Message.Directory, post
- post["num"] = warning = 0
-
- while True:
- thumb = extr('class="IllustItemThumbImg" src="', '"')
- if not thumb:
- break
- elif thumb.startswith(("//img.poipiku.com/img/", "/img/")):
- if "/warning" in thumb:
- warning = True
- self.log.debug("%s: %s", post["post_id"], thumb)
- continue
- post["num"] += 1
- url = text.ensure_http_scheme(thumb[:-8]).replace(
- "//img.", "//img-org.", 1)
+ for post["num"], url in enumerate(extract_files(
+ post, thumb, extr), 1):
yield Message.Url, url, text.nameext_from_url(url, post)
- if not warning and not extr('ShowAppendFile', '<'):
- continue
+ def _extract_thumb(self, post, extr):
+ thumb = ""
- url = self.root + "/f/ShowAppendFileF.jsp"
- headers = {
- "Accept" : "application/json, text/javascript, */*; q=0.01",
- "X-Requested-With": "XMLHttpRequest",
- "Origin" : self.root,
- "Referer": post_url,
- }
- data = {
- "UID": post["user_id"],
- "IID": post["post_id"],
- "PAS": password,
- "MD" : "0",
- "TWF": "-1",
- }
- resp = self.request_json(
- url, method="POST", headers=headers, data=data)
-
- page = resp["html"]
- if (resp.get("result_num") or 0) < 0:
- self.log.warning("%s: '%s'",
- post["post_id"], page.replace("<br/>", " "))
-
- for thumb in text.extract_iter(
- page, 'class="IllustItemThumbImg" src="', '"'):
- post["num"] += 1
- url = text.ensure_http_scheme(thumb[:-8]).replace(
- "//img.", "//img-org.", 1)
- yield Message.Url, url, text.nameext_from_url(url, post)
+ while True:
+ img = extr('class="IllustItemThumbImg" src="', '"')
+
+ if not img:
+ return thumb
+ elif img.startswith("https://cdn.poipiku.com/img/"):
+ self.log.debug("%s: %s", post["post_id"], img)
+ type = text.rextr(img, "/", ".")
+ if type == "warning":
+ post["warning"] = True
+ elif type == "publish_pass":
+ post["password"] = True
+ elif type == "publish_login":
+ post["requires"] = "login"
+ elif type == "publish_follower":
+ post["requires"] = "follow"
+ elif type == "publish_t_rt":
+ post["requires"] = "retweet"
+ elif img.startswith((
+ "https://img.poipiku.com/img/",
+ "//img.poipiku.com/img/",
+ "/img/",
+ )):
+ self.log.debug("%s: %s", post["post_id"], img)
+ if "/warning" in img:
+ post["warning"] = True
+ else:
+ thumb = img
+
+ def _extract_files_auth(self, post, thumb, extr):
+ data = self._show_illust_detail(post)
+
+ if data.get("error_code"):
+ data = self._show_append_file(post)
+ html = data["html"]
+ self.log.warning("%s: '%s'",
+ post["post_id"], html.replace("<br/>", " "))
+ return ()
+ return text.extract_iter(data["html"], 'src="', '"')
+
+ def _extract_files_noauth(self, post, thumb, extr):
+ if thumb:
+ if not extr('ShowAppendFile', '<'):
+ return (thumb,)
+ files = [thumb]
+ else:
+ files = []
+
+ data = self._show_append_file(post)
+ html = data["html"]
+ if (data.get("result_num") or 0) < 0:
+ self.log.warning("%s: '%s'",
+ post["post_id"], html.replace("<br/>", " "))
+
+ files.extend(text.extract_iter(
+ html, 'class="IllustItemThumbImg" src="', '"'))
+ return files
+
+ def _show_illust_detail(self, post):
+ url = f"{self.root}/f/ShowIllustDetailF.jsp"
+ data = {
+ "ID" : post["user_id"],
+ "TD" : post["post_id"],
+ "AD" : "-1",
+ "PAS": self.password,
+ }
+ return self.request_json(
+ url, method="POST", headers=self.headers, data=data,
+ interval=False)
+
+ def _show_append_file(self, post):
+ url = f"{self.root}/f/ShowAppendFileF.jsp"
+ data = {
+ "UID": post["user_id"],
+ "IID": post["post_id"],
+ "PAS": self.password,
+ "MD" : "0",
+ "TWF": "-1",
+ }
+ return self.request_json(
+ url, method="POST", headers=self.headers, data=data,
+ interval=False)
class PoipikuUserExtractor(PoipikuExtractor):
"""Extractor for posts from a poipiku user"""
subcategory = "user"
- pattern = (BASE_PATTERN + r"/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?"
- r"(\d+)/?(?:$|[?&#])")
+ pattern = (rf"{BASE_PATTERN}/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?"
+ rf"(\d+)/?(?:$|[?&#])")
example = "https://poipiku.com/12345/"
- def __init__(self, match):
- PoipikuExtractor.__init__(self, match)
- self._page, self.user_id = match.groups()
-
def posts(self):
- url = self.root + "/IllustListPcV.jsp"
+ pnum, user_id = self.groups
+
+ url = f"{self.root}/IllustListPcV.jsp"
params = {
- "PG" : text.parse_int(self._page, 0),
- "ID" : self.user_id,
+ "PG" : text.parse_int(pnum, 0),
+ "ID" : user_id,
"KWD": "",
}
@@ -137,12 +207,9 @@ class PoipikuUserExtractor(PoipikuExtractor):
class PoipikuPostExtractor(PoipikuExtractor):
"""Extractor for a poipiku post"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/(\d+)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/(\d+)/(\d+)"
example = "https://poipiku.com/12345/12345.html"
- def __init__(self, match):
- PoipikuExtractor.__init__(self, match)
- self.user_id, self.post_id = match.groups()
-
def posts(self):
- return (f"/{self.user_id}/{self.post_id}.html",)
+ user_id, post_id = self.groups
+ return (f"/{user_id}/{post_id}.html",)