summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/poipiku.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net>, 2022-06-28 19:54:18 -0400
committer: Unit 193 <unit193@unit193.net>, 2022-06-28 19:54:18 -0400
commit: ce35450b5308adab049c5bd99095986d4c607027 (patch)
tree: f0c2b600f8ef720941bdf615164b942c6c4a5d07 /gallery_dl/extractor/poipiku.py
parent: 25442ea49f031d4d2df3353dd7e9ad2080e332da (diff)
New upstream version 1.22.3. (upstream/1.22.3)
Diffstat (limited to 'gallery_dl/extractor/poipiku.py')
-rw-r--r-- gallery_dl/extractor/poipiku.py 169
1 files changed, 169 insertions, 0 deletions
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
new file mode 100644
index 0000000..e1846cc
--- /dev/null
+++ b/gallery_dl/extractor/poipiku.py
@@ -0,0 +1,169 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://poipiku.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
# Regex prefix shared by the extractor URL patterns in this module:
# an optional "http://" or "https://" scheme followed by the
# literal host "poipiku.com".
BASE_PATTERN = r"(?:https?://)?poipiku\.com"
+
+
class PoipikuExtractor(Extractor):
    """Shared scraping logic for the poipiku extractors

    Subclasses supply posts(), an iterable of post URLs (either absolute
    or site-relative paths starting with "/"). items() turns each post
    into one Directory message followed by one Url message per image.
    """
    category = "poipiku"
    root = "https://poipiku.com"
    directory_fmt = ("{category}", "{user_id} {user_name}")
    filename_fmt = "{post_id}_{num}.{extension}"
    archive_fmt = "{post_id}_{num}"
    # NOTE(review): interpreted by the Extractor base class, which is not
    # visible here -- presumably a (min, max) delay between requests in
    # seconds; confirm against .common.
    request_interval = (0.5, 1.5)

    def items(self):
        """Yield Directory and Url messages for every post from posts()

        Images embedded in the post page are emitted first. When the
        page contains the "show all" marker, the remaining images are
        requested with a POST to /f/ShowAppendFileF.jsp; the "password"
        config value (empty string by default) is sent as the "PAS"
        form field of that request.
        """
        password = self.config("password", "")
        thumb_begin = 'class="IllustItemThumbImg" src="'

        def file_message(thumb, metadata):
            # Every emitted file advances the per-post counter.
            metadata["num"] += 1
            # The download URL is the thumbnail URL minus its last eight
            # characters -- presumably a thumbnail-size suffix; TODO confirm.
            file_url = text.ensure_http_scheme(thumb[:-8])
            return (Message.Url, file_url,
                    text.nameext_from_url(file_url, metadata))

        for post_url in self.posts():
            # Split before prefixing the root so the last two segments
            # are always "<user_id>/<post_id>.html".
            segments = post_url.split("/")
            if post_url[0] == "/":
                post_url = self.root + post_url
            extr = text.extract_from(self.request(post_url).text)

            # 'extr' is called repeatedly on the same page, so the calls
            # below stay in the order the markers are looked up.
            post_category = extr("<title>[", "]")
            count = extr("(", " ")
            post_id = segments[-1].partition(".")[0]
            user_id = segments[-2]
            user_name = text.unescape(extr(
                '<h2 class="UserInfoUserName">', '</').rpartition(">")[2])
            description = text.unescape(extr(
                'class="IllustItemDesc" >', '<'))

            post = {
                "post_category": post_category,
                "count"        : count,
                "post_id"      : post_id,
                "user_id"      : user_id,
                "user_name"    : user_name,
                "description"  : description,
            }

            yield Message.Directory, post
            post["num"] = 0

            # Images embedded directly in the post page. Sources starting
            # with "/img/" are skipped (presumably site placeholder
            # graphics rather than post content -- confirm).
            thumb = extr(thumb_begin, '"')
            while thumb:
                if not thumb.startswith("/img/"):
                    yield file_message(thumb, post)
                thumb = extr(thumb_begin, '"')

            # Without a "show all" marker there is nothing more to fetch.
            if not extr('</i> show all', '<'):
                continue

            response = self.request(
                self.root + "/f/ShowAppendFileF.jsp",
                method="POST",
                headers={
                    "Accept" : "application/json, text/javascript, */*; q=0.01",
                    "X-Requested-With": "XMLHttpRequest",
                    "Origin" : self.root,
                    "Referer": post_url,
                },
                data={
                    "UID": post["user_id"],
                    "IID": post["post_id"],
                    "PAS": password,
                    "MD" : "0",
                    "TWF": "-1",
                },
            )
            # The JSON reply carries an HTML fragment under "html".
            appended_html = response.json()["html"]

            for thumb in text.extract_iter(appended_html, thumb_begin, '"'):
                yield file_message(thumb, post)
+
+
class PoipikuUserExtractor(PoipikuExtractor):
    """Extractor for the posts listed on a poipiku user's pages"""
    subcategory = "user"
    # Group 1: optional listing page number (the "PG" query value);
    # group 2: the numeric user ID.
    pattern = (BASE_PATTERN + r"/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?"
               r"(\d+)/?(?:$|[?&#])")
    test = (
        ("https://poipiku.com/25049/", {
            "pattern": r"https://img\.poipiku\.com/user_img\d+/000025049"
                       r"/\d+_\w+\.(jpe?g|png)$",
            "range": "1-10",
            "count": 10,
        }),
        ("https://poipiku.com/IllustListPcV.jsp?PG=1&ID=25049&KWD=")
    )

    def __init__(self, match):
        """Store the optional start page and the user ID from 'match'"""
        PoipikuExtractor.__init__(self, match)
        start_page, user_id = match.groups()
        self._page = start_page
        self.user_id = user_id

    def posts(self):
        """Yield the post links of each listing page, page after page"""
        endpoint = self.root + "/IllustListPcV.jsp"
        query = {
            # 0 is handed to parse_int as the fallback for URLs that
            # carry no page number.
            "PG" : text.parse_int(self._page, 0),
            "ID" : self.user_id,
            "KWD": "",
        }

        while True:
            html = self.request(endpoint, params=query).text

            found = 0
            for found, path in enumerate(text.extract_iter(
                    html, 'class="IllustInfo" href="', '"'), 1):
                yield path

            # A listing page with fewer than 48 links is treated as
            # the last one.
            if found < 48:
                return
            query["PG"] += 1
+
+
class PoipikuPostExtractor(PoipikuExtractor):
    """Extractor for one individual poipiku post"""
    subcategory = "post"
    # Group 1: user ID; group 2: post ID.
    pattern = BASE_PATTERN + r"/(\d+)/(\d+)"
    test = (
        ("https://poipiku.com/25049/5864576.html", {
            "pattern": r"https://img\.poipiku\.com/user_img03/000025049"
                       r"/005864576_EWN1Y65gQ\.png$",
            "keyword": {
                "count": "1",
                "description": "",
                "extension": "png",
                "filename": "005864576_EWN1Y65gQ",
                "num": 1,
                "post_category": "DOODLE",
                "post_id": "5864576",
                "user_id": "25049",
                "user_name": "ユキウサギ",
            },
        }),
        ("https://poipiku.com/2166245/6411749.html", {
            "pattern": r"https://img\.poipiku\.com/user_img01/002166245"
                       r"/006411749_\w+\.jpeg$",
            "count": 4,
            "keyword": {
                "count": "4",
                "description": "絵茶の産物ネタバレあるやつ",
                "num": int,
                "post_category": "SPOILER",
                "post_id": "6411749",
                "user_id": "2166245",
                "user_name": "wadahito",
            },
        }),
    )

    def __init__(self, match):
        """Store the user ID and post ID captured by 'pattern'"""
        PoipikuExtractor.__init__(self, match)
        user_id, post_id = match.groups()
        self.user_id = user_id
        self.post_id = post_id

    def posts(self):
        """Return a 1-tuple holding the site-relative path of the post"""
        post_path = "/{}/{}.html".format(self.user_id, self.post_id)
        return (post_path,)