summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/fantia.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/fantia.py')
-rw-r--r-- gallery_dl/extractor/fantia.py 92
1 files changed, 34 insertions, 58 deletions
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index f92b904..f1d51e2 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -19,13 +19,12 @@ class FantiaExtractor(Extractor):
archive_fmt = "{post_id}_{file_id}"
_warning = True
- def items(self):
+ def _init(self):
self.headers = {
"Accept" : "application/json, text/plain, */*",
- "Referer": self.root,
"X-Requested-With": "XMLHttpRequest",
}
- _empty_plan = {
+ self._empty_plan = {
"id" : 0,
"price": 0,
"limit": 0,
@@ -33,22 +32,18 @@ class FantiaExtractor(Extractor):
"description": "",
"thumb": self.root + "/images/fallback/plan/thumb_default.png",
}
-
if self._warning:
- if not self._check_cookies(("_session_id",)):
+ if not self.cookies_check(("_session_id",)):
self.log.warning("no '_session_id' cookie set")
FantiaExtractor._warning = False
+ def items(self):
for post_id in self.posts():
post = self._get_post_data(post_id)
post["num"] = 0
for content in self._get_post_contents(post):
- post["content_category"] = content["category"]
- post["content_title"] = content["title"]
- post["content_filename"] = content.get("filename", "")
- post["content_id"] = content["id"]
- post["plan"] = content["plan"] or _empty_plan
+ files = self._process_content(post, content)
yield Message.Directory, post
if content["visible_status"] != "visible":
@@ -57,23 +52,21 @@ class FantiaExtractor(Extractor):
"%s#post-content-id-%s", content["visible_status"],
post["post_url"], content["id"])
- for url in self._get_content_urls(post, content):
- text.nameext_from_url(
- post["content_filename"] or url, post)
- post["file_url"] = url
+ for file in files:
+ post.update(file)
post["num"] += 1
- yield Message.Url, url, post
+ text.nameext_from_url(
+ post["content_filename"] or file["file_url"], post)
+ yield Message.Url, file["file_url"], post
def posts(self):
"""Return post IDs"""
def _pagination(self, url):
params = {"page": 1}
- headers = self.headers.copy()
- del headers["X-Requested-With"]
while True:
- page = self.request(url, params=params, headers=headers).text
+ page = self.request(url, params=params).text
self._csrf_token(page)
post_id = None
@@ -132,59 +125,52 @@ class FantiaExtractor(Extractor):
return contents
- def _get_content_urls(self, post, content):
- """Extract individual URL data from the response"""
- if "comment" in content:
- post["content_comment"] = content["comment"]
+ def _process_content(self, post, content):
+ post["content_category"] = content["category"]
+ post["content_title"] = content["title"]
+ post["content_filename"] = content.get("filename") or ""
+ post["content_id"] = content["id"]
+ post["content_comment"] = content.get("comment") or ""
+ post["plan"] = content["plan"] or self._empty_plan
+
+ files = []
if "post_content_photos" in content:
for photo in content["post_content_photos"]:
- post["file_id"] = photo["id"]
- yield photo["url"]["original"]
+ files.append({"file_id" : photo["id"],
+ "file_url": photo["url"]["original"]})
if "download_uri" in content:
- post["file_id"] = content["id"]
url = content["download_uri"]
if url[0] == "/":
url = self.root + url
- yield url
+ files.append({"file_id" : content["id"],
+ "file_url": url})
if content["category"] == "blog" and "comment" in content:
comment_json = util.json_loads(content["comment"])
- ops = comment_json.get("ops") or ()
- # collect blogpost text first
blog_text = ""
- for op in ops:
+ for op in comment_json.get("ops") or ():
insert = op.get("insert")
if isinstance(insert, str):
blog_text += insert
+ elif isinstance(insert, dict) and "fantiaImage" in insert:
+ img = insert["fantiaImage"]
+ files.append({"file_id" : img["id"],
+ "file_url": self.root + img["original_url"]})
post["blogpost_text"] = blog_text
+ else:
+ post["blogpost_text"] = ""
- # collect images
- for op in ops:
- insert = op.get("insert")
- if isinstance(insert, dict) and "fantiaImage" in insert:
- img = insert["fantiaImage"]
- post["file_id"] = img["id"]
- yield self.root + img["original_url"]
+ return files
class FantiaCreatorExtractor(FantiaExtractor):
"""Extractor for a Fantia creator's works"""
subcategory = "creator"
pattern = r"(?:https?://)?(?:www\.)?fantia\.jp/fanclubs/(\d+)"
- test = (
- ("https://fantia.jp/fanclubs/6939", {
- "range": "1-25",
- "count": ">= 25",
- "keyword": {
- "fanclub_user_id" : 52152,
- "tags" : list,
- "title" : str,
- },
- }),
- )
+ example = "https://fantia.jp/fanclubs/12345"
def __init__(self, match):
FantiaExtractor.__init__(self, match)
@@ -199,17 +185,7 @@ class FantiaPostExtractor(FantiaExtractor):
"""Extractor for media from a single Fantia post"""
subcategory = "post"
pattern = r"(?:https?://)?(?:www\.)?fantia\.jp/posts/(\d+)"
- test = (
- ("https://fantia.jp/posts/508363", {
- "count": 6,
- "keyword": {
- "post_title": "zunda逆バニーでおしりコッショリ",
- "tags": list,
- "rating": "adult",
- "post_id": 508363
- },
- }),
- )
+ example = "https://fantia.jp/posts/12345"
def __init__(self, match):
FantiaExtractor.__init__(self, match)