summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/fantia.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/fantia.py')
-rw-r--r-- gallery_dl/extractor/fantia.py 142
1 files changed, 87 insertions, 55 deletions
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index 13dfead..35c4cc4 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -24,6 +24,14 @@ class FantiaExtractor(Extractor):
"Accept" : "application/json, text/plain, */*",
"Referer": self.root,
}
+ _empty_plan = {
+ "id" : 0,
+ "price": 0,
+ "limit": 0,
+ "name" : "",
+ "description": "",
+ "thumb": self.root + "/images/fallback/plan/thumb_default.png",
+ }
if self._warning:
if not self._check_cookies(("_session_id",)):
@@ -31,15 +39,29 @@ class FantiaExtractor(Extractor):
FantiaExtractor._warning = False
for post_id in self.posts():
- full_response, post = self._get_post_data(post_id)
- yield Message.Directory, post
+ post = self._get_post_data(post_id)
post["num"] = 0
- for url, url_data in self._get_urls_from_post(full_response, post):
- post["num"] += 1
- fname = url_data["content_filename"] or url
- text.nameext_from_url(fname, url_data)
- url_data["file_url"] = url
- yield Message.Url, url, url_data
+
+ for content in self._get_post_contents(post):
+ post["content_category"] = content["category"]
+ post["content_title"] = content["title"]
+ post["content_filename"] = content.get("filename", "")
+ post["content_id"] = content["id"]
+ post["plan"] = content["plan"] or _empty_plan
+ yield Message.Directory, post
+
+ if content["visible_status"] != "visible":
+ self.log.warning(
+ "Unable to download '%s' files from "
+ "%s#post-content-id-%s", content["visible_status"],
+ post["post_url"], content["id"])
+
+ for url in self._get_content_urls(post, content):
+ text.nameext_from_url(
+ post["content_filename"] or url, post)
+ post["file_url"] = url
+ post["num"] += 1
+ yield Message.Url, url, post
def posts(self):
"""Return post IDs"""
@@ -71,7 +93,7 @@ class FantiaExtractor(Extractor):
"""Fetch and process post data"""
url = self.root+"/api/v1/posts/"+post_id
resp = self.request(url, headers=self.headers).json()["post"]
- post = {
+ return {
"post_id": resp["id"],
"post_url": self.root + "/posts/" + str(resp["id"]),
"post_title": resp["title"],
@@ -85,55 +107,65 @@ class FantiaExtractor(Extractor):
"fanclub_user_name": resp["fanclub"]["user"]["name"],
"fanclub_name": resp["fanclub"]["name"],
"fanclub_url": self.root+"/fanclubs/"+str(resp["fanclub"]["id"]),
- "tags": resp["tags"]
+ "tags": resp["tags"],
+ "_data": resp,
}
- return resp, post
- def _get_urls_from_post(self, resp, post):
+    def _get_post_contents(self, post):
+        """Return the post's content entries, thumbnail entry first"""
+        contents = post["_data"]["post_contents"]
+        try:
+            url = post["_data"]["thumb"]["original"]
+        except (KeyError, TypeError):
+            # no "thumb"/"original" key, or "thumb" is not a dict (null)
+            return contents
+        # build a new list: the API response stays unmodified and
+        # repeated calls cannot stack additional thumb entries
+        return [{
+            "id": "thumb",
+            "title": "thumb",
+            "category": "thumb",
+            "download_uri": url,
+            "visible_status": "visible",
+            "plan": None,
+        }] + list(contents)
+
+ def _get_content_urls(self, post, content):  # generator; also updates 'post' in place
"""Extract individual URL data from the response"""
- if "thumb" in resp and resp["thumb"] and "original" in resp["thumb"]:
- post["content_filename"] = ""
- post["content_category"] = "thumb"
- post["file_id"] = "thumb"
- yield resp["thumb"]["original"], post
-
- for content in resp["post_contents"]:
- post["content_category"] = content["category"]
- post["content_title"] = content["title"]
- post["content_filename"] = content.get("filename", "")
- post["content_id"] = content["id"]
-
- if "comment" in content:
- post["content_comment"] = content["comment"]
-
- if "post_content_photos" in content:
- for photo in content["post_content_photos"]:
- post["file_id"] = photo["id"]
- yield photo["url"]["original"], post
-
- if "download_uri" in content:
- post["file_id"] = content["id"]
- yield self.root+"/"+content["download_uri"], post
-
- if content["category"] == "blog" and "comment" in content:
- comment_json = util.json_loads(content["comment"])
- ops = comment_json.get("ops", ())
-
- # collect blogpost text first
- blog_text = ""
- for op in ops:
- insert = op.get("insert")
- if isinstance(insert, str):
- blog_text += insert
- post["blogpost_text"] = blog_text
-
- # collect images
- for op in ops:
- insert = op.get("insert")
- if isinstance(insert, dict) and "fantiaImage" in insert:
- img = insert["fantiaImage"]
- post["file_id"] = img["id"]
- yield "https://fantia.jp" + img["original_url"], post
+ if "comment" in content:
+ post["content_comment"] = content["comment"]
+
+ if "post_content_photos" in content:
+ for photo in content["post_content_photos"]:  # one URL per photo entry
+ post["file_id"] = photo["id"]
+ yield photo["url"]["original"]
+
+ if "download_uri" in content:
+ post["file_id"] = content["id"]
+ url = content["download_uri"]
+ if url[0] == "/":  # leading slash: prefix with self.root; assumes a non-empty string
+ url = self.root + url
+ yield url
+
+ if content["category"] == "blog" and "comment" in content:
+ comment_json = util.json_loads(content["comment"])  # blog "comment" is parsed as JSON
+ ops = comment_json.get("ops") or ()  # also covers a present-but-null "ops"
+
+ # collect blogpost text first
+ blog_text = ""
+ for op in ops:
+ insert = op.get("insert")
+ if isinstance(insert, str):  # string inserts are text fragments
+ blog_text += insert
+ post["blogpost_text"] = blog_text
+
+ # collect images
+ for op in ops:
+ insert = op.get("insert")
+ if isinstance(insert, dict) and "fantiaImage" in insert:  # dict inserts may embed an image
+ img = insert["fantiaImage"]
+ post["file_id"] = img["id"]
+ yield self.root + img["original_url"]
class FantiaCreatorExtractor(FantiaExtractor):