| author | 2023-10-03 18:31:58 -0400 |
|---|---|
| committer | 2023-10-03 18:31:58 -0400 |
| commit | b8758ecd073910ce3220b2e68399147b425c37b8 (patch) |
| tree | d6aee20213508c8f425cbacb3d714367eca904c5 /gallery_dl/extractor/fantia.py |
| parent | e2f67519f8c1750a71aab3dc56b8345fff21bac5 (diff) |
New upstream version 1.26.0 (upstream/1.26.0)
Diffstat (limited to 'gallery_dl/extractor/fantia.py')
| -rw-r--r-- | gallery_dl/extractor/fantia.py | 92 |
1 file changed, 34 insertions, 58 deletions
```diff
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index f92b904..f1d51e2 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -19,13 +19,12 @@ class FantiaExtractor(Extractor):
     archive_fmt = "{post_id}_{file_id}"
     _warning = True
 
-    def items(self):
+    def _init(self):
         self.headers = {
             "Accept" : "application/json, text/plain, */*",
-            "Referer": self.root,
             "X-Requested-With": "XMLHttpRequest",
         }
-        _empty_plan = {
+        self._empty_plan = {
             "id"   : 0,
             "price": 0,
             "limit": 0,
@@ -33,22 +32,18 @@ class FantiaExtractor(Extractor):
             "description": "",
             "thumb": self.root + "/images/fallback/plan/thumb_default.png",
         }
-
         if self._warning:
-            if not self._check_cookies(("_session_id",)):
+            if not self.cookies_check(("_session_id",)):
                 self.log.warning("no '_session_id' cookie set")
             FantiaExtractor._warning = False
 
+    def items(self):
         for post_id in self.posts():
             post = self._get_post_data(post_id)
             post["num"] = 0
 
             for content in self._get_post_contents(post):
-                post["content_category"] = content["category"]
-                post["content_title"] = content["title"]
-                post["content_filename"] = content.get("filename", "")
-                post["content_id"] = content["id"]
-                post["plan"] = content["plan"] or _empty_plan
+                files = self._process_content(post, content)
                 yield Message.Directory, post
 
                 if content["visible_status"] != "visible":
@@ -57,23 +52,21 @@ class FantiaExtractor(Extractor):
                         "%s#post-content-id-%s",
                         content["visible_status"],
                         post["post_url"], content["id"])
-                for url in self._get_content_urls(post, content):
-                    text.nameext_from_url(
-                        post["content_filename"] or url, post)
-                    post["file_url"] = url
+                for file in files:
+                    post.update(file)
                     post["num"] += 1
-                    yield Message.Url, url, post
+                    text.nameext_from_url(
+                        post["content_filename"] or file["file_url"], post)
+                    yield Message.Url, file["file_url"], post
 
     def posts(self):
         """Return post IDs"""
 
     def _pagination(self, url):
         params = {"page": 1}
-        headers = self.headers.copy()
-        del headers["X-Requested-With"]
 
         while True:
-            page = self.request(url, params=params, headers=headers).text
+            page = self.request(url, params=params).text
             self._csrf_token(page)
 
             post_id = None
@@ -132,59 +125,52 @@ class FantiaExtractor(Extractor):
         return contents
 
-    def _get_content_urls(self, post, content):
-        """Extract individual URL data from the response"""
-        if "comment" in content:
-            post["content_comment"] = content["comment"]
+    def _process_content(self, post, content):
+        post["content_category"] = content["category"]
+        post["content_title"] = content["title"]
+        post["content_filename"] = content.get("filename") or ""
+        post["content_id"] = content["id"]
+        post["content_comment"] = content.get("comment") or ""
+        post["plan"] = content["plan"] or self._empty_plan
+
+        files = []
 
         if "post_content_photos" in content:
             for photo in content["post_content_photos"]:
-                post["file_id"] = photo["id"]
-                yield photo["url"]["original"]
+                files.append({"file_id" : photo["id"],
+                              "file_url": photo["url"]["original"]})
 
         if "download_uri" in content:
-            post["file_id"] = content["id"]
             url = content["download_uri"]
             if url[0] == "/":
                 url = self.root + url
-            yield url
+            files.append({"file_id" : content["id"],
+                          "file_url": url})
 
         if content["category"] == "blog" and "comment" in content:
             comment_json = util.json_loads(content["comment"])
-            ops = comment_json.get("ops") or ()
 
-            # collect blogpost text first
             blog_text = ""
-            for op in ops:
+            for op in comment_json.get("ops") or ():
                 insert = op.get("insert")
                 if isinstance(insert, str):
                     blog_text += insert
+                elif isinstance(insert, dict) and "fantiaImage" in insert:
+                    img = insert["fantiaImage"]
+                    files.append({"file_id" : img["id"],
+                                  "file_url": self.root + img["original_url"]})
             post["blogpost_text"] = blog_text
+        else:
+            post["blogpost_text"] = ""
 
-            # collect images
-            for op in ops:
-                insert = op.get("insert")
-                if isinstance(insert, dict) and "fantiaImage" in insert:
-                    img = insert["fantiaImage"]
-                    post["file_id"] = img["id"]
-                    yield self.root + img["original_url"]
+        return files
 
 
 class FantiaCreatorExtractor(FantiaExtractor):
     """Extractor for a Fantia creator's works"""
     subcategory = "creator"
     pattern = r"(?:https?://)?(?:www\.)?fantia\.jp/fanclubs/(\d+)"
-    test = (
-        ("https://fantia.jp/fanclubs/6939", {
-            "range": "1-25",
-            "count": ">= 25",
-            "keyword": {
-                "fanclub_user_id" : 52152,
-                "tags" : list,
-                "title" : str,
-            },
-        }),
-    )
+    example = "https://fantia.jp/fanclubs/12345"
 
     def __init__(self, match):
         FantiaExtractor.__init__(self, match)
@@ -199,17 +185,7 @@ class FantiaPostExtractor(FantiaExtractor):
     """Extractor for media from a single Fantia post"""
     subcategory = "post"
    pattern = r"(?:https?://)?(?:www\.)?fantia\.jp/posts/(\d+)"
-    test = (
-        ("https://fantia.jp/posts/508363", {
-            "count": 6,
-            "keyword": {
-                "post_title": "zunda逆バニーでおしりコッショリ",
-                "tags": list,
-                "rating": "adult",
-                "post_id": 508363
-            },
-        }),
-    )
+    example = "https://fantia.jp/posts/12345"
 
     def __init__(self, match):
         FantiaExtractor.__init__(self, match)
```
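The core of this refactor is that the old generator `_get_content_urls()` (which yielded bare URLs and mutated `post["file_id"]` as a side effect) is replaced by `_process_content()`, which copies the content-level fields onto the post once and returns a list of `{"file_id": ..., "file_url": ...}` records that `items()` merges into the post via `post.update(file)` before yielding each file. Below is a minimal, self-contained sketch of that pattern; it is not gallery-dl's actual classes or API. The helper names (`process_content`, `emit`), the sample `demo_content` dict, and the example.org URLs are made up for illustration, while the field names mirror those in the diff.

```python
# Sketch of the 1.26.0 pattern: collect per-file records up front,
# then merge each record into the shared post metadata before emitting it.

def process_content(post, content, empty_plan):
    """Copy content-level fields onto the post, return its file records."""
    post["content_category"] = content["category"]
    post["content_title"] = content["title"]
    post["content_filename"] = content.get("filename") or ""
    post["plan"] = content.get("plan") or empty_plan

    files = []
    for photo in content.get("post_content_photos", ()):
        files.append({"file_id": photo["id"],
                      "file_url": photo["url"]["original"]})
    if "download_uri" in content:
        files.append({"file_id": content["id"],
                      "file_url": content["download_uri"]})
    return files


def emit(post, content, empty_plan):
    """Mimic items(): one directory record, then one record per file."""
    files = process_content(post, content, empty_plan)
    yield ("directory", dict(post))
    for file in files:
        post.update(file)                      # adds file_id / file_url
        post["num"] = post.get("num", 0) + 1
        yield ("url", post["file_url"], dict(post))


if __name__ == "__main__":
    demo_content = {                           # hypothetical sample data
        "category": "photo_gallery",
        "title": "sample",
        "id": 1,
        "plan": None,
        "post_content_photos": [
            {"id": 10, "url": {"original": "https://example.org/a.jpg"}},
            {"id": 11, "url": {"original": "https://example.org/b.jpg"}},
        ],
    }
    for message in emit({"post_id": 123}, demo_content, {"id": 0, "price": 0}):
        print(message)
```

Compared with the previous generator approach, building the list first lets `items()` yield the directory entry with all content-level fields already in place, and each file carries its own `file_id`/`file_url` instead of relying on a separate `post["file_url"] = url` assignment inside the loop.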
