| author | 2023-10-03 18:31:58 -0400 |
|---|---|
| committer | 2023-10-03 18:31:58 -0400 |
| commit | b8758ecd073910ce3220b2e68399147b425c37b8 (patch) |
| tree | d6aee20213508c8f425cbacb3d714367eca904c5 /gallery_dl/extractor/fantia.py |
| parent | e2f67519f8c1750a71aab3dc56b8345fff21bac5 (diff) |
New upstream version 1.26.0 (upstream/1.26.0)
Diffstat (limited to 'gallery_dl/extractor/fantia.py')
| -rw-r--r-- | gallery_dl/extractor/fantia.py | 92 |
1 file changed, 34 insertions, 58 deletions
```diff
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index f92b904..f1d51e2 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -19,13 +19,12 @@ class FantiaExtractor(Extractor):
     archive_fmt = "{post_id}_{file_id}"
     _warning = True
 
-    def items(self):
+    def _init(self):
         self.headers = {
             "Accept" : "application/json, text/plain, */*",
-            "Referer": self.root,
             "X-Requested-With": "XMLHttpRequest",
         }
-        _empty_plan = {
+        self._empty_plan = {
             "id"   : 0,
             "price": 0,
             "limit": 0,
@@ -33,22 +32,18 @@ class FantiaExtractor(Extractor):
             "description": "",
             "thumb": self.root + "/images/fallback/plan/thumb_default.png",
         }
-
         if self._warning:
-            if not self._check_cookies(("_session_id",)):
+            if not self.cookies_check(("_session_id",)):
                 self.log.warning("no '_session_id' cookie set")
             FantiaExtractor._warning = False
 
+    def items(self):
         for post_id in self.posts():
             post = self._get_post_data(post_id)
             post["num"] = 0
 
             for content in self._get_post_contents(post):
-                post["content_category"] = content["category"]
-                post["content_title"] = content["title"]
-                post["content_filename"] = content.get("filename", "")
-                post["content_id"] = content["id"]
-                post["plan"] = content["plan"] or _empty_plan
+                files = self._process_content(post, content)
                 yield Message.Directory, post
 
                 if content["visible_status"] != "visible":
@@ -57,23 +52,21 @@ class FantiaExtractor(Extractor):
                         "%s#post-content-id-%s",
                         content["visible_status"],
                         post["post_url"], content["id"])
-                for url in self._get_content_urls(post, content):
-                    text.nameext_from_url(
-                        post["content_filename"] or url, post)
-                    post["file_url"] = url
+                for file in files:
+                    post.update(file)
                     post["num"] += 1
-                    yield Message.Url, url, post
+                    text.nameext_from_url(
+                        post["content_filename"] or file["file_url"], post)
+                    yield Message.Url, file["file_url"], post
 
     def posts(self):
         """Return post IDs"""
 
     def _pagination(self, url):
         params = {"page": 1}
-        headers = self.headers.copy()
-        del headers["X-Requested-With"]
 
         while True:
-            page = self.request(url, params=params, headers=headers).text
+            page = self.request(url, params=params).text
             self._csrf_token(page)
 
             post_id = None
@@ -132,59 +125,52 @@ class FantiaExtractor(Extractor):
         return contents
 
-    def _get_content_urls(self, post, content):
-        """Extract individual URL data from the response"""
-        if "comment" in content:
-            post["content_comment"] = content["comment"]
+    def _process_content(self, post, content):
+        post["content_category"] = content["category"]
+        post["content_title"] = content["title"]
+        post["content_filename"] = content.get("filename") or ""
+        post["content_id"] = content["id"]
+        post["content_comment"] = content.get("comment") or ""
+        post["plan"] = content["plan"] or self._empty_plan
+
+        files = []
 
         if "post_content_photos" in content:
             for photo in content["post_content_photos"]:
-                post["file_id"] = photo["id"]
-                yield photo["url"]["original"]
+                files.append({"file_id" : photo["id"],
+                              "file_url": photo["url"]["original"]})
 
         if "download_uri" in content:
-            post["file_id"] = content["id"]
             url = content["download_uri"]
             if url[0] == "/":
                 url = self.root + url
-            yield url
+            files.append({"file_id" : content["id"],
+                          "file_url": url})
 
         if content["category"] == "blog" and "comment" in content:
             comment_json = util.json_loads(content["comment"])
-            ops = comment_json.get("ops") or ()
 
-            # collect blogpost text first
             blog_text = ""
-            for op in ops:
+            for op in comment_json.get("ops") or ():
                 insert = op.get("insert")
                 if isinstance(insert, str):
                     blog_text += insert
+                elif isinstance(insert, dict) and "fantiaImage" in insert:
+                    img = insert["fantiaImage"]
+                    files.append({"file_id" : img["id"],
+                                  "file_url": self.root + img["original_url"]})
             post["blogpost_text"] = blog_text
+        else:
+            post["blogpost_text"] = ""
 
-            # collect images
-            for op in ops:
-                insert = op.get("insert")
-                if isinstance(insert, dict) and "fantiaImage" in insert:
-                    img = insert["fantiaImage"]
-                    post["file_id"] = img["id"]
-                    yield self.root + img["original_url"]
+        return files
 
 
 class FantiaCreatorExtractor(FantiaExtractor):
     """Extractor for a Fantia creator's works"""
     subcategory = "creator"
     pattern = r"(?:https?://)?(?:www\.)?fantia\.jp/fanclubs/(\d+)"
-    test = (
-        ("https://fantia.jp/fanclubs/6939", {
-            "range": "1-25",
-            "count": ">= 25",
-            "keyword": {
-                "fanclub_user_id" : 52152,
-                "tags" : list,
-                "title" : str,
-            },
-        }),
-    )
+    example = "https://fantia.jp/fanclubs/12345"
 
     def __init__(self, match):
         FantiaExtractor.__init__(self, match)
@@ -199,17 +185,7 @@ class FantiaPostExtractor(FantiaExtractor):
     """Extractor for media from a single Fantia post"""
     subcategory = "post"
    pattern = r"(?:https?://)?(?:www\.)?fantia\.jp/posts/(\d+)"
-    test = (
-        ("https://fantia.jp/posts/508363", {
-            "count": 6,
-            "keyword": {
-                "post_title": "zunda逆バニーでおしりコッショリ",
-                "tags": list,
-                "rating": "adult",
-                "post_id": 508363
-            },
-        }),
-    )
+    example = "https://fantia.jp/posts/12345"
 
     def __init__(self, match):
         FantiaExtractor.__init__(self, match)
```
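The core of this refactor is that the old generator `_get_content_urls()` (which yielded bare URLs and mutated `post["file_id"]` as a side effect) is replaced by `_process_content()`, which copies the content-level fields onto the post once and returns a list of `{"file_id": ..., "file_url": ...}` records that `items()` merges into the post via `post.update(file)` before yielding each file. Below is a minimal, self-contained sketch of that pattern; it is not gallery-dl's actual classes or API. The helper names (`process_content`, `emit`), the sample `demo_content` dict, and the example.org URLs are made up for illustration, while the field names mirror those in the diff.

```python
# Sketch of the 1.26.0 pattern: collect per-file records up front,
# then merge each record into the shared post metadata before emitting it.

def process_content(post, content, empty_plan):
    """Copy content-level fields onto the post, return its file records."""
    post["content_category"] = content["category"]
    post["content_title"] = content["title"]
    post["content_filename"] = content.get("filename") or ""
    post["plan"] = content.get("plan") or empty_plan

    files = []
    for photo in content.get("post_content_photos", ()):
        files.append({"file_id": photo["id"],
                      "file_url": photo["url"]["original"]})
    if "download_uri" in content:
        files.append({"file_id": content["id"],
                      "file_url": content["download_uri"]})
    return files


def emit(post, content, empty_plan):
    """Mimic items(): one directory record, then one record per file."""
    files = process_content(post, content, empty_plan)
    yield ("directory", dict(post))
    for file in files:
        post.update(file)                      # adds file_id / file_url
        post["num"] = post.get("num", 0) + 1
        yield ("url", post["file_url"], dict(post))


if __name__ == "__main__":
    demo_content = {                           # hypothetical sample data
        "category": "photo_gallery",
        "title": "sample",
        "id": 1,
        "plan": None,
        "post_content_photos": [
            {"id": 10, "url": {"original": "https://example.org/a.jpg"}},
            {"id": 11, "url": {"original": "https://example.org/b.jpg"}},
        ],
    }
    for message in emit({"post_id": 123}, demo_content, {"id": 0, "price": 0}):
        print(message)
```

Compared with the previous generator approach, building the list first lets `items()` yield the directory entry with all content-level fields already in place, and each file carries its own `file_id`/`file_url` instead of relying on a separate `post["file_url"] = url` assignment inside the loop.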
