author     Unit 193 <unit193@unit193.net>  2024-08-12 02:42:36 -0400
committer  Unit 193 <unit193@unit193.net>  2024-08-12 02:42:36 -0400
commit     b5e56c51e491b41f9eb6a895459c185788a377e5 (patch)
tree       f933c7df043d8949e0dc39b560ab534a5d0dc60f  /gallery_dl/extractor
parent     032e5bed275a253e122ed9ac86dac7b8c4204172 (diff)
New upstream version 1.27.3 (upstream/1.27.3)
Diffstat (limited to 'gallery_dl/extractor')
-rw-r--r--  gallery_dl/extractor/behance.py         | 15
-rw-r--r--  gallery_dl/extractor/bunkr.py           | 10
-rw-r--r--  gallery_dl/extractor/cien.py            |  2
-rw-r--r--  gallery_dl/extractor/deviantart.py      | 17
-rw-r--r--  gallery_dl/extractor/fanbox.py          | 16
-rw-r--r--  gallery_dl/extractor/furaffinity.py     | 40
-rw-r--r--  gallery_dl/extractor/hentaicosplays.py  | 10
-rw-r--r--  gallery_dl/extractor/hotleak.py         |  2
-rw-r--r--  gallery_dl/extractor/instagram.py       |  8
-rw-r--r--  gallery_dl/extractor/twitter.py         |  5
-rw-r--r--  gallery_dl/extractor/zerochan.py        |  8
11 files changed, 93 insertions, 40 deletions
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index f24059f..72f9195 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -49,7 +49,7 @@ class BehanceExtractor(Extractor):
def _update(self, data):
# compress data to simple lists
- if data["fields"] and isinstance(data["fields"][0], dict):
+ if data.get("fields") and isinstance(data["fields"][0], dict):
data["fields"] = [
field.get("name") or field.get("label")
for field in data["fields"]
@@ -165,6 +165,19 @@ class BehanceGalleryExtractor(BehanceExtractor):
elif mtype == "video":
try:
+ url = text.extr(module["embed"], 'src="', '"')
+ page = self.request(text.unescape(url)).text
+
+ url = text.extr(page, '<source src="', '"')
+ if text.ext_from_url(url) == "m3u8":
+ url = "ytdl:" + url
+ module["extension"] = "mp4"
+ append((url, module))
+ continue
+ except Exception as exc:
+ self.log.debug("%s: %s", exc.__class__.__name__, exc)
+
+ try:
renditions = module["videoData"]["renditions"]
except Exception:
self.log.warning("No download URLs for video %s",
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 77f0de6..240bbd3 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -13,7 +13,7 @@ from .. import text
BASE_PATTERN = (
r"(?:https?://)?(?:app\.)?(bunkr+"
- r"\.(?:s[kiu]|fi|ru|la|is|to|ac|black|cat|media|red|site|ws))"
+ r"\.(?:s[kiu]|[cf]i|ru|la|is|to|ac|black|cat|media|red|site|ws|org))"
)
LEGACY_DOMAINS = {
@@ -55,6 +55,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"album_name" : text.unescape(info[0]),
"album_size" : size[1:-1],
"count" : len(urls),
+ "_http_validate": self._validate,
}
def _extract_files(self, urls):
@@ -74,6 +75,12 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
text.rextract(page, ' href="', '"', page.rindex("Download"))[0]
)
+ def _validate(self, response):
+ if response.history and response.url.endswith("/maintenance-vid.mp4"):
+ self.log.warning("File server in maintenance mode")
+ return False
+ return True
+
class BunkrMediaExtractor(BunkrAlbumExtractor):
"""Extractor for bunkr.sk media links"""
@@ -95,4 +102,5 @@ class BunkrMediaExtractor(BunkrAlbumExtractor):
"album_size" : -1,
"description": "",
"count" : 1,
+ "_http_validate": self._validate,
}
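
Note: a quick standalone check of the widened BASE_PATTERN above (newly covered TLDs include .ci and .org); the sample URLs are made up and the pattern is copied verbatim from the hunk:

    import re

    BUNKR_PATTERN = (
        r"(?:https?://)?(?:app\.)?(bunkr+"
        r"\.(?:s[kiu]|[cf]i|ru|la|is|to|ac|black|cat|media|red|site|ws|org))"
    )

    for sample in ("https://bunkr.ci/a/xxxxxxxx",
                   "https://bunkr.org/a/xxxxxxxx",
                   "https://app.bunkr.ru/a/xxxxxxxx"):
        match = re.match(BUNKR_PATTERN, sample)
        print(sample, "->", match.group(1) if match else "no match")
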
diff --git a/gallery_dl/extractor/cien.py b/gallery_dl/extractor/cien.py
index bae86d0..378365e 100644
--- a/gallery_dl/extractor/cien.py
+++ b/gallery_dl/extractor/cien.py
@@ -59,7 +59,7 @@ class CienArticleExtractor(CienExtractor):
post = util.json_loads(text.extr(
page, '<script type="application/ld+json">', '</script>'))[0]
- files = self._extract_files(post.get("articleBody") or page)
+ files = self._extract_files(page)
post["post_url"] = url
post["post_id"] = text.parse_int(self.groups[1])
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index a70710c..f3ea4e7 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -12,7 +12,6 @@ from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
import collections
-import itertools
import mimetypes
import binascii
import time
@@ -246,7 +245,6 @@ class DeviantartExtractor(Extractor):
deviation["username"] = deviation["author"]["username"]
deviation["_username"] = deviation["username"].lower()
- deviation["da_category"] = deviation["category"]
deviation["published_time"] = text.parse_int(
deviation["published_time"])
deviation["date"] = text.parse_timestamp(
@@ -301,15 +299,6 @@ class DeviantartExtractor(Extractor):
)
else:
needle = '<div usr class="gr">'
- catlist = deviation["category_path"].split("/")
- categories = " / ".join(
- ('<span class="crumb"><a href="{}/{}/"><span>{}</span></a>'
- '</span>').format(self.root, cpath, cat.capitalize())
- for cat, cpath in zip(
- catlist,
- itertools.accumulate(catlist, lambda t, c: t + "/" + c)
- )
- )
username = deviation["author"]["username"]
urlname = deviation.get("username") or username.lower()
header = HEADER_TEMPLATE.format(
@@ -318,7 +307,6 @@ class DeviantartExtractor(Extractor):
userurl="{}/{}/".format(self.root, urlname),
username=username,
date=deviation["date"],
- categories=categories,
)
if needle in html:
@@ -624,7 +612,7 @@ class DeviantartAvatarExtractor(DeviantartExtractor):
def _make_deviation(self, url, user, index, fmt):
return {
"author" : user,
- "category" : "avatar",
+ "da_category" : "avatar",
"index" : text.parse_int(index),
"is_deleted" : False,
"is_downloadable": False,
@@ -1773,9 +1761,6 @@ HEADER_TEMPLATE = """<div usr class="gr">
<span class="user-symbol regular"></span></span></span>,
<span>{date}</span>
</li>
- <li class="category">
- {categories}
- </li>
</ul>
</div>
"""
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index d81fd0b..d8337b6 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -309,8 +309,20 @@ class FanboxCreatorExtractor(FanboxExtractor):
self.creator_id = match.group(1) or match.group(2)
def posts(self):
- url = "https://api.fanbox.cc/post.listCreator?creatorId={}&limit=10"
- return self._pagination(url.format(self.creator_id))
+ url = "https://api.fanbox.cc/post.paginateCreator?creatorId="
+ return self._pagination_creator(url + self.creator_id)
+
+ def _pagination_creator(self, url):
+ urls = self.request(url, headers=self.headers).json()["body"]
+ for url in urls:
+ url = text.ensure_http_scheme(url)
+ body = self.request(url, headers=self.headers).json()["body"]
+ for item in body:
+ try:
+ yield self._get_post_data(item["id"])
+ except Exception as exc:
+ self.log.warning("Skipping post %s (%s: %s)",
+ item["id"], exc.__class__.__name__, exc)
class FanboxPostExtractor(FanboxExtractor):
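
Note: creator pagination now goes through post.paginateCreator, which returns a list of page URLs whose bodies each contain post items. A rough sketch of that two-step flow with plain requests (creator ID and headers are placeholders; real requests also need the account's auth cookies):

    import requests

    creator_id = "CREATOR"  # hypothetical creator ID
    headers = {"Origin": "https://www.fanbox.cc"}

    # step 1: paginateCreator returns a list of page URLs
    pages = requests.get(
        "https://api.fanbox.cc/post.paginateCreator?creatorId=" + creator_id,
        headers=headers).json()["body"]

    # step 2: each page URL yields a batch of post items
    for page_url in pages:
        for item in requests.get(page_url, headers=headers).json()["body"]:
            print(item["id"])
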
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index f48a984..3055426 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -78,14 +78,12 @@ class FuraffinityExtractor(Extractor):
path = extr('href="//d', '"')
if not path:
- self.log.warning(
- "Unable to download post %s (\"%s\")",
- post_id, text.remove_html(
- extr('System Message', '</section>') or
- extr('System Message', '</table>')
- )
- )
- return None
+ msg = text.remove_html(
+ extr('System Message', '</section>') or
+ extr('System Message', '</table>')
+ ).partition(" . Continue ")[0]
+ return self.log.warning(
+ "Unable to download post %s (\"%s\")", post_id, msg)
pi = text.parse_int
rh = text.remove_html
@@ -335,3 +333,29 @@ class FuraffinityFollowingExtractor(FuraffinityExtractor):
if url.endswith(path):
return
url = self.root + path
+
+
+class FuraffinitySubmissionsExtractor(FuraffinityExtractor):
+ """Extractor for new furaffinity submissions"""
+ subcategory = "submissions"
+ pattern = BASE_PATTERN + r"(/msg/submissions(?:/[^/?#]+)?)"
+ example = "https://www.furaffinity.net/msg/submissions"
+
+ def posts(self):
+ self.user = None
+ url = self.root + self.groups[0]
+ return self._pagination_submissions(url)
+
+ def _pagination_submissions(self, url):
+ while True:
+ page = self.request(url).text
+
+ for post_id in text.extract_iter(page, 'id="sid-', '"'):
+ yield post_id
+
+ path = (text.extr(page, '<a class="button standard more" href="', '"') or # noqa 501
+ text.extr(page, '<a class="more-half" href="', '"') or
+ text.extr(page, '<a class="more" href="', '"'))
+ if not path:
+ return
+ url = self.root + text.unescape(path)
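
Note: a standalone sketch of the new submissions pagination, using re/requests instead of gallery_dl's text helpers; the selectors come from the hunk above, and a logged-in session with FurAffinity cookies is assumed:

    import re
    import requests

    def submission_ids(session, root="https://www.furaffinity.net",
                       path="/msg/submissions"):
        url = root + path
        while True:
            page = session.get(url).text
            # submission containers carry id="sid-<number>"
            yield from re.findall(r'id="sid-(\d+)"', page)
            # follow whichever "more" button variant the page uses
            more = (re.search(r'<a class="button standard more" href="([^"]+)"', page) or
                    re.search(r'<a class="more(?:-half)?" href="([^"]+)"', page))
            if not more:
                return
            url = root + more.group(1)
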
diff --git a/gallery_dl/extractor/hentaicosplays.py b/gallery_dl/extractor/hentaicosplays.py
index d5ff8c8..fbbae16 100644
--- a/gallery_dl/extractor/hentaicosplays.py
+++ b/gallery_dl/extractor/hentaicosplays.py
@@ -4,7 +4,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://hentai-cosplays.com/
+"""Extractors for https://hentai-cosplay-xxx.com/
(also works for hentai-img.com and porn-images-xxx.com)"""
from .common import GalleryExtractor
@@ -13,19 +13,21 @@ from .. import text
class HentaicosplaysGalleryExtractor(GalleryExtractor):
"""Extractor for image galleries from
- hentai-cosplays.com, hentai-img.com, and porn-images-xxx.com"""
+ hentai-cosplay-xxx.com, hentai-img.com, and porn-images-xxx.com"""
category = "hentaicosplays"
directory_fmt = ("{site}", "{title}")
filename_fmt = "{filename}.{extension}"
archive_fmt = "{title}_{filename}"
pattern = r"((?:https?://)?(?:\w{2}\.)?" \
- r"(hentai-cosplays|hentai-img|porn-images-xxx)\.com)/" \
+ r"(hentai-cosplay(?:s|-xxx)|hentai-img|porn-images-xxx)\.com)/" \
r"(?:image|story)/([\w-]+)"
- example = "https://hentai-cosplays.com/image/TITLE/"
+ example = "https://hentai-cosplay-xxx.com/image/TITLE/"
def __init__(self, match):
root, self.site, self.slug = match.groups()
self.root = text.ensure_http_scheme(root)
+ if self.root == "https://hentai-cosplays.com":
+ self.root = "https://hentai-cosplay-xxx.com"
url = "{}/story/{}/".format(self.root, self.slug)
GalleryExtractor.__init__(self, match, url)
diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py
index 34fbabd..ddfc54b 100644
--- a/gallery_dl/extractor/hotleak.py
+++ b/gallery_dl/extractor/hotleak.py
@@ -23,7 +23,7 @@ class HotleakExtractor(Extractor):
def items(self):
for post in self.posts():
- if self.type == "photo":
+ if not post["url"].startswith("ytdl:"):
post["url"] = (
post["url"]
.replace("/storage/storage/", "/storage/")
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index dbe2df3..c05fe72 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -936,23 +936,23 @@ class InstagramGraphqlAPI():
def tags_media(self, tag):
query_hash = "9b498c08113f1e09617a1703c22b2f32"
- variables = {"tag_name": text.unescape(tag), "first": 50}
+ variables = {"tag_name": text.unescape(tag), "first": 24}
return self._pagination(query_hash, variables,
"hashtag", "edge_hashtag_to_media")
def user_clips(self, user_id):
query_hash = "bc78b344a68ed16dd5d7f264681c4c76"
- variables = {"id": user_id, "first": 50}
+ variables = {"id": user_id, "first": 24}
return self._pagination(query_hash, variables)
def user_feed(self, user_id):
query_hash = "69cba40317214236af40e7efa697781d"
- variables = {"id": user_id, "first": 50}
+ variables = {"id": user_id, "first": 24}
return self._pagination(query_hash, variables)
def user_tagged(self, user_id):
query_hash = "be13233562af2d229b008d2976b998b5"
- variables = {"id": user_id, "first": 50}
+ variables = {"id": user_id, "first": 24}
return self._pagination(query_hash, variables)
def _call(self, query_hash, variables):
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 9fa5b3f..ea57d76 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1849,6 +1849,11 @@ def _login_impl(extr, username, password):
url, params=params, headers=headers, json=data,
method="POST", fatal=None)
+ # update 'x-csrf-token' header (#5945)
+ csrf_token = response.cookies.get("ct0")
+ if csrf_token:
+ headers["x-csrf-token"] = csrf_token
+
try:
data = response.json()
except ValueError:
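
Note: the change above keeps the x-csrf-token header in step with the rotating ct0 cookie during the login flow (#5945). A minimal sketch of the same idea with requests; the endpoint and payload are placeholders, not the full onboarding task flow:

    import requests

    session = requests.Session()
    headers = {"x-csrf-token": session.cookies.get("ct0", "")}

    response = session.post("https://api.twitter.com/1.1/onboarding/task.json",
                            headers=headers, json={})

    # the server may rotate ct0 on any response; mirror it into the header
    csrf_token = response.cookies.get("ct0")
    if csrf_token:
        headers["x-csrf-token"] = csrf_token
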
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index 126ef49..f9b1a7f 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -198,11 +198,15 @@ class ZerochanTagExtractor(ZerochanExtractor):
while True:
response = self.request(url, params=params, allow_redirects=False)
+
if response.status_code >= 300:
url = text.urljoin(self.root, response.headers["location"])
- response = self.request(url, params=params)
- data = response.json()
+ self.log.warning("HTTP redirect to %s", url)
+ if self.config("redirects"):
+ continue
+ raise exception.StopExtraction()
+ data = response.json()
try:
posts = data["items"]
except Exception: