From b5e56c51e491b41f9eb6a895459c185788a377e5 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Mon, 12 Aug 2024 02:42:36 -0400 Subject: New upstream version 1.27.3. --- gallery_dl/extractor/behance.py | 15 ++++++++++++- gallery_dl/extractor/bunkr.py | 10 ++++++++- gallery_dl/extractor/cien.py | 2 +- gallery_dl/extractor/deviantart.py | 17 +-------------- gallery_dl/extractor/fanbox.py | 16 ++++++++++++-- gallery_dl/extractor/furaffinity.py | 40 +++++++++++++++++++++++++++------- gallery_dl/extractor/hentaicosplays.py | 10 +++++---- gallery_dl/extractor/hotleak.py | 2 +- gallery_dl/extractor/instagram.py | 8 +++---- gallery_dl/extractor/twitter.py | 5 +++++ gallery_dl/extractor/zerochan.py | 8 +++++-- 11 files changed, 93 insertions(+), 40 deletions(-) (limited to 'gallery_dl/extractor') diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index f24059f..72f9195 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -49,7 +49,7 @@ class BehanceExtractor(Extractor): def _update(self, data): # compress data to simple lists - if data["fields"] and isinstance(data["fields"][0], dict): + if data.get("fields") and isinstance(data["fields"][0], dict): data["fields"] = [ field.get("name") or field.get("label") for field in data["fields"] @@ -164,6 +164,19 @@ class BehanceGalleryExtractor(BehanceExtractor): append((size["url"], module)) elif mtype == "video": + try: + url = text.extr(module["embed"], 'src="', '"') + page = self.request(text.unescape(url)).text + + url = text.extr(page, '', ''))[0] - files = self._extract_files(post.get("articleBody") or page) + files = self._extract_files(page) post["post_url"] = url post["post_id"] = text.parse_int(self.groups[1]) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index a70710c..f3ea4e7 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -12,7 +12,6 @@ from .common import Extractor, Message from .. import text, util, exception from ..cache import cache, memcache import collections -import itertools import mimetypes import binascii import time @@ -246,7 +245,6 @@ class DeviantartExtractor(Extractor): deviation["username"] = deviation["author"]["username"] deviation["_username"] = deviation["username"].lower() - deviation["da_category"] = deviation["category"] deviation["published_time"] = text.parse_int( deviation["published_time"]) deviation["date"] = text.parse_timestamp( @@ -301,15 +299,6 @@ class DeviantartExtractor(Extractor): ) else: needle = '
' - catlist = deviation["category_path"].split("/") - categories = " / ".join( - ('{}' - '').format(self.root, cpath, cat.capitalize()) - for cat, cpath in zip( - catlist, - itertools.accumulate(catlist, lambda t, c: t + "/" + c) - ) - ) username = deviation["author"]["username"] urlname = deviation.get("username") or username.lower() header = HEADER_TEMPLATE.format( @@ -318,7 +307,6 @@ class DeviantartExtractor(Extractor): userurl="{}/{}/".format(self.root, urlname), username=username, date=deviation["date"], - categories=categories, ) if needle in html: @@ -624,7 +612,7 @@ class DeviantartAvatarExtractor(DeviantartExtractor): def _make_deviation(self, url, user, index, fmt): return { "author" : user, - "category" : "avatar", + "da_category" : "avatar", "index" : text.parse_int(index), "is_deleted" : False, "is_downloadable": False, @@ -1773,9 +1761,6 @@ HEADER_TEMPLATE = """
, {date} -
  • - {categories} -
  • """ diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index d81fd0b..d8337b6 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -309,8 +309,20 @@ class FanboxCreatorExtractor(FanboxExtractor): self.creator_id = match.group(1) or match.group(2) def posts(self): - url = "https://api.fanbox.cc/post.listCreator?creatorId={}&limit=10" - return self._pagination(url.format(self.creator_id)) + url = "https://api.fanbox.cc/post.paginateCreator?creatorId=" + return self._pagination_creator(url + self.creator_id) + + def _pagination_creator(self, url): + urls = self.request(url, headers=self.headers).json()["body"] + for url in urls: + url = text.ensure_http_scheme(url) + body = self.request(url, headers=self.headers).json()["body"] + for item in body: + try: + yield self._get_post_data(item["id"]) + except Exception as exc: + self.log.warning("Skipping post %s (%s: %s)", + item["id"], exc.__class__.__name__, exc) class FanboxPostExtractor(FanboxExtractor): diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index f48a984..3055426 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -78,14 +78,12 @@ class FuraffinityExtractor(Extractor): path = extr('href="//d', '"') if not path: - self.log.warning( - "Unable to download post %s (\"%s\")", - post_id, text.remove_html( - extr('System Message', '') or - extr('System Message', '') - ) - ) - return None + msg = text.remove_html( + extr('System Message', '') or + extr('System Message', '') + ).partition(" . Continue ")[0] + return self.log.warning( + "Unable to download post %s (\"%s\")", post_id, msg) pi = text.parse_int rh = text.remove_html @@ -335,3 +333,29 @@ class FuraffinityFollowingExtractor(FuraffinityExtractor): if url.endswith(path): return url = self.root + path + + +class FuraffinitySubmissionsExtractor(FuraffinityExtractor): + """Extractor for new furaffinity submissions""" + subcategory = "submissions" + pattern = BASE_PATTERN + r"(/msg/submissions(?:/[^/?#]+)?)" + example = "https://www.furaffinity.net/msg/submissions" + + def posts(self): + self.user = None + url = self.root + self.groups[0] + return self._pagination_submissions(url) + + def _pagination_submissions(self, url): + while True: + page = self.request(url).text + + for post_id in text.extract_iter(page, 'id="sid-', '"'): + yield post_id + + path = (text.extr(page, '= 300: url = text.urljoin(self.root, response.headers["location"]) - response = self.request(url, params=params) - data = response.json() + self.log.warning("HTTP redirect to %s", url) + if self.config("redirects"): + continue + raise exception.StopExtraction() + data = response.json() try: posts = data["items"] except Exception: -- cgit v1.2.3