From b5e56c51e491b41f9eb6a895459c185788a377e5 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Mon, 12 Aug 2024 02:42:36 -0400 Subject: New upstream version 1.27.3. --- gallery_dl/extractor/behance.py | 15 ++++++++++++- gallery_dl/extractor/bunkr.py | 10 ++++++++- gallery_dl/extractor/cien.py | 2 +- gallery_dl/extractor/deviantart.py | 17 +-------------- gallery_dl/extractor/fanbox.py | 16 ++++++++++++-- gallery_dl/extractor/furaffinity.py | 40 +++++++++++++++++++++++++++------- gallery_dl/extractor/hentaicosplays.py | 10 +++++---- gallery_dl/extractor/hotleak.py | 2 +- gallery_dl/extractor/instagram.py | 8 +++---- gallery_dl/extractor/twitter.py | 5 +++++ gallery_dl/extractor/zerochan.py | 8 +++++-- gallery_dl/postprocessor/metadata.py | 23 ++++++++++++++++--- gallery_dl/version.py | 2 +- 13 files changed, 114 insertions(+), 44 deletions(-) (limited to 'gallery_dl') diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index f24059f..72f9195 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -49,7 +49,7 @@ class BehanceExtractor(Extractor): def _update(self, data): # compress data to simple lists - if data["fields"] and isinstance(data["fields"][0], dict): + if data.get("fields") and isinstance(data["fields"][0], dict): data["fields"] = [ field.get("name") or field.get("label") for field in data["fields"] @@ -164,6 +164,19 @@ class BehanceGalleryExtractor(BehanceExtractor): append((size["url"], module)) elif mtype == "video": + try: + url = text.extr(module["embed"], 'src="', '"') + page = self.request(text.unescape(url)).text + + url = text.extr(page, '', ''))[0] - files = self._extract_files(post.get("articleBody") or page) + files = self._extract_files(page) post["post_url"] = url post["post_id"] = text.parse_int(self.groups[1]) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index a70710c..f3ea4e7 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -12,7 +12,6 @@ from .common import Extractor, Message from .. import text, util, exception from ..cache import cache, memcache import collections -import itertools import mimetypes import binascii import time @@ -246,7 +245,6 @@ class DeviantartExtractor(Extractor): deviation["username"] = deviation["author"]["username"] deviation["_username"] = deviation["username"].lower() - deviation["da_category"] = deviation["category"] deviation["published_time"] = text.parse_int( deviation["published_time"]) deviation["date"] = text.parse_timestamp( @@ -301,15 +299,6 @@ class DeviantartExtractor(Extractor): ) else: needle = '
' - catlist = deviation["category_path"].split("/") - categories = " / ".join( - ('{}' - '').format(self.root, cpath, cat.capitalize()) - for cat, cpath in zip( - catlist, - itertools.accumulate(catlist, lambda t, c: t + "/" + c) - ) - ) username = deviation["author"]["username"] urlname = deviation.get("username") or username.lower() header = HEADER_TEMPLATE.format( @@ -318,7 +307,6 @@ class DeviantartExtractor(Extractor): userurl="{}/{}/".format(self.root, urlname), username=username, date=deviation["date"], - categories=categories, ) if needle in html: @@ -624,7 +612,7 @@ class DeviantartAvatarExtractor(DeviantartExtractor): def _make_deviation(self, url, user, index, fmt): return { "author" : user, - "category" : "avatar", + "da_category" : "avatar", "index" : text.parse_int(index), "is_deleted" : False, "is_downloadable": False, @@ -1773,9 +1761,6 @@ HEADER_TEMPLATE = """
, {date} -
  • - {categories} -
  • """ diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index d81fd0b..d8337b6 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -309,8 +309,20 @@ class FanboxCreatorExtractor(FanboxExtractor): self.creator_id = match.group(1) or match.group(2) def posts(self): - url = "https://api.fanbox.cc/post.listCreator?creatorId={}&limit=10" - return self._pagination(url.format(self.creator_id)) + url = "https://api.fanbox.cc/post.paginateCreator?creatorId=" + return self._pagination_creator(url + self.creator_id) + + def _pagination_creator(self, url): + urls = self.request(url, headers=self.headers).json()["body"] + for url in urls: + url = text.ensure_http_scheme(url) + body = self.request(url, headers=self.headers).json()["body"] + for item in body: + try: + yield self._get_post_data(item["id"]) + except Exception as exc: + self.log.warning("Skipping post %s (%s: %s)", + item["id"], exc.__class__.__name__, exc) class FanboxPostExtractor(FanboxExtractor): diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index f48a984..3055426 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -78,14 +78,12 @@ class FuraffinityExtractor(Extractor): path = extr('href="//d', '"') if not path: - self.log.warning( - "Unable to download post %s (\"%s\")", - post_id, text.remove_html( - extr('System Message', '') or - extr('System Message', '') - ) - ) - return None + msg = text.remove_html( + extr('System Message', '') or + extr('System Message', '') + ).partition(" . Continue ")[0] + return self.log.warning( + "Unable to download post %s (\"%s\")", post_id, msg) pi = text.parse_int rh = text.remove_html @@ -335,3 +333,29 @@ class FuraffinityFollowingExtractor(FuraffinityExtractor): if url.endswith(path): return url = self.root + path + + +class FuraffinitySubmissionsExtractor(FuraffinityExtractor): + """Extractor for new furaffinity submissions""" + subcategory = "submissions" + pattern = BASE_PATTERN + r"(/msg/submissions(?:/[^/?#]+)?)" + example = "https://www.furaffinity.net/msg/submissions" + + def posts(self): + self.user = None + url = self.root + self.groups[0] + return self._pagination_submissions(url) + + def _pagination_submissions(self, url): + while True: + page = self.request(url).text + + for post_id in text.extract_iter(page, 'id="sid-', '"'): + yield post_id + + path = (text.extr(page, '= 300: url = text.urljoin(self.root, response.headers["location"]) - response = self.request(url, params=params) - data = response.json() + self.log.warning("HTTP redirect to %s", url) + if self.config("redirects"): + continue + raise exception.StopExtraction() + data = response.json() try: posts = data["items"] except Exception: diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py index a520a34..e89b170 100644 --- a/gallery_dl/postprocessor/metadata.py +++ b/gallery_dl/postprocessor/metadata.py @@ -55,6 +55,20 @@ class MetadataPP(PostProcessor): self._json_encode = self._make_encoder(options, 4).encode ext = "json" + base_directory = options.get("base-directory") + if base_directory: + if base_directory is True: + self._base = lambda p: p.basedirectory + else: + sep = os.sep + altsep = os.altsep + base_directory = util.expand_path(base_directory) + if altsep and altsep in base_directory: + base_directory = base_directory.replace(altsep, sep) + if base_directory[-1] != sep: + base_directory += sep + self._base = lambda p: base_directory + directory = options.get("directory") if isinstance(directory, list): self._directory = self._directory_format @@ -147,11 +161,14 @@ class MetadataPP(PostProcessor): except Exception: pass - def _directory(self, pathfmt): + def _base(self, pathfmt): return pathfmt.realdirectory + def _directory(self, pathfmt): + return self._base(pathfmt) + def _directory_custom(self, pathfmt): - return os.path.join(pathfmt.realdirectory, self._metadir) + return os.path.join(self._base(pathfmt), self._metadir) def _directory_format(self, pathfmt): formatters = pathfmt.directory_formatters @@ -161,7 +178,7 @@ class MetadataPP(PostProcessor): pathfmt.directory_conditions = () segments = pathfmt.build_directory(pathfmt.kwdict) directory = pathfmt.clean_path(os.sep.join(segments) + os.sep) - return os.path.join(pathfmt.realdirectory, directory) + return os.path.join(self._base(pathfmt), directory) finally: pathfmt.directory_conditions = conditions pathfmt.directory_formatters = formatters diff --git a/gallery_dl/version.py b/gallery_dl/version.py index f234af1..f2462ee 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.27.2" +__version__ = "1.27.3" __variant__ = None -- cgit v1.2.3