diff options
| author | 2025-03-10 03:44:57 -0400 | |
|---|---|---|
| committer | 2025-03-10 03:44:57 -0400 | |
| commit | 243d1f1beb4e4eb75a524f1aff948c47761a4f1d (patch) | |
| tree | 54f7ada7698d946f410500ad14f62798ca646956 /gallery_dl/extractor | |
| parent | 889c7b8caec8fc0b9c7a583ed1d9cfa43518fc42 (diff) | |
New upstream version 1.29.1 (tag: upstream/1.29.1)
Diffstat (limited to 'gallery_dl/extractor')
| -rw-r--r-- | gallery_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | gallery_dl/extractor/bunkr.py | 32 | ||||
| -rw-r--r-- | gallery_dl/extractor/chevereto.py | 18 | ||||
| -rw-r--r-- | gallery_dl/extractor/common.py | 23 | ||||
| -rw-r--r-- | gallery_dl/extractor/danbooru.py | 62 | ||||
| -rw-r--r-- | gallery_dl/extractor/e621.py | 9 | ||||
| -rw-r--r-- | gallery_dl/extractor/erome.py | 19 | ||||
| -rw-r--r-- | gallery_dl/extractor/furaffinity.py | 5 | ||||
| -rw-r--r-- | gallery_dl/extractor/reddit.py | 15 | ||||
| -rw-r--r-- | gallery_dl/extractor/redgifs.py | 41 | ||||
| -rw-r--r-- | gallery_dl/extractor/sankaku.py | 7 | ||||
| -rw-r--r-- | gallery_dl/extractor/tenor.py | 148 | ||||
| -rw-r--r-- | gallery_dl/extractor/tiktok.py | 7 | ||||
| -rw-r--r-- | gallery_dl/extractor/vsco.py | 17 |
14 files changed, 297 insertions, 107 deletions
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 00b22d4..8208241 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -171,6 +171,7 @@ modules = [ "tapas", "tcbscans", "telegraph", + "tenor", "tiktok", "tmohentai", "toyhouse", diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index 201b8f4..d74f59c 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -11,7 +11,6 @@ from .common import Extractor from .lolisafe import LolisafeAlbumExtractor from .. import text, util, config, exception -import binascii import random if config.get(("extractor", "bunkr"), "tlds"): @@ -71,6 +70,17 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): domain = self.groups[0] or self.groups[1] if domain not in LEGACY_DOMAINS: self.root = "https://" + domain + + def _init(self): + LolisafeAlbumExtractor._init(self) + + endpoint = self.config("endpoint") + if not endpoint: + endpoint = self.root_dl + "/api/_001" + elif endpoint[0] == "/": + endpoint = self.root_dl + endpoint + + self.endpoint = endpoint self.offset = 0 def skip(self, num): @@ -169,13 +179,13 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): data_id = text.extr(page, 'data-file-id="', '"') referer = self.root_dl + "/file/" + data_id - url = self.root_dl + "/api/vs" - headers = {"Referer": referer} - data = self.request( - url, method="POST", headers=headers, json={"id": data_id}).json() + headers = {"Referer": referer, "Origin": self.root_dl} + data = self.request(self.endpoint, method="POST", headers=headers, + json={"id": data_id}).json() if data.get("encrypted"): - file_url = self._decrypt_url(data["url"], data["timestamp"]) + key = "SECRET_KEY_{}".format(data["timestamp"] // 3600) + file_url = util.decrypt_xor(data["url"], key.encode()) else: file_url = data["url"] @@ -192,16 +202,6 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): "_http_validate": self._validate, } - def 
_decrypt_url(self, encrypted_b64, timestamp): - encrypted_bytes = binascii.a2b_base64(encrypted_b64) - key = "SECRET_KEY_{}".format(timestamp // 3600).encode() - div = len(key) - - return bytes([ - encrypted_bytes[i] ^ key[i % div] - for i in range(len(encrypted_bytes)) - ]).decode() - def _validate(self, response): if response.history and response.url.endswith("/maintenance-vid.mp4"): self.log.warning("File server in maintenance mode") diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py index de22a7b..c9ccb7d 100644 --- a/gallery_dl/extractor/chevereto.py +++ b/gallery_dl/extractor/chevereto.py @@ -9,7 +9,7 @@ """Extractors for Chevereto galleries""" from .common import BaseExtractor, Message -from .. import text +from .. import text, util class CheveretoExtractor(BaseExtractor): @@ -53,12 +53,22 @@ class CheveretoImageExtractor(CheveretoExtractor): def items(self): url = self.root + self.path - extr = text.extract_from(self.request(url).text) + page = self.request(url).text + extr = text.extract_from(page) + + url = (extr('<meta property="og:image" content="', '"') or + extr('url: "', '"')) + if not url or url.endswith("/loading.svg"): + pos = page.find(" download=") + url = text.rextract(page, 'href="', '"', pos)[0] + if not url.startswith("https://"): + url = util.decrypt_xor( + url, b"seltilovessimpcity@simpcityhatesscrapers", + fromhex=True) image = { "id" : self.path.rpartition(".")[2], - "url" : (extr('<meta property="og:image" content="', '"') or - extr('url: "', '"')), + "url" : url, "album": text.extr(extr("Added to <a", "/a>"), ">", "<"), "user" : extr('username: "', '"'), } diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index d58db6f..a85eedd 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -205,25 +205,10 @@ class Extractor(): msg = "'{} {}' for '{}'".format( code, response.reason, response.url) - server = response.headers.get("Server") - if server and 
server.startswith("cloudflare") and \ - code in (403, 503): - mitigated = response.headers.get("cf-mitigated") - if mitigated and mitigated.lower() == "challenge": - self.log.warning("Cloudflare challenge") - break - content = response.content - if b"_cf_chl_opt" in content or b"jschl-answer" in content: - self.log.warning("Cloudflare challenge") - break - if b'name="captcha-bypass"' in content: - self.log.warning("Cloudflare CAPTCHA") - break - elif server and server.startswith("ddos-guard") and \ - code == 403: - if b"/ddos-guard/js-challenge/" in response.content: - self.log.warning("DDoS-Guard challenge") - break + + challenge = util.detect_challenge(response) + if challenge is not None: + self.log.warning(challenge) if code == 429 and self._handle_429(response): continue diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index d0a9397..8d00728 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -205,12 +205,8 @@ class DanbooruTagExtractor(DanbooruExtractor): pattern = BASE_PATTERN + r"/posts\?(?:[^&#]*&)*tags=([^&#]*)" example = "https://danbooru.donmai.us/posts?tags=TAG" - def __init__(self, match): - DanbooruExtractor.__init__(self, match) - tags = match.group(match.lastindex) - self.tags = text.unquote(tags.replace("+", " ")) - def metadata(self): + self.tags = text.unquote(self.groups[-1].replace("+", " ")) return {"search_tags": self.tags} def posts(self): @@ -235,15 +231,13 @@ class DanbooruPoolExtractor(DanbooruExtractor): """Extractor for posts from danbooru pools""" subcategory = "pool" directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name]}") + filename_fmt = "{num:>04}_{id}_{filename}.{extension}" archive_fmt = "p_{pool[id]}_{id}" pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)" example = "https://danbooru.donmai.us/pools/12345" - def __init__(self, match): - DanbooruExtractor.__init__(self, match) - self.pool_id = match.group(match.lastindex) - def metadata(self): + 
self.pool_id = self.groups[-1] url = "{}/pools/{}.json".format(self.root, self.pool_id) pool = self.request(url).json() pool["name"] = pool["name"].replace("_", " ") @@ -251,8 +245,42 @@ class DanbooruPoolExtractor(DanbooruExtractor): return {"pool": pool} def posts(self): - params = {"tags": "pool:" + self.pool_id} - return self._pagination("/posts.json", params, "b") + reverse = prefix = None + + order = self.config("order-posts") + if not order or order in ("asc", "pool", "pool_asc", "asc_pool"): + params = {"tags": "ordpool:" + self.pool_id} + elif order in ("id", "desc_id", "id_desc"): + params = {"tags": "pool:" + self.pool_id} + prefix = "b" + elif order in ("desc", "desc_pool", "pool_desc"): + params = {"tags": "ordpool:" + self.pool_id} + reverse = True + elif order in ("asc_id", "id_asc"): + params = {"tags": "pool:" + self.pool_id} + reverse = True + + posts = self._pagination("/posts.json", params, prefix) + if reverse: + return self._enumerate_posts_reverse(posts) + else: + return self._enumerate_posts(posts) + + def _enumerate_posts(self, posts): + pid_to_num = {pid: num+1 for num, pid in enumerate(self.post_ids)} + for post in posts: + post["num"] = pid_to_num[post["id"]] + yield post + + def _enumerate_posts_reverse(self, posts): + self.log.info("Collecting posts of pool %s", self.pool_id) + posts = list(posts) + posts.reverse() + + pid_to_num = {pid: num+1 for num, pid in enumerate(self.post_ids)} + for post in posts: + post["num"] = pid_to_num[post["id"]] + return posts class DanbooruPostExtractor(DanbooruExtractor): @@ -262,12 +290,8 @@ class DanbooruPostExtractor(DanbooruExtractor): pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)" example = "https://danbooru.donmai.us/posts/12345" - def __init__(self, match): - DanbooruExtractor.__init__(self, match) - self.post_id = match.group(match.lastindex) - def posts(self): - url = "{}/posts/{}.json".format(self.root, self.post_id) + url = "{}/posts/{}.json".format(self.root, self.groups[-1]) post = 
self.request(url).json() if self.includes: params = {"only": self.includes} @@ -283,12 +307,8 @@ class DanbooruPopularExtractor(DanbooruExtractor): pattern = BASE_PATTERN + r"/(?:explore/posts/)?popular(?:\?([^#]*))?" example = "https://danbooru.donmai.us/explore/posts/popular" - def __init__(self, match): - DanbooruExtractor.__init__(self, match) - self.params = match.group(match.lastindex) - def metadata(self): - self.params = params = text.parse_query(self.params) + self.params = params = text.parse_query(self.groups[-1]) scale = params.get("scale", "day") date = params.get("date") or datetime.date.today().isoformat() diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py index eddcb12..76ea792 100644 --- a/gallery_dl/extractor/e621.py +++ b/gallery_dl/extractor/e621.py @@ -100,7 +100,7 @@ class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor): example = "https://e621.net/pools/12345" def posts(self): - self.log.info("Fetching posts of pool %s", self.pool_id) + self.log.info("Collecting posts of pool %s", self.pool_id) id_to_post = { post["id"]: post @@ -126,7 +126,7 @@ class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor): example = "https://e621.net/posts/12345" def posts(self): - url = "{}/posts/{}.json".format(self.root, self.post_id) + url = "{}/posts/{}.json".format(self.root, self.groups[-1]) return (self.request(url).json()["post"],) @@ -147,11 +147,8 @@ class E621FavoriteExtractor(E621Extractor): pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?" 
example = "https://e621.net/favorites" - def __init__(self, match): - E621Extractor.__init__(self, match) - self.query = text.parse_query(match.group(match.lastindex)) - def metadata(self): + self.query = text.parse_query(self.groups[-1]) return {"user_id": self.query.get("user_id", "")} def posts(self): diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py index 55549de..7582528 100644 --- a/gallery_dl/extractor/erome.py +++ b/gallery_dl/extractor/erome.py @@ -23,12 +23,8 @@ class EromeExtractor(Extractor): archive_fmt = "{album_id}_{num}" root = "https://www.erome.com" - def __init__(self, match): - Extractor.__init__(self, match) - self.item = match.group(1) - self.__cookies = True - def items(self): + self.__cookies = True for album_id in self.albums(): url = "{}/a/{}".format(self.root, album_id) @@ -66,8 +62,9 @@ class EromeExtractor(Extractor): "user" : text.unquote(user), "count" : len(urls), "date" : date, - "tags" : [t.replace("+", " ") - for t in text.extract_iter(tags, "?q=", '"')], + "tags" : ([t.replace("+", " ") + for t in text.extract_iter(tags, "?q=", '"')] + if tags else ()), "_http_headers": {"Referer": url}, } @@ -110,7 +107,7 @@ class EromeAlbumExtractor(EromeExtractor): example = "https://www.erome.com/a/ID" def albums(self): - return (self.item,) + return (self.groups[0],) class EromeUserExtractor(EromeExtractor): @@ -119,18 +116,18 @@ class EromeUserExtractor(EromeExtractor): example = "https://www.erome.com/USER" def albums(self): - url = "{}/{}".format(self.root, self.item) + url = "{}/{}".format(self.root, self.groups[0]) return self._pagination(url, {}) class EromeSearchExtractor(EromeExtractor): subcategory = "search" - pattern = BASE_PATTERN + r"/search\?q=([^&#]+)" + pattern = BASE_PATTERN + r"/search/?\?(q=[^#]+)" example = "https://www.erome.com/search?q=QUERY" def albums(self): url = self.root + "/search" - params = {"q": text.unquote(self.item)} + params = text.parse_query(self.groups[0]) return 
self._pagination(url, params) diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index 1466390..216aeb1 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -98,7 +98,8 @@ class FuraffinityExtractor(Extractor): data["tags"] = text.split_html(extr( 'class="tags-row">', '</section>')) data["title"] = text.unescape(extr("<h2><p>", "</p></h2>")) - data["artist"] = extr("<strong>", "<") + data["artist_url"] = extr('title="', '"').strip() + data["artist"] = extr(">", "<") data["_description"] = extr( 'class="submission-description user-submitted-links">', ' </div>') @@ -121,6 +122,7 @@ class FuraffinityExtractor(Extractor): else: # old site layout data["title"] = text.unescape(extr("<h2>", "</h2>")) + data["artist_url"] = extr('title="', '"').strip() data["artist"] = extr(">", "<") data["fa_category"] = extr("<b>Category:</b>", "<").strip() data["theme"] = extr("<b>Theme:</b>", "<").strip() @@ -139,7 +141,6 @@ class FuraffinityExtractor(Extractor): 'style="padding:8px">', ' </td>') data["folders"] = () # folders not present in old layout - data["artist_url"] = data["artist"].replace("_", "").lower() data["user"] = self.user or data["artist_url"] data["date"] = text.parse_timestamp(data["filename"].partition(".")[0]) data["description"] = self._process_description(data["_description"]) diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index f36b1f5..7a9e3c5 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -41,6 +41,11 @@ class RedditExtractor(Extractor): self._extract_video = self._extract_video_dash videos = True + selftext = self.config("selftext") + if selftext is None: + selftext = self.api.comments + selftext = True if selftext else False + submissions = self.submissions() visited = set() depth = 0 @@ -92,12 +97,12 @@ class RedditExtractor(Extractor): elif parentdir: yield Message.Directory, comments[0] + if selftext and 
submission: + for url in text.extract_iter( + submission["selftext_html"] or "", ' href="', '"'): + urls.append((url, submission)) + if self.api.comments: - if submission: - for url in text.extract_iter( - submission["selftext_html"] or "", - ' href="', '"'): - urls.append((url, submission)) for comment in comments: html = comment["body_html"] or "" href = (' href="' in html) diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py index 506f6ac..612faac 100644 --- a/gallery_dl/extractor/redgifs.py +++ b/gallery_dl/extractor/redgifs.py @@ -163,24 +163,27 @@ class RedgifsSearchExtractor(RedgifsExtractor): subcategory = "search" directory_fmt = ("{category}", "Search", "{search}") pattern = (r"(?:https?://)?(?:\w+\.)?redgifs\.com" - r"/(?:gifs/([^/?#]+)|browse)(?:/?\?([^#]+))?") + r"/(?:gifs/([^/?#]+)|search(?:/gifs)?()|browse)" + r"(?:/?\?([^#]+))?") example = "https://www.redgifs.com/gifs/TAG" - def __init__(self, match): - RedgifsExtractor.__init__(self, match) - self.search, self.query = match.groups() - def metadata(self): - self.params = text.parse_query(self.query) - if self.search: - self.params["tags"] = text.unquote(self.search) + tag, self.search, query = self.groups - return {"search": (self.params.get("tags") or - self.params.get("order") or + self.params = params = text.parse_query(query) + if tag is not None: + params["tags"] = text.unquote(tag) + + return {"search": (params.get("query") or + params.get("tags") or + params.get("order") or "trending")} def gifs(self): - return self.api.search(self.params) + if self.search is None: + return self.api.gifs_search(self.params) + else: + return self.api.search_gifs(self.params) class RedgifsImageExtractor(RedgifsExtractor): @@ -205,9 +208,9 @@ class RedgifsAPI(): def __init__(self, extractor): self.extractor = extractor self.headers = { - "authorization" : None, - "content-type" : "application/json", - "x-customheader": extractor.root + "/", + "Accept" : "application/json, text/plain, 
*/*", + "Referer" : extractor.root + "/", + "Authorization" : None, "Origin" : extractor.root, } @@ -242,14 +245,18 @@ class RedgifsAPI(): params = {"count": 30, "order": order} return self._pagination(endpoint, params) - def search(self, params): + def gifs_search(self, params): endpoint = "/v2/gifs/search" params["search_text"] = params.pop("tags", None) return self._pagination(endpoint, params) + def search_gifs(self, params): + endpoint = "/v2/search/gifs" + return self._pagination(endpoint, params) + def _call(self, endpoint, params=None): url = self.API_ROOT + endpoint - self.headers["authorization"] = self._auth() + self.headers["Authorization"] = self._auth() return self.extractor.request( url, params=params, headers=self.headers).json() @@ -270,6 +277,6 @@ class RedgifsAPI(): def _auth(self): # https://github.com/Redgifs/api/wiki/Temporary-tokens url = self.API_ROOT + "/v2/auth/temporary" - self.headers["authorization"] = None + self.headers["Authorization"] = None return "Bearer " + self.extractor.request( url, headers=self.headers).json()["token"] diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index b5cdb9c..b2f31dd 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -241,9 +241,10 @@ class SankakuAPI(): if response.status_code == 429: until = response.headers.get("X-RateLimit-Reset") - if not until and b"tags-limit" in response.content: - raise exception.StopExtraction("Search tag limit exceeded") - seconds = None if until else 60 + if not until and b"_tags-explicit-limit" in response.content: + raise exception.AuthorizationError( + "Search tag limit exceeded") + seconds = None if until else 600 self.extractor.wait(until=until, seconds=seconds) continue diff --git a/gallery_dl/extractor/tenor.py b/gallery_dl/extractor/tenor.py new file mode 100644 index 0000000..7273eac --- /dev/null +++ b/gallery_dl/extractor/tenor.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike 
Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://tenor.com/""" + +from .common import Extractor, Message +from .. import text, util + +BASE_PATTERN = r"(?:https?://)?tenor\.com/(?:\w\w(?:-\w\w)?/)?" + + +class TenorExtractor(Extractor): + """Base class for tenor extractors""" + category = "tenor" + root = "https://tenor.com" + filename_fmt = "{id}{title:? //}.{extension}" + archive_fmt = "{id}" + request_interval = (0.5, 1.5) + + def _init(self): + formats = self.config("format") + if formats is None: + self.formats = ("gif", "mp4", "webm", "webp") + else: + if isinstance(formats, str): + formats = formats.split(",") + self.formats = formats + + def items(self): + meta = self.metadata() + + for gif in self.gifs(): + fmt = self._extract_format(gif) + if not fmt: + self.log.warning("%s: Selected format(s) not available", + gif.get("id")) + continue + + url = fmt["url"] + gif["width"], gif["height"] = fmt["dims"] + gif["title"] = gif["h1_title"][:-4] + gif["description"] = gif.pop("content_description", "") + gif["date"] = text.parse_timestamp(gif["created"]) + if meta: + gif.update(meta) + + yield Message.Directory, gif + yield Message.Url, url, text.nameext_from_url(url, gif) + + def _extract_format(self, gif): + media_formats = gif["media_formats"] + for fmt in self.formats: + if fmt in media_formats: + return media_formats[fmt] + + def _search_results(self, query): + url = "https://tenor.googleapis.com/v2/search" + params = { + "appversion": "browser-r20250225-1", + "prettyPrint": "false", + "key": "AIzaSyC-P6_qz3FzCoXGLk6tgitZo4jEJ5mLzD8", + "client_key": "tenor_web", + "locale": "en", + "anon_id": "", + "q": query, + "limit": "50", + "contentfilter": "low", + "media_filter": "gif,gif_transparent,mediumgif,tinygif," + "tinygif_transparent,webp,webp_transparent," + 
"tinywebp,tinywebp_transparent,tinymp4,mp4,webm," + "originalgif,gifpreview", + "fields": "next,results.id,results.media_formats,results.title," + "results.h1_title,results.long_title,results.itemurl," + "results.url,results.created,results.user," + "results.shares,results.embed,results.hasaudio," + "results.policy_status,results.source_id,results.flags," + "results.tags,results.content_rating,results.bg_color," + "results.legacy_info,results.geographic_restriction," + "results.content_description", + "pos": None, + "component": "web_desktop", + } + headers = { + "Referer": self.root + "/", + "Origin" : self.root, + } + + while True: + data = self.request(url, params=params, headers=headers).json() + + yield from data["results"] + + params["pos"] = data.get("next") + if not params["pos"]: + return + + def metadata(self): + return False + + def gifs(self): + return () + + +class TenorImageExtractor(TenorExtractor): + subcategory = "image" + pattern = BASE_PATTERN + r"view/(?:[^/?#]*-)?(\d+)" + example = "https://tenor.com/view/SLUG-1234567890" + + def gifs(self): + url = "{}/view/{}".format(self.root, self.groups[0]) + page = self.request(url).text + pos = page.index('id="store-cache"') + data = util.json_loads(text.extract(page, ">", "</script>", pos)[0]) + return (data["gifs"]["byId"].popitem()[1]["results"][0],) + + +class TenorSearchExtractor(TenorExtractor): + subcategory = "search" + directory_fmt = ("{category}", "{search_tags}") + pattern = BASE_PATTERN + r"search/([^/?#]+)" + example = "https://tenor.com/search/QUERY" + + def metadata(self): + query = text.unquote(self.groups[0]) + rest, _, last = query.rpartition("-") + if last == "gifs": + query = rest + self.search_tags = query.replace("-", " ") + + return {"search_tags": self.search_tags} + + def gifs(self): + return self._search_results(self.search_tags) + + +class TenorUserExtractor(TenorExtractor): + subcategory = "user" + directory_fmt = ("{category}", "@{user[username]}") + pattern = BASE_PATTERN + 
r"(?:users|official)/([^/?#]+)" + example = "https://tenor.com/users/USER" + + def gifs(self): + return self._search_results("@" + self.groups[0]) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index f129b1c..203b1ac 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -219,6 +219,11 @@ class TiktokUserExtractor(TiktokExtractor): self.log.debug("", exc_info=exc) raise exception.ExtractionError("yt-dlp or youtube-dl is required " "for this feature!") + + ytdl_range = self.config("tiktok-range") + if ytdl_range is None or not ytdl_range and ytdl_range != 0: + ytdl_range = "" + extr_opts = { "extract_flat" : True, "ignore_no_formats_error": True, @@ -227,7 +232,7 @@ class TiktokUserExtractor(TiktokExtractor): "retries" : self._retries, "socket_timeout" : self._timeout, "nocheckcertificate" : not self._verify, - "playlist_items" : str(self.config("tiktok-range", "")), + "playlist_items" : str(ytdl_range), } if self._proxies: user_opts["proxy"] = self._proxies.get("http") diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py index a53409c..524bd81 100644 --- a/gallery_dl/extractor/vsco.py +++ b/gallery_dl/extractor/vsco.py @@ -32,7 +32,11 @@ class VscoExtractor(Extractor): yield Message.Directory, {"user": self.user} for img in self.images(): - if not img or "responsive_url" not in img: + if not img: + continue + elif "playback_url" in img: + img = self._transform_video(img) + elif "responsive_url" not in img: continue if img["is_video"]: @@ -118,6 +122,15 @@ class VscoExtractor(Extractor): media["image_meta"] = media.get("imageMeta") return media + @staticmethod + def _transform_video(media): + media["is_video"] = True + media["grid_name"] = "" + media["video_url"] = media["playback_url"] + media["responsive_url"] = media["poster_url"] + media["upload_date"] = media["created_date"] + return media + class VscoUserExtractor(VscoExtractor): """Extractor for a vsco user profile""" @@ -322,7 
+335,7 @@ class VscoVideoExtractor(VscoExtractor): "grid_name" : "", "upload_date" : media["createdDate"], "responsive_url": media["posterUrl"], - "video_url" : "ytdl:" + media.get("playbackUrl"), + "video_url" : media.get("playbackUrl"), "image_meta" : None, "width" : media["width"], "height" : media["height"], |
