about | summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor')
-rw-r--r-- gallery_dl/extractor/__init__.py | 1
-rw-r--r-- gallery_dl/extractor/bunkr.py | 32
-rw-r--r-- gallery_dl/extractor/chevereto.py | 18
-rw-r--r-- gallery_dl/extractor/common.py | 23
-rw-r--r-- gallery_dl/extractor/danbooru.py | 62
-rw-r--r-- gallery_dl/extractor/e621.py | 9
-rw-r--r-- gallery_dl/extractor/erome.py | 19
-rw-r--r-- gallery_dl/extractor/furaffinity.py | 5
-rw-r--r-- gallery_dl/extractor/reddit.py | 15
-rw-r--r-- gallery_dl/extractor/redgifs.py | 41
-rw-r--r-- gallery_dl/extractor/sankaku.py | 7
-rw-r--r-- gallery_dl/extractor/tenor.py | 148
-rw-r--r-- gallery_dl/extractor/tiktok.py | 7
-rw-r--r-- gallery_dl/extractor/vsco.py | 17
14 files changed, 297 insertions, 107 deletions
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 00b22d4..8208241 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -171,6 +171,7 @@ modules = [
"tapas",
"tcbscans",
"telegraph",
+ "tenor",
"tiktok",
"tmohentai",
"toyhouse",
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 201b8f4..d74f59c 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -11,7 +11,6 @@
from .common import Extractor
from .lolisafe import LolisafeAlbumExtractor
from .. import text, util, config, exception
-import binascii
import random
if config.get(("extractor", "bunkr"), "tlds"):
@@ -71,6 +70,17 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
domain = self.groups[0] or self.groups[1]
if domain not in LEGACY_DOMAINS:
self.root = "https://" + domain
+
+ def _init(self):
+ LolisafeAlbumExtractor._init(self)
+
+ endpoint = self.config("endpoint")
+ if not endpoint:
+ endpoint = self.root_dl + "/api/_001"
+ elif endpoint[0] == "/":
+ endpoint = self.root_dl + endpoint
+
+ self.endpoint = endpoint
self.offset = 0
def skip(self, num):
@@ -169,13 +179,13 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
data_id = text.extr(page, 'data-file-id="', '"')
referer = self.root_dl + "/file/" + data_id
- url = self.root_dl + "/api/vs"
- headers = {"Referer": referer}
- data = self.request(
- url, method="POST", headers=headers, json={"id": data_id}).json()
+ headers = {"Referer": referer, "Origin": self.root_dl}
+ data = self.request(self.endpoint, method="POST", headers=headers,
+ json={"id": data_id}).json()
if data.get("encrypted"):
- file_url = self._decrypt_url(data["url"], data["timestamp"])
+ key = "SECRET_KEY_{}".format(data["timestamp"] // 3600)
+ file_url = util.decrypt_xor(data["url"], key.encode())
else:
file_url = data["url"]
@@ -192,16 +202,6 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"_http_validate": self._validate,
}
- def _decrypt_url(self, encrypted_b64, timestamp):
- encrypted_bytes = binascii.a2b_base64(encrypted_b64)
- key = "SECRET_KEY_{}".format(timestamp // 3600).encode()
- div = len(key)
-
- return bytes([
- encrypted_bytes[i] ^ key[i % div]
- for i in range(len(encrypted_bytes))
- ]).decode()
-
def _validate(self, response):
if response.history and response.url.endswith("/maintenance-vid.mp4"):
self.log.warning("File server in maintenance mode")
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index de22a7b..c9ccb7d 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -9,7 +9,7 @@
"""Extractors for Chevereto galleries"""
from .common import BaseExtractor, Message
-from .. import text
+from .. import text, util
class CheveretoExtractor(BaseExtractor):
@@ -53,12 +53,22 @@ class CheveretoImageExtractor(CheveretoExtractor):
def items(self):
url = self.root + self.path
- extr = text.extract_from(self.request(url).text)
+ page = self.request(url).text
+ extr = text.extract_from(page)
+
+ url = (extr('<meta property="og:image" content="', '"') or
+ extr('url: "', '"'))
+ if not url or url.endswith("/loading.svg"):
+ pos = page.find(" download=")
+ url = text.rextract(page, 'href="', '"', pos)[0]
+ if not url.startswith("https://"):
+ url = util.decrypt_xor(
+ url, b"seltilovessimpcity@simpcityhatesscrapers",
+ fromhex=True)
image = {
"id" : self.path.rpartition(".")[2],
- "url" : (extr('<meta property="og:image" content="', '"') or
- extr('url: "', '"')),
+ "url" : url,
"album": text.extr(extr("Added to <a", "/a>"), ">", "<"),
"user" : extr('username: "', '"'),
}
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index d58db6f..a85eedd 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -205,25 +205,10 @@ class Extractor():
msg = "'{} {}' for '{}'".format(
code, response.reason, response.url)
- server = response.headers.get("Server")
- if server and server.startswith("cloudflare") and \
- code in (403, 503):
- mitigated = response.headers.get("cf-mitigated")
- if mitigated and mitigated.lower() == "challenge":
- self.log.warning("Cloudflare challenge")
- break
- content = response.content
- if b"_cf_chl_opt" in content or b"jschl-answer" in content:
- self.log.warning("Cloudflare challenge")
- break
- if b'name="captcha-bypass"' in content:
- self.log.warning("Cloudflare CAPTCHA")
- break
- elif server and server.startswith("ddos-guard") and \
- code == 403:
- if b"/ddos-guard/js-challenge/" in response.content:
- self.log.warning("DDoS-Guard challenge")
- break
+
+ challenge = util.detect_challenge(response)
+ if challenge is not None:
+ self.log.warning(challenge)
if code == 429 and self._handle_429(response):
continue
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index d0a9397..8d00728 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -205,12 +205,8 @@ class DanbooruTagExtractor(DanbooruExtractor):
pattern = BASE_PATTERN + r"/posts\?(?:[^&#]*&)*tags=([^&#]*)"
example = "https://danbooru.donmai.us/posts?tags=TAG"
- def __init__(self, match):
- DanbooruExtractor.__init__(self, match)
- tags = match.group(match.lastindex)
- self.tags = text.unquote(tags.replace("+", " "))
-
def metadata(self):
+ self.tags = text.unquote(self.groups[-1].replace("+", " "))
return {"search_tags": self.tags}
def posts(self):
@@ -235,15 +231,13 @@ class DanbooruPoolExtractor(DanbooruExtractor):
"""Extractor for posts from danbooru pools"""
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name]}")
+ filename_fmt = "{num:>04}_{id}_{filename}.{extension}"
archive_fmt = "p_{pool[id]}_{id}"
pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
example = "https://danbooru.donmai.us/pools/12345"
- def __init__(self, match):
- DanbooruExtractor.__init__(self, match)
- self.pool_id = match.group(match.lastindex)
-
def metadata(self):
+ self.pool_id = self.groups[-1]
url = "{}/pools/{}.json".format(self.root, self.pool_id)
pool = self.request(url).json()
pool["name"] = pool["name"].replace("_", " ")
@@ -251,8 +245,42 @@ class DanbooruPoolExtractor(DanbooruExtractor):
return {"pool": pool}
def posts(self):
- params = {"tags": "pool:" + self.pool_id}
- return self._pagination("/posts.json", params, "b")
+ reverse = prefix = None
+
+ order = self.config("order-posts")
+ if not order or order in ("asc", "pool", "pool_asc", "asc_pool"):
+ params = {"tags": "ordpool:" + self.pool_id}
+ elif order in ("id", "desc_id", "id_desc"):
+ params = {"tags": "pool:" + self.pool_id}
+ prefix = "b"
+ elif order in ("desc", "desc_pool", "pool_desc"):
+ params = {"tags": "ordpool:" + self.pool_id}
+ reverse = True
+ elif order in ("asc_id", "id_asc"):
+ params = {"tags": "pool:" + self.pool_id}
+ reverse = True
+
+ posts = self._pagination("/posts.json", params, prefix)
+ if reverse:
+ return self._enumerate_posts_reverse(posts)
+ else:
+ return self._enumerate_posts(posts)
+
+ def _enumerate_posts(self, posts):
+ pid_to_num = {pid: num+1 for num, pid in enumerate(self.post_ids)}
+ for post in posts:
+ post["num"] = pid_to_num[post["id"]]
+ yield post
+
+ def _enumerate_posts_reverse(self, posts):
+ self.log.info("Collecting posts of pool %s", self.pool_id)
+ posts = list(posts)
+ posts.reverse()
+
+ pid_to_num = {pid: num+1 for num, pid in enumerate(self.post_ids)}
+ for post in posts:
+ post["num"] = pid_to_num[post["id"]]
+ return posts
class DanbooruPostExtractor(DanbooruExtractor):
@@ -262,12 +290,8 @@ class DanbooruPostExtractor(DanbooruExtractor):
pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
example = "https://danbooru.donmai.us/posts/12345"
- def __init__(self, match):
- DanbooruExtractor.__init__(self, match)
- self.post_id = match.group(match.lastindex)
-
def posts(self):
- url = "{}/posts/{}.json".format(self.root, self.post_id)
+ url = "{}/posts/{}.json".format(self.root, self.groups[-1])
post = self.request(url).json()
if self.includes:
params = {"only": self.includes}
@@ -283,12 +307,8 @@ class DanbooruPopularExtractor(DanbooruExtractor):
pattern = BASE_PATTERN + r"/(?:explore/posts/)?popular(?:\?([^#]*))?"
example = "https://danbooru.donmai.us/explore/posts/popular"
- def __init__(self, match):
- DanbooruExtractor.__init__(self, match)
- self.params = match.group(match.lastindex)
-
def metadata(self):
- self.params = params = text.parse_query(self.params)
+ self.params = params = text.parse_query(self.groups[-1])
scale = params.get("scale", "day")
date = params.get("date") or datetime.date.today().isoformat()
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index eddcb12..76ea792 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -100,7 +100,7 @@ class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
example = "https://e621.net/pools/12345"
def posts(self):
- self.log.info("Fetching posts of pool %s", self.pool_id)
+ self.log.info("Collecting posts of pool %s", self.pool_id)
id_to_post = {
post["id"]: post
@@ -126,7 +126,7 @@ class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
example = "https://e621.net/posts/12345"
def posts(self):
- url = "{}/posts/{}.json".format(self.root, self.post_id)
+ url = "{}/posts/{}.json".format(self.root, self.groups[-1])
return (self.request(url).json()["post"],)
@@ -147,11 +147,8 @@ class E621FavoriteExtractor(E621Extractor):
pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
example = "https://e621.net/favorites"
- def __init__(self, match):
- E621Extractor.__init__(self, match)
- self.query = text.parse_query(match.group(match.lastindex))
-
def metadata(self):
+ self.query = text.parse_query(self.groups[-1])
return {"user_id": self.query.get("user_id", "")}
def posts(self):
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index 55549de..7582528 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -23,12 +23,8 @@ class EromeExtractor(Extractor):
archive_fmt = "{album_id}_{num}"
root = "https://www.erome.com"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.item = match.group(1)
- self.__cookies = True
-
def items(self):
+ self.__cookies = True
for album_id in self.albums():
url = "{}/a/{}".format(self.root, album_id)
@@ -66,8 +62,9 @@ class EromeExtractor(Extractor):
"user" : text.unquote(user),
"count" : len(urls),
"date" : date,
- "tags" : [t.replace("+", " ")
- for t in text.extract_iter(tags, "?q=", '"')],
+ "tags" : ([t.replace("+", " ")
+ for t in text.extract_iter(tags, "?q=", '"')]
+ if tags else ()),
"_http_headers": {"Referer": url},
}
@@ -110,7 +107,7 @@ class EromeAlbumExtractor(EromeExtractor):
example = "https://www.erome.com/a/ID"
def albums(self):
- return (self.item,)
+ return (self.groups[0],)
class EromeUserExtractor(EromeExtractor):
@@ -119,18 +116,18 @@ class EromeUserExtractor(EromeExtractor):
example = "https://www.erome.com/USER"
def albums(self):
- url = "{}/{}".format(self.root, self.item)
+ url = "{}/{}".format(self.root, self.groups[0])
return self._pagination(url, {})
class EromeSearchExtractor(EromeExtractor):
subcategory = "search"
- pattern = BASE_PATTERN + r"/search\?q=([^&#]+)"
+ pattern = BASE_PATTERN + r"/search/?\?(q=[^#]+)"
example = "https://www.erome.com/search?q=QUERY"
def albums(self):
url = self.root + "/search"
- params = {"q": text.unquote(self.item)}
+ params = text.parse_query(self.groups[0])
return self._pagination(url, params)
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 1466390..216aeb1 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -98,7 +98,8 @@ class FuraffinityExtractor(Extractor):
data["tags"] = text.split_html(extr(
'class="tags-row">', '</section>'))
data["title"] = text.unescape(extr("<h2><p>", "</p></h2>"))
- data["artist"] = extr("<strong>", "<")
+ data["artist_url"] = extr('title="', '"').strip()
+ data["artist"] = extr(">", "<")
data["_description"] = extr(
'class="submission-description user-submitted-links">',
' </div>')
@@ -121,6 +122,7 @@ class FuraffinityExtractor(Extractor):
else:
# old site layout
data["title"] = text.unescape(extr("<h2>", "</h2>"))
+ data["artist_url"] = extr('title="', '"').strip()
data["artist"] = extr(">", "<")
data["fa_category"] = extr("<b>Category:</b>", "<").strip()
data["theme"] = extr("<b>Theme:</b>", "<").strip()
@@ -139,7 +141,6 @@ class FuraffinityExtractor(Extractor):
'style="padding:8px">', ' </td>')
data["folders"] = () # folders not present in old layout
- data["artist_url"] = data["artist"].replace("_", "").lower()
data["user"] = self.user or data["artist_url"]
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
data["description"] = self._process_description(data["_description"])
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index f36b1f5..7a9e3c5 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -41,6 +41,11 @@ class RedditExtractor(Extractor):
self._extract_video = self._extract_video_dash
videos = True
+ selftext = self.config("selftext")
+ if selftext is None:
+ selftext = self.api.comments
+ selftext = True if selftext else False
+
submissions = self.submissions()
visited = set()
depth = 0
@@ -92,12 +97,12 @@ class RedditExtractor(Extractor):
elif parentdir:
yield Message.Directory, comments[0]
+ if selftext and submission:
+ for url in text.extract_iter(
+ submission["selftext_html"] or "", ' href="', '"'):
+ urls.append((url, submission))
+
if self.api.comments:
- if submission:
- for url in text.extract_iter(
- submission["selftext_html"] or "",
- ' href="', '"'):
- urls.append((url, submission))
for comment in comments:
html = comment["body_html"] or ""
href = (' href="' in html)
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 506f6ac..612faac 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -163,24 +163,27 @@ class RedgifsSearchExtractor(RedgifsExtractor):
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
pattern = (r"(?:https?://)?(?:\w+\.)?redgifs\.com"
- r"/(?:gifs/([^/?#]+)|browse)(?:/?\?([^#]+))?")
+ r"/(?:gifs/([^/?#]+)|search(?:/gifs)?()|browse)"
+ r"(?:/?\?([^#]+))?")
example = "https://www.redgifs.com/gifs/TAG"
- def __init__(self, match):
- RedgifsExtractor.__init__(self, match)
- self.search, self.query = match.groups()
-
def metadata(self):
- self.params = text.parse_query(self.query)
- if self.search:
- self.params["tags"] = text.unquote(self.search)
+ tag, self.search, query = self.groups
- return {"search": (self.params.get("tags") or
- self.params.get("order") or
+ self.params = params = text.parse_query(query)
+ if tag is not None:
+ params["tags"] = text.unquote(tag)
+
+ return {"search": (params.get("query") or
+ params.get("tags") or
+ params.get("order") or
"trending")}
def gifs(self):
- return self.api.search(self.params)
+ if self.search is None:
+ return self.api.gifs_search(self.params)
+ else:
+ return self.api.search_gifs(self.params)
class RedgifsImageExtractor(RedgifsExtractor):
@@ -205,9 +208,9 @@ class RedgifsAPI():
def __init__(self, extractor):
self.extractor = extractor
self.headers = {
- "authorization" : None,
- "content-type" : "application/json",
- "x-customheader": extractor.root + "/",
+ "Accept" : "application/json, text/plain, */*",
+ "Referer" : extractor.root + "/",
+ "Authorization" : None,
"Origin" : extractor.root,
}
@@ -242,14 +245,18 @@ class RedgifsAPI():
params = {"count": 30, "order": order}
return self._pagination(endpoint, params)
- def search(self, params):
+ def gifs_search(self, params):
endpoint = "/v2/gifs/search"
params["search_text"] = params.pop("tags", None)
return self._pagination(endpoint, params)
+ def search_gifs(self, params):
+ endpoint = "/v2/search/gifs"
+ return self._pagination(endpoint, params)
+
def _call(self, endpoint, params=None):
url = self.API_ROOT + endpoint
- self.headers["authorization"] = self._auth()
+ self.headers["Authorization"] = self._auth()
return self.extractor.request(
url, params=params, headers=self.headers).json()
@@ -270,6 +277,6 @@ class RedgifsAPI():
def _auth(self):
# https://github.com/Redgifs/api/wiki/Temporary-tokens
url = self.API_ROOT + "/v2/auth/temporary"
- self.headers["authorization"] = None
+ self.headers["Authorization"] = None
return "Bearer " + self.extractor.request(
url, headers=self.headers).json()["token"]
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index b5cdb9c..b2f31dd 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -241,9 +241,10 @@ class SankakuAPI():
if response.status_code == 429:
until = response.headers.get("X-RateLimit-Reset")
- if not until and b"tags-limit" in response.content:
- raise exception.StopExtraction("Search tag limit exceeded")
- seconds = None if until else 60
+ if not until and b"_tags-explicit-limit" in response.content:
+ raise exception.AuthorizationError(
+ "Search tag limit exceeded")
+ seconds = None if until else 600
self.extractor.wait(until=until, seconds=seconds)
continue
diff --git a/gallery_dl/extractor/tenor.py b/gallery_dl/extractor/tenor.py
new file mode 100644
index 0000000..7273eac
--- /dev/null
+++ b/gallery_dl/extractor/tenor.py
@@ -0,0 +1,148 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://tenor.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?tenor\.com/(?:\w\w(?:-\w\w)?/)?"
+
+
+class TenorExtractor(Extractor):
+ """Base class for tenor extractors"""
+ category = "tenor"
+ root = "https://tenor.com"
+ filename_fmt = "{id}{title:? //}.{extension}"
+ archive_fmt = "{id}"
+ request_interval = (0.5, 1.5)
+
+ def _init(self):
+ formats = self.config("format")
+ if formats is None:
+ self.formats = ("gif", "mp4", "webm", "webp")
+ else:
+ if isinstance(formats, str):
+ formats = formats.split(",")
+ self.formats = formats
+
+ def items(self):
+ meta = self.metadata()
+
+ for gif in self.gifs():
+ fmt = self._extract_format(gif)
+ if not fmt:
+ self.log.warning("%s: Selected format(s) not available",
+ gif.get("id"))
+ continue
+
+ url = fmt["url"]
+ gif["width"], gif["height"] = fmt["dims"]
+ gif["title"] = gif["h1_title"][:-4]
+ gif["description"] = gif.pop("content_description", "")
+ gif["date"] = text.parse_timestamp(gif["created"])
+ if meta:
+ gif.update(meta)
+
+ yield Message.Directory, gif
+ yield Message.Url, url, text.nameext_from_url(url, gif)
+
+ def _extract_format(self, gif):
+ media_formats = gif["media_formats"]
+ for fmt in self.formats:
+ if fmt in media_formats:
+ return media_formats[fmt]
+
+ def _search_results(self, query):
+ url = "https://tenor.googleapis.com/v2/search"
+ params = {
+ "appversion": "browser-r20250225-1",
+ "prettyPrint": "false",
+ "key": "AIzaSyC-P6_qz3FzCoXGLk6tgitZo4jEJ5mLzD8",
+ "client_key": "tenor_web",
+ "locale": "en",
+ "anon_id": "",
+ "q": query,
+ "limit": "50",
+ "contentfilter": "low",
+ "media_filter": "gif,gif_transparent,mediumgif,tinygif,"
+ "tinygif_transparent,webp,webp_transparent,"
+ "tinywebp,tinywebp_transparent,tinymp4,mp4,webm,"
+ "originalgif,gifpreview",
+ "fields": "next,results.id,results.media_formats,results.title,"
+ "results.h1_title,results.long_title,results.itemurl,"
+ "results.url,results.created,results.user,"
+ "results.shares,results.embed,results.hasaudio,"
+ "results.policy_status,results.source_id,results.flags,"
+ "results.tags,results.content_rating,results.bg_color,"
+ "results.legacy_info,results.geographic_restriction,"
+ "results.content_description",
+ "pos": None,
+ "component": "web_desktop",
+ }
+ headers = {
+ "Referer": self.root + "/",
+ "Origin" : self.root,
+ }
+
+ while True:
+ data = self.request(url, params=params, headers=headers).json()
+
+ yield from data["results"]
+
+ params["pos"] = data.get("next")
+ if not params["pos"]:
+ return
+
+ def metadata(self):
+ return False
+
+ def gifs(self):
+ return ()
+
+
+class TenorImageExtractor(TenorExtractor):
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"view/(?:[^/?#]*-)?(\d+)"
+ example = "https://tenor.com/view/SLUG-1234567890"
+
+ def gifs(self):
+ url = "{}/view/{}".format(self.root, self.groups[0])
+ page = self.request(url).text
+ pos = page.index('id="store-cache"')
+ data = util.json_loads(text.extract(page, ">", "</script>", pos)[0])
+ return (data["gifs"]["byId"].popitem()[1]["results"][0],)
+
+
+class TenorSearchExtractor(TenorExtractor):
+ subcategory = "search"
+ directory_fmt = ("{category}", "{search_tags}")
+ pattern = BASE_PATTERN + r"search/([^/?#]+)"
+ example = "https://tenor.com/search/QUERY"
+
+ def metadata(self):
+ query = text.unquote(self.groups[0])
+ rest, _, last = query.rpartition("-")
+ if last == "gifs":
+ query = rest
+ self.search_tags = query.replace("-", " ")
+
+ return {"search_tags": self.search_tags}
+
+ def gifs(self):
+ return self._search_results(self.search_tags)
+
+
+class TenorUserExtractor(TenorExtractor):
+ subcategory = "user"
+ directory_fmt = ("{category}", "@{user[username]}")
+ pattern = BASE_PATTERN + r"(?:users|official)/([^/?#]+)"
+ example = "https://tenor.com/users/USER"
+
+ def gifs(self):
+ return self._search_results("@" + self.groups[0])
diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py
index f129b1c..203b1ac 100644
--- a/gallery_dl/extractor/tiktok.py
+++ b/gallery_dl/extractor/tiktok.py
@@ -219,6 +219,11 @@ class TiktokUserExtractor(TiktokExtractor):
self.log.debug("", exc_info=exc)
raise exception.ExtractionError("yt-dlp or youtube-dl is required "
"for this feature!")
+
+ ytdl_range = self.config("tiktok-range")
+ if ytdl_range is None or not ytdl_range and ytdl_range != 0:
+ ytdl_range = ""
+
extr_opts = {
"extract_flat" : True,
"ignore_no_formats_error": True,
@@ -227,7 +232,7 @@ class TiktokUserExtractor(TiktokExtractor):
"retries" : self._retries,
"socket_timeout" : self._timeout,
"nocheckcertificate" : not self._verify,
- "playlist_items" : str(self.config("tiktok-range", "")),
+ "playlist_items" : str(ytdl_range),
}
if self._proxies:
user_opts["proxy"] = self._proxies.get("http")
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index a53409c..524bd81 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -32,7 +32,11 @@ class VscoExtractor(Extractor):
yield Message.Directory, {"user": self.user}
for img in self.images():
- if not img or "responsive_url" not in img:
+ if not img:
+ continue
+ elif "playback_url" in img:
+ img = self._transform_video(img)
+ elif "responsive_url" not in img:
continue
if img["is_video"]:
@@ -118,6 +122,15 @@ class VscoExtractor(Extractor):
media["image_meta"] = media.get("imageMeta")
return media
+ @staticmethod
+ def _transform_video(media):
+ media["is_video"] = True
+ media["grid_name"] = ""
+ media["video_url"] = media["playback_url"]
+ media["responsive_url"] = media["poster_url"]
+ media["upload_date"] = media["created_date"]
+ return media
+
class VscoUserExtractor(VscoExtractor):
"""Extractor for a vsco user profile"""
@@ -322,7 +335,7 @@ class VscoVideoExtractor(VscoExtractor):
"grid_name" : "",
"upload_date" : media["createdDate"],
"responsive_url": media["posterUrl"],
- "video_url" : "ytdl:" + media.get("playbackUrl"),
+ "video_url" : media.get("playbackUrl"),
"image_meta" : None,
"width" : media["width"],
"height" : media["height"],