summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/tenor.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/tenor.py')
-rw-r--r--gallery_dl/extractor/tenor.py148
1 files changed, 148 insertions, 0 deletions
diff --git a/gallery_dl/extractor/tenor.py b/gallery_dl/extractor/tenor.py
new file mode 100644
index 0000000..7273eac
--- /dev/null
+++ b/gallery_dl/extractor/tenor.py
@@ -0,0 +1,148 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://tenor.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?tenor\.com/(?:\w\w(?:-\w\w)?/)?"
+
+
+class TenorExtractor(Extractor):
+ """Base class for tenor extractors"""
+ category = "tenor"
+ root = "https://tenor.com"
+ filename_fmt = "{id}{title:? //}.{extension}"
+ archive_fmt = "{id}"
+ request_interval = (0.5, 1.5)
+
+ def _init(self):
+ formats = self.config("format")
+ if formats is None:
+ self.formats = ("gif", "mp4", "webm", "webp")
+ else:
+ if isinstance(formats, str):
+ formats = formats.split(",")
+ self.formats = formats
+
+ def items(self):
+ meta = self.metadata()
+
+ for gif in self.gifs():
+ fmt = self._extract_format(gif)
+ if not fmt:
+ self.log.warning("%s: Selected format(s) not available",
+ gif.get("id"))
+ continue
+
+ url = fmt["url"]
+ gif["width"], gif["height"] = fmt["dims"]
+ gif["title"] = gif["h1_title"][:-4]
+ gif["description"] = gif.pop("content_description", "")
+ gif["date"] = text.parse_timestamp(gif["created"])
+ if meta:
+ gif.update(meta)
+
+ yield Message.Directory, gif
+ yield Message.Url, url, text.nameext_from_url(url, gif)
+
+ def _extract_format(self, gif):
+ media_formats = gif["media_formats"]
+ for fmt in self.formats:
+ if fmt in media_formats:
+ return media_formats[fmt]
+
+ def _search_results(self, query):
+ url = "https://tenor.googleapis.com/v2/search"
+ params = {
+ "appversion": "browser-r20250225-1",
+ "prettyPrint": "false",
+ "key": "AIzaSyC-P6_qz3FzCoXGLk6tgitZo4jEJ5mLzD8",
+ "client_key": "tenor_web",
+ "locale": "en",
+ "anon_id": "",
+ "q": query,
+ "limit": "50",
+ "contentfilter": "low",
+ "media_filter": "gif,gif_transparent,mediumgif,tinygif,"
+ "tinygif_transparent,webp,webp_transparent,"
+ "tinywebp,tinywebp_transparent,tinymp4,mp4,webm,"
+ "originalgif,gifpreview",
+ "fields": "next,results.id,results.media_formats,results.title,"
+ "results.h1_title,results.long_title,results.itemurl,"
+ "results.url,results.created,results.user,"
+ "results.shares,results.embed,results.hasaudio,"
+ "results.policy_status,results.source_id,results.flags,"
+ "results.tags,results.content_rating,results.bg_color,"
+ "results.legacy_info,results.geographic_restriction,"
+ "results.content_description",
+ "pos": None,
+ "component": "web_desktop",
+ }
+ headers = {
+ "Referer": self.root + "/",
+ "Origin" : self.root,
+ }
+
+ while True:
+ data = self.request(url, params=params, headers=headers).json()
+
+ yield from data["results"]
+
+ params["pos"] = data.get("next")
+ if not params["pos"]:
+ return
+
+ def metadata(self):
+ return False
+
+ def gifs(self):
+ return ()
+
+
+class TenorImageExtractor(TenorExtractor):
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"view/(?:[^/?#]*-)?(\d+)"
+ example = "https://tenor.com/view/SLUG-1234567890"
+
+ def gifs(self):
+ url = "{}/view/{}".format(self.root, self.groups[0])
+ page = self.request(url).text
+ pos = page.index('id="store-cache"')
+ data = util.json_loads(text.extract(page, ">", "</script>", pos)[0])
+ return (data["gifs"]["byId"].popitem()[1]["results"][0],)
+
+
+class TenorSearchExtractor(TenorExtractor):
+ subcategory = "search"
+ directory_fmt = ("{category}", "{search_tags}")
+ pattern = BASE_PATTERN + r"search/([^/?#]+)"
+ example = "https://tenor.com/search/QUERY"
+
+ def metadata(self):
+ query = text.unquote(self.groups[0])
+ rest, _, last = query.rpartition("-")
+ if last == "gifs":
+ query = rest
+ self.search_tags = query.replace("-", " ")
+
+ return {"search_tags": self.search_tags}
+
+ def gifs(self):
+ return self._search_results(self.search_tags)
+
+
+class TenorUserExtractor(TenorExtractor):
+ subcategory = "user"
+ directory_fmt = ("{category}", "@{user[username]}")
+ pattern = BASE_PATTERN + r"(?:users|official)/([^/?#]+)"
+ example = "https://tenor.com/users/USER"
+
+ def gifs(self):
+ return self._search_results("@" + self.groups[0])