# -*- coding: utf-8 -*-

# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://tenor.com/"""

from .common import Extractor, Message
from .. import text, util

# Optional scheme, the tenor.com host, and an optional "xx/" or "xx-xx/"
# path prefix (presumably a locale such as "en/" or "pt-BR/").
BASE_PATTERN = r"(?:https?://)?tenor\.com/(?:\w\w(?:-\w\w)?/)?"


class TenorExtractor(Extractor):
    """Base class for tenor extractors"""
    category = "tenor"
    root = "https://tenor.com"
    filename_fmt = "{id}{title:? //}.{extension}"
    archive_fmt = "{id}"
    request_interval = (0.5, 1.5)

    def _init(self):
        """Load the preferred media formats from the 'format' option.

        The option may be unset (a built-in default order is used),
        a comma-separated string, or an already split sequence of names.
        The result is stored in 'self.formats' in order of preference.
        """
        formats = self.config("format")
        if formats is None:
            self.formats = ("gif", "mp4", "webm", "webp")
        else:
            if isinstance(formats, str):
                # Tolerate whitespace around the separators ("gif, mp4");
                # an unstripped " mp4" would never match a format name.
                formats = [name.strip() for name in formats.split(",")]
            self.formats = formats

    def items(self):
        """Yield a Directory and a Url message for each available GIF.

        GIFs for which none of the selected formats exists are skipped
        with a warning.
        """
        for gif in self.gifs():

            if not (fmt := self._extract_format(gif)):
                self.log.warning("%s: Selected format(s) not available",
                                 gif.get("id"))
                continue

            url = fmt["url"]
            title = gif.pop("h1_title", "")
            # Drop a trailing " GIF" from the page title
            gif["title"] = title[:-4] if title.endswith(" GIF") else title
            gif["width"], gif["height"] = fmt.pop("dims") or (0, 0)
            gif["description"] = gif.pop("content_description", "")
            # Second-to-last path segment of the media URL
            gif["id_format"] = url.rsplit("/", 2)[1]
            gif["format"] = fmt["name"]
            gif["duration"] = fmt["duration"]
            gif["size"] = fmt["size"]
            gif["date"] = self.parse_timestamp(gif["created"])

            yield Message.Directory, "", gif
            yield Message.Url, url, text.nameext_from_url(url, gif)

    def _extract_format(self, gif):
        """Return the first entry of 'self.formats' that 'gif' provides.

        The returned media dict is the one stored in gif["media_formats"]
        and gets its format name written to its "name" key.
        Returns None when none of the selected formats is present.
        """
        media_formats = gif["media_formats"]
        for fmt in self.formats:
            if fmt in media_formats:
                media = media_formats[fmt]
                media["name"] = fmt
                return media
        return None

    def _search_results(self, query):
        """Generate all results of a search for 'query'.

        Requests page after page, passing each response's "next" value
        as "pos" of the following request, until no "next" is returned.
        """
        url = "https://tenor.googleapis.com/v2/search"
        params = {
            "appversion": "browser-r20250225-1",
            "prettyPrint": "false",
            "key": "AIzaSyC-P6_qz3FzCoXGLk6tgitZo4jEJ5mLzD8",
            "client_key": "tenor_web",
            "locale": "en",
            "anon_id": "",
            "q": query,
            "limit": "50",
            "contentfilter": "low",
            "media_filter": "gif,gif_transparent,mediumgif,tinygif,"
                            "tinygif_transparent,webp,webp_transparent,"
                            "tinywebp,tinywebp_transparent,tinymp4,mp4,webm,"
                            "originalgif,gifpreview",
            "fields": "next,results.id,results.media_formats,results.title,"
                      "results.h1_title,results.long_title,results.itemurl,"
                      "results.url,results.created,results.user,"
                      "results.shares,results.embed,results.hasaudio,"
                      "results.policy_status,results.source_id,results.flags,"
                      "results.tags,results.content_rating,results.bg_color,"
                      "results.legacy_info,results.geographic_restriction,"
                      "results.content_description",
            # No position for the first page; updated after each response
            "pos": None,
            "component": "web_desktop",
        }
        headers = {
            "Referer": self.root + "/",
            "Origin" : self.root,
        }

        while True:
            data = self.request_json(url, params=params, headers=headers)

            yield from data["results"]

            params["pos"] = data.get("next")
            if not params["pos"]:
                return

    def metadata(self):
        """Return False (default metadata hook)."""
        return False

    def gifs(self):
        """Return an iterable of GIF objects; overridden by subclasses."""
        return ()


class TenorImageExtractor(TenorExtractor):
    """Extractor for a single tenor.com/view/ page"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"view/(?:[^/?#]*-)?(\d+)"
    example = "https://tenor.com/view/SLUG-1234567890"

    def gifs(self):
        """Return a 1-tuple with the GIF object embedded in the view page."""
        url = f"{self.root}/view/{self.groups[0]}"
        page = self.request(url).text
        pos = page.index('id="store-cache"')
        # Read from the end of the opening tag up to the closing tag.
        # The end marker must not be empty: an empty marker matches right
        # after ">" and leaves nothing to parse.
        # NOTE(review): assumes "store-cache" is a <script> element.
        data = util.json_loads(text.extract(page, ">", "</script>", pos)[0])
        # Take one entry from "byId" and use its first result
        return (data["gifs"]["byId"].popitem()[1]["results"][0],)


class TenorSearchExtractor(TenorExtractor):
    """Extractor for tenor.com/search/ results"""
    subcategory = "search"
    directory_fmt = ("{category}", "{search_tags}")
    pattern = BASE_PATTERN + r"search/([^/?#]+)"
    example = "https://tenor.com/search/QUERY"

    def gifs(self):
        """Generate the results of searching for the tags in the URL slug."""
        query = text.unquote(self.groups[0])
        # Drop a trailing "-gifs" component from the slug
        rest, _, last = query.rpartition("-")
        if last == "gifs":
            query = rest
        # Remaining hyphens separate the individual search terms
        self.kwdict["search_tags"] = search_tags = query.replace("-", " ")
        return self._search_results(search_tags)


class TenorUserExtractor(TenorExtractor):
    """Extractor for tenor.com/users/ and tenor.com/official/ pages"""
    subcategory = "user"
    directory_fmt = ("{category}", "@{user[username]}")
    pattern = BASE_PATTERN + r"(?:users|official)/([^/?#]+)"
    example = "https://tenor.com/users/USER"

    def gifs(self):
        """Generate the results of searching for '@' plus the user name."""
        return self._search_results("@" + self.groups[0])