summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/gfycat.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/gfycat.py')
-rw-r--r-- gallery_dl/extractor/gfycat.py 110
1 files changed, 105 insertions, 5 deletions
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index 9cd3b95..b4b0e49 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -6,9 +6,11 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://gfycat.com/"""
+"""Extractors for https://gfycat.com/"""
from .common import Extractor, Message
+from .. import text, exception
+from ..cache import cache
class GfycatExtractor(Extractor):
@@ -47,11 +49,45 @@ class GfycatExtractor(Extractor):
return ()
+class GfycatUserExtractor(GfycatExtractor):
+    """Extractor for the gfycats listed on a gfycat user profile"""
+    subcategory = "user"
+    directory_fmt = ("{category}", "{userName}")
+    # the single capture group is the user name following the '@'
+    pattern = r"(?:https?://)?gfycat\.com/@([^/?&#]+)"
+    test = ("https://gfycat.com/@gretta", {
+        "pattern": r"https://giant\.gfycat\.com/[A-Za-z]+\.mp4",
+        "count": ">= 100",
+    })
+
+    def gfycats(self):
+        """Return an iterable over the gfycats of user 'self.key'"""
+        # NOTE(review): 'self.key' is presumably the captured user name,
+        # set by the base class outside this view -- confirm
+        api = GfycatAPI(self)
+        return api.user(self.key)
+
+
+class GfycatSearchExtractor(GfycatExtractor):
+    """Extractor for gfycat search results"""
+    subcategory = "search"
+    directory_fmt = ("{category}", "Search", "{search}")
+    # the single capture group is the still URL-encoded search term
+    pattern = r"(?:https?://)?gfycat\.com/gifs/search/([^/?&#]+)"
+    test = ("https://gfycat.com/gifs/search/funny+animals", {
+        "pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",
+        "archive": False,
+        "range": "100-300",
+        "count": "> 200",
+    })
+
+    def metadata(self):
+        """Decode the search term and return it as {"search": term}
+
+        The decoded term is also stored back into 'self.key',
+        which is what gfycats() passes to the API.
+        """
+        # Translate '+' to a space BEFORE percent-decoding. Doing it the
+        # other way round would first turn an escaped plus sign ('%2B')
+        # into '+' and then wrongly into a space.
+        self.key = text.unquote(self.key.replace("+", " "))
+        return {"search": self.key}
+
+    def gfycats(self):
+        """Return an iterable over the search results for 'self.key'"""
+        # NOTE(review): relies on metadata() having decoded 'self.key'
+        # beforehand; the call order is decided by the base class,
+        # which is outside this view -- confirm
+        return GfycatAPI(self).search(self.key)
+
+
class GfycatImageExtractor(GfycatExtractor):
"""Extractor for individual images from gfycat.com"""
subcategory = "image"
pattern = (r"(?:https?://)?(?:\w+\.)?gfycat\.com"
- r"/(?:gifs/detail/|\w+/)?([A-Za-z]+)")
+ r"/(?:gifs/detail/|\w+/)?([A-Za-z]{8,})")
test = (
("https://gfycat.com/GrayGenerousCowrie", {
"url": "e0b5e1d7223108249b15c3c7898dd358dbfae045",
@@ -78,11 +114,75 @@ class GfycatImageExtractor(GfycatExtractor):
("https://gfycat.com/detail/UnequaledHastyAnkole?tagname=aww", {
"url": "e24c9f69897fd223343782425a429c5cab6a768e",
}),
+ # retry 404'ed videos on redgifs (#874)
+ ("https://www.gfycat.com/foolishforkedabyssiniancat", {
+ "pattern": "https://redgifs.com/watch/foolishforkedabyssiniancat",
+ }),
("https://gfycat.com/gifs/detail/UnequaledHastyAnkole"),
("https://gfycat.com/ifr/UnequaledHastyAnkole"),
("https://gfycat.com/ru/UnequaledHastyAnkole"),
)
- def gfycats(self):
- url = "https://api.gfycat.com/v1/gfycats/" + self.key
- return (self.request(url).json()["gfyItem"],)
+    def items(self):
+        """Yield the messages for a single gfycat
+
+        Yields a Directory and a Url message on success. If the API
+        request raises an HttpError, a single Queue message pointing to
+        the same ID on redgifs.com is yielded instead.
+        """
+        api = GfycatAPI(self)
+        try:
+            info = api.gfycat(self.key)
+        except exception.HttpError:
+            # imported only when the fallback is actually needed
+            from .redgifs import RedgifsImageExtractor
+            fallback = "https://redgifs.com/watch/" + self.key
+            yield Message.Queue, fallback, {
+                "_extractor": RedgifsImageExtractor}
+            return
+
+        media_url = self._select_format(info)
+        yield Message.Directory, info
+        yield Message.Url, media_url, info
+
+
+class GfycatAPI:
+    """Small client for the gfycat API, bound to one extractor
+
+    All HTTP traffic goes through the extractor's request() method.
+    """
+    API_ROOT = "https://api.gfycat.com"
+    ACCESS_KEY = "Anr96uuqt9EdamSCwK4txKPjMsf2M95Rfa5FLLhPFucu8H5HTzeutyAa"
+
+    def __init__(self, extractor):
+        self.extractor = extractor
+        # shared by all calls; 'Authorization' is filled in by _call()
+        self.headers = {}
+
+    def gfycat(self, gfycat_id):
+        """Return the 'gfyItem' object of a single gfycat"""
+        response = self._call("/v1/gfycats/" + gfycat_id)
+        return response["gfyItem"]
+
+    def user(self, user):
+        """Return a generator over all gfycats of 'user' (lowercased)"""
+        return self._pagination(
+            "/v1/users/{}/gfycats".format(user.lower()),
+            {"count": 100})
+
+    def search(self, query):
+        """Return a generator over all gfycats matching 'query'"""
+        return self._pagination(
+            "/v1/gfycats/search",
+            {"search_text": query, "count": 150})
+
+    # NOTE(review): presumably caches the token per 'category' for
+    # 3600 seconds -- confirm against the project's cache module
+    @cache(keyarg=1, maxage=3600)
+    def _authenticate_impl(self, category):
+        """Request a web token and return it as 'Bearer <token>'"""
+        root = self.extractor.root
+        response = self.extractor.request(
+            "https://weblogin." + category + ".com/oauth/webtoken",
+            method="POST",
+            headers={"Referer": root + "/", "Origin": root},
+            json={"access_key": self.ACCESS_KEY})
+        return "Bearer " + response.json()["access_token"]
+
+    def _call(self, endpoint, params=None):
+        """Send an authorized request to 'endpoint'; return decoded JSON"""
+        token = self._authenticate_impl(self.extractor.category)
+        self.headers["Authorization"] = token
+        response = self.extractor.request(
+            self.API_ROOT + endpoint, params=params, headers=self.headers)
+        return response.json()
+
+    def _pagination(self, endpoint, params):
+        """Yield every gfycat from a cursor-paginated endpoint
+
+        'params' is updated in place with the cursor of the next page.
+        """
+        while True:
+            page = self._call(endpoint, params)
+            batch = page["gfycats"]
+            yield from batch
+
+            # stop on a short page (only for responses without a
+            # 'found' field) or on an empty page
+            short_page = (
+                "found" not in page and len(batch) < params["count"])
+            if short_page or not batch:
+                return
+            params["cursor"] = page["cursor"]