diff options
Diffstat (limited to 'gallery_dl/extractor/imgur.py')
| -rw-r--r-- | gallery_dl/extractor/imgur.py | 269 |
1 files changed, 158 insertions, 111 deletions
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index cb36c30..b1be995 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -10,8 +10,6 @@ from .common import Extractor, Message from .. import text, exception -import itertools -import json BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.com" @@ -21,103 +19,89 @@ class ImgurExtractor(Extractor): """Base class for imgur extractors""" category = "imgur" root = "https://imgur.com" - api_root = "https://api.imgur.com" def __init__(self, match): Extractor.__init__(self, match) + self.api = ImgurAPI(self) self.key = match.group(1) self.mp4 = self.config("mp4", True) - def _extract_data(self, path): - response = self.request(self.root + path, notfound=self.subcategory) - data = json.loads(text.extract( - response.text, "image : ", ",\n")[0]) + def _prepare(self, image): try: - del data["adConfig"] - del data["isAd"] + del image["ad_url"] + del image["ad_type"] + del image["ad_config"] except KeyError: pass - return data - def _prepare(self, image): - image["ext"] = image["ext"].partition("?")[0] - if image["ext"] == ".gif" and ( - (self.mp4 and image["prefer_video"]) or self.mp4 == "always"): - image["ext"] = ".mp4" - url = "https://i.imgur.com/" + image["hash"] + image["ext"] - image["extension"] = image["ext"][1:] + url = image["mp4"] if image["animated"] and self.mp4 else image["link"] + image["date"] = text.parse_timestamp(image["datetime"]) + text.nameext_from_url(url, image) + return url - def _items_apiv3(self, urlfmt): + def _items_queue(self, items): album_ex = ImgurAlbumExtractor image_ex = ImgurImageExtractor - params = { - "IMGURPLATFORM" : "web", - "album_previews": "0", - "client_id" : "546c25a59c58ad7", - } - headers = { - "Origin" : self.root, - "Referer": self.root + "/", - } - yield Message.Version, 1 - - for num in itertools.count(0): - url = urlfmt.format(num) - data = self.request(url, params=params, headers=headers).json() - - for item in data["data"]: - item["_extractor"] = album_ex if item["is_album"] else image_ex - yield Message.Queue, item["link"], item - - if len(data["data"]) < 60: - return + for item in items: + item["_extractor"] = album_ex if item["is_album"] else image_ex + yield Message.Queue, item["link"], item class ImgurImageExtractor(ImgurExtractor): """Extractor for individual images on imgur.com""" subcategory = "image" - filename_fmt = "{category}_{hash}{title:?_//}.{extension}" - archive_fmt = "{hash}" + filename_fmt = "{category}_{id}{title:?_//}.{extension}" + archive_fmt = "{id}" pattern = BASE_PATTERN + r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?" test = ( ("https://imgur.com/21yMxCS", { "url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2", "content": "0c8768055e4e20e7c7259608b67799171b691140", "keyword": { - "animated": False, - "datetime": "2016-11-10 14:24:35", - "description": str, - "ext": ".png", - "extension": "png", - "hash": "21yMxCS", - "height": "32", - "is_moderated": False, - "is_safe": False, - "is_viral": 0, - "looping": False, - "mimetype": "image/png", - "name": None, - "prefer_video": False, - "size": 182, - "source": "", - "title": "Test", - "video_host": None, - "video_source": None, - "width": "64", + "account_id" : None, + "account_url" : None, + "animated" : False, + "bandwidth" : int, + "date" : "type:datetime", + "datetime" : 1478787875, + "description" : None, + "edited" : "0", + "extension" : "png", + "favorite" : False, + "filename" : "21yMxCS", + "has_sound" : False, + "height" : 32, + "id" : "21yMxCS", + "in_gallery" : False, + "in_most_viral": False, + "is_ad" : False, + "link" : "https://i.imgur.com/21yMxCS.png", + "nsfw" : False, + "section" : None, + "size" : 182, + "tags" : [], + "title" : "Test", + "type" : "image/png", + "views" : int, + "vote" : None, + "width" : 64, }, }), ("http://imgur.com/0gybAXR", { # gifv/mp4 video "url": "a2220eb265a55b0c95e0d3d721ec7665460e3fd7", "content": "a3c080e43f58f55243ab830569ba02309d59abfc", }), + ("https://imgur.com/XFfsmuC", { # missing title in API response (#467) + "keyword": {"title": "Tears are a natural response to irritants"}, + }), ("https://imgur.com/HjoXJAd", { # url ends with '.jpg?1' - "url": "73f361b50753ab25da64160aa50bc5d139480d45", + "url": "ec2cf11a2bfb4939feff374781a6e6f3e9af8e8e", }), ("https://imgur.com/zzzzzzz", { # not found - "exception": exception.NotFoundError, + "exception": exception.HttpError, }), ("https://www.imgur.com/21yMxCS"), # www ("https://m.imgur.com/21yMxCS"), # mobile @@ -129,7 +113,11 @@ class ImgurImageExtractor(ImgurExtractor): ) def items(self): - image = self._extract_data("/" + self.key) + image = self.api.image(self.key) + if not image["title"]: + page = self.request(self.root + "/" + self.key, fatal=False).text + title = text.extract(page, "<title>", "<")[0] + image["title"] = (title or "").rpartition(" - ")[0].strip() url = self._prepare(image) yield Message.Version, 1 yield Message.Directory, image @@ -139,42 +127,67 @@ class ImgurImageExtractor(ImgurExtractor): class ImgurAlbumExtractor(ImgurExtractor): """Extractor for imgur albums""" subcategory = "album" - directory_fmt = ("{category}", "{album[hash]}{album[title]:? - //}") - filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}" - archive_fmt = "{album[hash]}_{hash}" + directory_fmt = ("{category}", "{album[id]}{album[title]:? - //}") + filename_fmt = "{category}_{album[id]}_{num:>03}_{id}.{extension}" + archive_fmt = "{album[id]}_{id}" pattern = BASE_PATTERN + r"/(?:a|t/unmuted)/(\w{7}|\w{5})" test = ( ("https://imgur.com/a/TcBmP", { "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563", "keyword": { "album": { - "album_cover": "693j2Kr", - "album_description": None, - "cover": "693j2Kr", - "datetime": "2015-10-09 10:37:50", - "description": None, - "hash": "TcBmP", - "id": "TcBmP", - "is_album": True, - "num_images": "19", - "title": "138", - "title_clean": "TcBmP", - "views": str, + "account_id" : None, + "account_url" : None, + "cover" : "693j2Kr", + "cover_edited": None, + "cover_height": 1400, + "cover_width" : 951, + "date" : "type:datetime", + "datetime" : 1444387070, + "description" : None, + "favorite" : False, + "id" : "TcBmP", + "images_count": 19, + "in_gallery" : False, + "is_ad" : False, + "is_album" : True, + "layout" : "blog", + "link" : "https://imgur.com/a/TcBmP", + "nsfw" : False, + "privacy" : "hidden", + "section" : None, + "title" : "138", + "views" : int, }, - "animated": bool, - "datetime": str, - "extension": str, - "hash": str, - "height": int, - "num": int, - "prefer_video": bool, - "size": int, - "title": str, - "width": int, + "account_id" : None, + "account_url": None, + "animated" : bool, + "bandwidth" : int, + "date" : "type:datetime", + "datetime" : int, + "description": None, + "edited" : "0", + "favorite" : False, + "has_sound" : False, + "height" : int, + "id" : str, + "in_gallery" : False, + "is_ad" : False, + "link" : r"re:https://i\.imgur\.com/\w+\.jpg", + "nsfw" : None, + "num" : int, + "section" : None, + "size" : int, + "tags" : list, + "title" : None, + "type" : "image/jpeg", + "views" : int, + "vote" : None, + "width" : int, }, }), ("https://imgur.com/a/eD9CT", { # large album - "url": "4ee94de31ff26be416271bc0b1ea27b9349c9937", + "url": "de748c181a04d18bef1de9d4f4866ef0a06d632b", }), ("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash "url": "695ef0c950023362a0163ee5041796300db76674", @@ -183,21 +196,22 @@ class ImgurAlbumExtractor(ImgurExtractor): "url": "86b4747f8147cec7602f0214e267309af73a8655", }), ("https://imgur.com/a/TcBmQ", { - "exception": exception.NotFoundError, + "exception": exception.HttpError, }), ("https://www.imgur.com/a/TcBmP"), # www ("https://m.imgur.com/a/TcBmP"), # mobile ) def items(self): - album = self._extract_data("/a/" + self.key + "/all") - images = album["album_images"]["images"] - del album["album_images"] + album = self.api.album(self.key) + album["date"] = text.parse_timestamp(album["datetime"]) + images = album["images"] - if int(album["num_images"]) > len(images): - url = "{}/ajaxalbums/getimages/{}/hit.json".format( - self.root, self.key) - images = self.request(url).json()["data"]["images"] + try: + del album["images"] + del album["ad_config"] + except KeyError: + pass yield Message.Version, 1 yield Message.Directory, {"album": album, "count": len(images)} @@ -224,13 +238,11 @@ class ImgurGalleryExtractor(ImgurExtractor): def items(self): url = self.root + "/a/" + self.key with self.request(url, method="HEAD", fatal=False) as response: - code = response.status_code - - if code < 400: - extr = ImgurAlbumExtractor - else: - extr = ImgurImageExtractor - url = self.root + "/" + self.key + if response.status_code < 400: + extr = ImgurAlbumExtractor + else: + extr = ImgurImageExtractor + url = self.root + "/" + self.key yield Message.Version, 1 yield Message.Queue, url, {"_extractor": extr} @@ -251,9 +263,7 @@ class ImgurUserExtractor(ImgurExtractor): ) def items(self): - urlfmt = "{}/3/account/{}/submissions/{{}}/newest".format( - self.api_root, self.key) - return self._items_apiv3(urlfmt) + return self._items_queue(self.api.account_submissions(self.key)) class ImgurFavoriteExtractor(ImgurExtractor): @@ -267,6 +277,43 @@ class ImgurFavoriteExtractor(ImgurExtractor): }) def items(self): - urlfmt = "{}/3/account/{}/gallery_favorites/{{}}/newest".format( - self.api_root, self.key) - return self._items_apiv3(urlfmt) + return self._items_queue(self.api.account_favorites(self.key)) + + +class ImgurAPI(): + + def __init__(self, extractor): + self.extractor = extractor + self.headers = { + "Authorization": "Client-ID " + extractor.config( + "client-id", "546c25a59c58ad7"), + } + + def account_favorites(self, account): + endpoint = "account/{}/gallery_favorites".format(account) + return self._pagination(endpoint) + + def account_submissions(self, account): + endpoint = "account/{}/submissions".format(account) + return self._pagination(endpoint) + + def album(self, album_hash): + return self._call("album/" + album_hash) + + def image(self, image_hash): + return self._call("image/" + image_hash) + + def _call(self, endpoint): + return self.extractor.request( + "https://api.imgur.com/3/" + endpoint, headers=self.headers, + ).json()["data"] + + def _pagination(self, endpoint): + num = 0 + + while True: + data = self._call("{}/{}".format(endpoint, num)) + if not data: + return + yield from data + num += 1 |
