diff options
Diffstat (limited to 'gallery_dl/extractor/deviantart.py')
| -rw-r--r-- | gallery_dl/extractor/deviantart.py | 259 |
1 files changed, 211 insertions, 48 deletions
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index a3187fa..37475df 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://www.deviantart.com/""" +"""Extractors for https://www.deviantart.com/""" from .common import Extractor, Message from .. import text, util, exception @@ -21,29 +21,30 @@ import re BASE_PATTERN = ( r"(?:https?://)?(?:" - r"(?:www\.)?deviantart\.com/(?!watch/)([\w-]+)|" - r"(?!www\.)([\w-]+)\.deviantart\.com)" + r"(?:www\.)?(?:fx)?deviantart\.com/(?!watch/)([\w-]+)|" + r"(?!www\.)([\w-]+)\.(?:fx)?deviantart\.com)" ) class DeviantartExtractor(Extractor): """Base class for deviantart extractors""" category = "deviantart" + root = "https://www.deviantart.com" directory_fmt = ("{category}", "{username}") filename_fmt = "{category}_{index}_{title}.{extension}" cookiedomain = None - root = "https://www.deviantart.com" + cookienames = ("auth", "auth_secure", "userinfo") _last_request = 0 def __init__(self, match): Extractor.__init__(self, match) - self.offset = 0 self.flat = self.config("flat", True) self.extra = self.config("extra", False) self.original = self.config("original", True) self.comments = self.config("comments", False) self.user = match.group(1) or match.group(2) self.group = False + self.offset = 0 self.api = None unwatch = self.config("auto-unwatch") @@ -69,6 +70,14 @@ class DeviantartExtractor(Extractor): self.offset += num return num + def login(self): + if not self._check_cookies(self.cookienames): + username, password = self._get_auth_info() + if not username: + return False + self._update_cookies(_login_impl(self, username, password)) + return True + def items(self): self.api = DeviantartOAuthAPI(self) @@ -87,6 +96,13 @@ class DeviantartExtractor(Extractor): yield Message.Queue, url, data continue + if deviation["is_deleted"]: + # prevent crashing in case the deviation really is + # deleted + self.log.debug( + "Skipping %s (deleted)", deviation["deviationid"]) + continue + if "premium_folder_data" in deviation: data = self._fetch_premium(deviation) if not data: @@ -346,9 +362,7 @@ class DeviantartExtractor(Extractor): kwargs["fatal"] = None diff = time.time() - DeviantartExtractor._last_request if diff < 2.0: - delay = 2.0 - diff - self.log.debug("Sleeping %.2f seconds", delay) - time.sleep(delay) + self.sleep(2.0 - diff, "request") while True: response = self.request(url, **kwargs) @@ -406,6 +420,16 @@ class DeviantartExtractor(Extractor): self.log.info("Unwatching %s", username) self.api.user_friends_unwatch(username) + def _eclipse_to_oauth(self, eclipse_api, deviations): + for obj in deviations: + deviation = obj["deviation"] if "deviation" in obj else obj + deviation_uuid = eclipse_api.deviation_extended_fetch( + deviation["deviationId"], + deviation["author"]["username"], + "journal" if deviation["isJournal"] else "art", + )["deviation"]["extended"]["deviationUuid"] + yield self.api.deviation(deviation_uuid) + class DeviantartUserExtractor(DeviantartExtractor): """Extractor for an artist's user profile""" @@ -676,15 +700,9 @@ class DeviantartFavoriteExtractor(DeviantartExtractor): ) def deviations(self): - folders = self.api.collections_folders(self.user) if self.flat: - deviations = itertools.chain.from_iterable( - self.api.collections(self.user, folder["folderid"]) - for folder in folders - ) - if self.offset: - deviations = util.advance(deviations, self.offset) - return deviations + return self.api.collections_all(self.user, self.offset) + folders = self.api.collections_folders(self.user) return self._folder_urls( folders, "favourites", DeviantartCollectionExtractor) @@ -796,6 +814,14 @@ class DeviantartStatusExtractor(DeviantartExtractor): "url" : "re:^https://sta.sh", }, }), + # "deleted" deviations in 'items' + ("https://www.deviantart.com/AndrejSKalin/posts/statuses", { + "options": (("journals", "none"), ("original", 0), + ("image-filter", "deviationid[:8] == '147C8B03'")), + "count": 2, + "archive": False, + "keyword": {"deviationid": "147C8B03-7D34-AE93-9241-FA3C6DBBC655"} + }), ("https://www.deviantart.com/justgalym/posts/statuses", { "options": (("journals", "text"),), "url": "c8744f7f733a3029116607b826321233c5ca452d", @@ -861,8 +887,7 @@ class DeviantartPopularExtractor(DeviantartExtractor): "{popular[range]}", "{popular[search]}") archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}" pattern = (r"(?:https?://)?www\.deviantart\.com/(?:" - r"search(?:/deviations)?" - r"|(?:deviations/?)?\?order=(popular-[^/?#]+)" + r"(?:deviations/?)?\?order=(popular-[^/?#]+)" r"|((?:[\w-]+/)*)(popular-[^/?#]+)" r")/?(?:\?([^#]*))?") test = ( @@ -876,8 +901,6 @@ class DeviantartPopularExtractor(DeviantartExtractor): "range": "1-30", "count": 30, }), - ("https://www.deviantart.com/search?q=tree"), - ("https://www.deviantart.com/search/deviations?order=popular-1-week"), ("https://www.deviantart.com/artisan/popular-all-time/?q=tree"), ) @@ -974,7 +997,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor): subcategory = "deviation" archive_fmt = "g_{_username}_{index}.{extension}" pattern = (BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)" - r"|(?:https?://)?(?:www\.)?deviantart\.com/" + r"|(?:https?://)?(?:www\.)?(?:fx)?deviantart\.com/" r"(?:view/|deviation/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)" r"(\d+)" # bare deviation ID without slug r"|(?:https?://)?fav\.me/d([0-9a-z]+)") # base36 @@ -1068,6 +1091,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor): # old /view/ URLs from the Wayback Machine ("https://www.deviantart.com/view.php?id=14864502"), ("http://www.deviantart.com/view-full.php?id=100842"), + + ("https://www.fxdeviantart.com/zzz/art/zzz-1234567890"), + ("https://www.fxdeviantart.com/view/1234567890"), ) skip = Extractor.skip @@ -1094,6 +1120,7 @@ class DeviantartScrapsExtractor(DeviantartExtractor): subcategory = "scraps" directory_fmt = ("{category}", "{username}", "Scraps") archive_fmt = "s_{_username}_{index}.{extension}" + cookiedomain = ".deviantart.com" pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b" test = ( ("https://www.deviantart.com/shimoda7/gallery/scraps", { @@ -1102,34 +1129,109 @@ class DeviantartScrapsExtractor(DeviantartExtractor): ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"), ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"), ) + + def deviations(self): + self.login() + + eclipse_api = DeviantartEclipseAPI(self) + return self._eclipse_to_oauth( + eclipse_api, eclipse_api.gallery_scraps(self.user, self.offset)) + + +class DeviantartSearchExtractor(DeviantartExtractor): + """Extractor for deviantart search results""" + subcategory = "search" + directory_fmt = ("{category}", "Search", "{search_tags}") + archive_fmt = "Q_{search_tags}_{index}.{extension}" cookiedomain = ".deviantart.com" - cookienames = ("auth", "auth_secure", "userinfo") - _warning = True + pattern = (r"(?:https?://)?www\.deviantart\.com" + r"/search(?:/deviations)?/?\?([^#]+)") + test = ( + ("https://www.deviantart.com/search?q=tree"), + ("https://www.deviantart.com/search/deviations?order=popular-1-week"), + ) + + skip = Extractor.skip + + def __init__(self, match): + DeviantartExtractor.__init__(self, match) + self.query = text.parse_query(self.user) + self.search = self.query.get("q", "") + self.user = "" + + def deviations(self): + logged_in = self.login() + + eclipse_api = DeviantartEclipseAPI(self) + search = (eclipse_api.search_deviations + if logged_in else self._search_html) + return self._eclipse_to_oauth(eclipse_api, search(self.query)) + + def prepare(self, deviation): + DeviantartExtractor.prepare(self, deviation) + deviation["search_tags"] = self.search + + def _search_html(self, params): + url = self.root + "/search" + deviation = { + "deviationId": None, + "author": {"username": "u"}, + "isJournal": False, + } + + while True: + page = self.request(url, params=params).text + + items , pos = text.rextract(page, r'\"items\":[', ']') + cursor, pos = text.extract(page, r'\"cursor\":\"', '\\', pos) + + for deviation_id in items.split(","): + deviation["deviationId"] = deviation_id + yield deviation + + if not cursor: + return + params["cursor"] = cursor + + +class DeviantartGallerySearchExtractor(DeviantartExtractor): + """Extractor for deviantart gallery searches""" + subcategory = "gallery-search" + archive_fmt = "g_{_username}_{index}.{extension}" + cookiedomain = ".deviantart.com" + pattern = BASE_PATTERN + r"/gallery/?\?(q=[^#]+)" + test = ( + ("https://www.deviantart.com/shimoda7/gallery?q=memory", { + "options": (("original", 0),), + "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", + }), + ("https://www.deviantart.com/shimoda7/gallery?q=memory&sort=popular"), + ) + + def __init__(self, match): + DeviantartExtractor.__init__(self, match) + self.query = match.group(3) def deviations(self): self.login() eclipse_api = DeviantartEclipseAPI(self) - for obj in eclipse_api.gallery_scraps(self.user, self.offset): - deviation = obj["deviation"] - deviation_uuid = eclipse_api.deviation_extended_fetch( - deviation["deviationId"], - deviation["author"]["username"], - "journal" if deviation["isJournal"] else "art", - )["deviation"]["extended"]["deviationUuid"] + info = eclipse_api.user_info(self.user) - yield self.api.deviation(deviation_uuid) + query = text.parse_query(self.query) + self.search = query["q"] - def login(self): - """Login and obtain session cookies""" - if not self._check_cookies(self.cookienames): - username, password = self._get_auth_info() - if username: - self._update_cookies(_login_impl(self, username, password)) - elif self._warning: - self.log.warning( - "No session cookies set: Unable to fetch mature scraps.") - DeviantartScrapsExtractor._warning = False + return self._eclipse_to_oauth( + eclipse_api, eclipse_api.galleries_search( + info["user"]["userId"], + self.search, + self.offset, + query.get("sort", "most-recent"), + )) + + def prepare(self, deviation): + DeviantartExtractor.prepare(self, deviation) + deviation["search_tags"] = self.search class DeviantartFollowingExtractor(DeviantartExtractor): @@ -1261,6 +1363,13 @@ class DeviantartOAuthAPI(): "mature_content": self.mature} return self._pagination(endpoint, params) + def collections_all(self, username, offset=0): + """Yield all deviations in a user's collection""" + endpoint = "/collections/all" + params = {"username": username, "offset": offset, "limit": 24, + "mature_content": self.mature} + return self._pagination(endpoint, params) + @memcache(keyarg=1) def collections_folders(self, username, offset=0): """Yield all collection folders of a specific user""" @@ -1411,7 +1520,7 @@ class DeviantartOAuthAPI(): while True: if self.delay: - time.sleep(self.delay) + self.extractor.sleep(self.delay, "api") self.authenticate(None if public else self.refresh_token_key) kwargs["headers"] = self.headers @@ -1480,6 +1589,15 @@ class DeviantartOAuthAPI(): self._metadata(results) if self.folders: self._folders(results) + else: # attempt to fix "deleted" deviations + for dev in self._shared_content(results): + if not dev["is_deleted"]: + continue + patch = self._call( + "/deviation/" + dev["deviationid"], fatal=False) + if patch: + dev.update(patch) + yield from results if not data["has_more"] and ( @@ -1497,6 +1615,14 @@ class DeviantartOAuthAPI(): return params["offset"] = int(params["offset"]) + len(results) + @staticmethod + def _shared_content(results): + """Return an iterable of shared deviations in 'results'""" + for result in results: + for item in result.get("items") or (): + if "deviation" in item: + yield item["deviation"] + def _pagination_list(self, endpoint, params, key="results"): result = [] result.extend(self._pagination(endpoint, params, False, key=key)) @@ -1585,6 +1711,29 @@ class DeviantartEclipseAPI(): } return self._pagination(endpoint, params) + def galleries_search(self, user_id, query, + offset=None, order="most-recent"): + endpoint = "/shared_api/galleries/search" + params = { + "userid": user_id, + "order" : order, + "q" : query, + "offset": offset, + "limit" : 24, + } + return self._pagination(endpoint, params) + + def search_deviations(self, params): + endpoint = "/da-browse/api/networkbar/search/deviations" + return self._pagination(endpoint, params, key="deviations") + + def user_info(self, user, expand=False): + endpoint = "/shared_api/user/info" + params = {"username": user} + if expand: + params["expand"] = "user.stats,user.profile,user.watch" + return self._call(endpoint, params) + def user_watching(self, user, offset=None): endpoint = "/da-user-profile/api/module/watching" params = { @@ -1611,23 +1760,37 @@ class DeviantartEclipseAPI(): except Exception: return {"error": response.text} - def _pagination(self, endpoint, params): + def _pagination(self, endpoint, params, key="results"): + limit = params.get("limit", 24) + warn = True + while True: data = self._call(endpoint, params) - results = data.get("results") + results = data.get(key) if results is None: return + if len(results) < limit and warn and data.get("hasMore"): + warn = False + self.log.warning( + "Private deviations detected! " + "Provide login credentials or session cookies " + "to be able to access them.") yield from results if not data.get("hasMore"): return - next_offset = data.get("nextOffset") - if next_offset: - params["offset"] = next_offset + if "nextCursor" in data: + params["offset"] = None + params["cursor"] = data["nextCursor"] + elif "nextOffset" in data: + params["offset"] = data["nextOffset"] + params["cursor"] = None + elif params.get("offset") is None: + return else: - params["offset"] += params["limit"] + params["offset"] = int(params["offset"]) + len(results) def _module_id_watching(self, user): url = "{}/{}/about".format(self.extractor.root, user) |
