diff options
Diffstat (limited to 'gallery_dl/extractor/deviantart.py')
| -rw-r--r-- | gallery_dl/extractor/deviantart.py | 428 |
1 files changed, 179 insertions, 249 deletions
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index d6669d1..2dcf0b7 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -27,14 +27,15 @@ BASE_PATTERN = ( class DeviantartExtractor(Extractor): - """Base class for deviantart extractors using the OAuth API""" + """Base class for deviantart extractors""" category = "deviantart" directory_fmt = ("{category}", "{username}") filename_fmt = "{category}_{index}_{title}.{extension}" cookiedomain = None root = "https://www.deviantart.com" + _last_request = 0 - def __init__(self, match=None): + def __init__(self, match): Extractor.__init__(self, match) self.offset = 0 self.flat = self.config("flat", True) @@ -43,10 +44,10 @@ class DeviantartExtractor(Extractor): self.original = self.config("original", True) self.user = match.group(1) or match.group(2) self.group = False - self.api = DeviantartAPI(self) + self.api = None if self.quality: - self.quality = "q_{}".format(self.quality) + self.quality = ",q_{}".format(self.quality) if self.original != "image": self._update_content = self._update_content_default @@ -64,6 +65,8 @@ class DeviantartExtractor(Extractor): return num def items(self): + self.api = DeviantartOAuthAPI(self) + if self.user: profile = self.api.user_profile(self.user) self.group = not profile @@ -95,12 +98,12 @@ class DeviantartExtractor(Extractor): # https://github.com/r888888888/danbooru/issues/4069 intermediary, count = re.subn( r"(/f/[^/]+/[^/]+)/v\d+/.*", - r"/intermediary\1", content["src"]) + r"/intermediary\1", content["src"], 1) if count and self._check_url(intermediary): content["src"] = intermediary if self.quality: content["src"] = re.sub( - r"q_\d+", self.quality, content["src"]) + r",q_\d+", self.quality, content["src"], 1) yield self.commit(deviation, content) @@ -266,6 +269,23 @@ class DeviantartExtractor(Extractor): def _check_url(self, url): return self.request(url, method="HEAD", fatal=False).status_code < 400 + def _limited_request(self, url, **kwargs): + """Limits HTTP requests to one every 2 seconds""" + kwargs["fatal"] = None + diff = time.time() - DeviantartExtractor._last_request + if diff < 2.0: + delay = 2.0 - diff + self.log.debug("Sleeping %.2f seconds", delay) + time.sleep(delay) + + while True: + response = self.request(url, **kwargs) + if response.status_code != 403 or \ + b"Request blocked." not in response.content: + DeviantartExtractor._last_request = time.time() + return response + self.wait(seconds=180) + class DeviantartUserExtractor(DeviantartExtractor): """Extractor for an artist's user profile""" @@ -293,6 +313,9 @@ class DeviantartUserExtractor(DeviantartExtractor): ), ("gallery",)) +############################################################################### +# OAuth ####################################################################### + class DeviantartGalleryExtractor(DeviantartExtractor): """Extractor for all deviations from an artist's gallery""" subcategory = "gallery" @@ -439,7 +462,7 @@ class DeviantartStashExtractor(DeviantartExtractor): }), # multiple stash items ("https://sta.sh/21jf51j7pzl2", { - "pattern": pattern, + "options": (("original", False),), "count": 4, }), # downloadable, but no "content" field (#307) @@ -447,8 +470,13 @@ class DeviantartStashExtractor(DeviantartExtractor): "pattern": r"https://api-da\.wixmp\.com/_api/download/file", "count": 1, }), + # mixed folders and images (#659) + ("https://sta.sh/215twi387vfj", { + "options": (("original", False),), + "count": 4, + }), ("https://sta.sh/abcdefghijkl", { - "exception": exception.HttpError, + "count": 0, }), ) @@ -459,21 +487,31 @@ class DeviantartStashExtractor(DeviantartExtractor): self.user = None self.stash_id = match.group(1) - def deviations(self): - url = "https://sta.sh/" + self.stash_id - page = self.request(url).text - deviation_id = text.extract(page, '//deviation/', '"')[0] + def deviations(self, stash_id=None): + if stash_id is None: + stash_id = self.stash_id + url = "https://sta.sh/" + stash_id + page = self._limited_request(url).text - if deviation_id: - return (self.api.deviation(deviation_id),) + if stash_id[0] == "0": + uuid = text.extract(page, '//deviation/', '"')[0] + if uuid: + yield self.api.deviation(uuid) + return - else: - data = {"_extractor": DeviantartStashExtractor} - page = text.extract(page, 'id="stash-body"', 'class="footer"')[0] - return [ - (url, data) - for url in text.extract_iter(page, '<a href="', '"') - ] + for item in text.extract_iter( + page, 'class="stash-thumb-container', '</div>'): + url = text.extract(item, '<a href="', '"')[0] + + if url: + stash_id = url.rpartition("/")[2] + else: + stash_id = text.extract(item, 'gmi-stashid="', '"')[0] + stash_id = "2" + util.bencode(text.parse_int( + stash_id), "0123456789abcdefghijklmnopqrstuvwxyz") + + if len(stash_id) > 2: + yield from self.deviations(stash_id) class DeviantartFavoriteExtractor(DeviantartExtractor): @@ -635,148 +673,10 @@ class DeviantartPopularExtractor(DeviantartExtractor): deviation["popular"] = self.popular -class DeviantartExtractorV2(DeviantartExtractor): - """Base class for deviantart extractors using the NAPI""" - cookiedomain = ".deviantart.com" - cookienames = ("auth", "auth_secure", "userinfo") - _warning = True - - def items(self): - if self.original and not self._check_cookies(self.cookienames): - self.original = False - if self._warning: - DeviantartExtractorV2._warning = False - self.log.warning("No session cookies set: " - "Disabling original file downloads.") - - yield Message.Version, 1 - for deviation in self.deviations(): - data = self.api.deviation_extended_fetch( - deviation["deviationId"], - deviation["author"]["username"], - "journal" if deviation["isJournal"] else "art", - ) - - if "deviation" not in data: - self.log.warning("Unable to fetch deviation ID %s", - deviation["deviationId"]) - self.log.debug("Server response: %s", data) - continue - - deviation = self._extract(data) - if not deviation: - continue - - yield Message.Directory, deviation - yield Message.Url, deviation["target"]["src"], deviation - if self.extra: - for match in DeviantartStashExtractor.pattern.finditer( - deviation["description"]): - deviation["_extractor"] = DeviantartStashExtractor - yield Message.Queue, match.group(0), deviation - - def _extract(self, data): - deviation = data["deviation"] - extended = deviation["extended"] - media = deviation["media"] - del deviation["extended"] - del deviation["media"] - - # prepare deviation metadata - deviation["description"] = extended.get("description", "") - deviation["username"] = deviation["author"]["username"] - deviation["_username"] = deviation["username"].lower() - deviation["stats"] = extended["stats"] - deviation["stats"]["comments"] = data["comments"]["total"] - deviation["index"] = deviation["deviationId"] - deviation["tags"] = [t["name"] for t in extended.get("tags") or ()] - deviation["date"] = text.parse_datetime( - deviation["publishedTime"]) - deviation["category_path"] = "/".join( - extended[key]["displayNameEn"] - for key in ("typeFacet", "contentFacet", "categoryFacet") - if key in extended - ) - - # extract download target - target = media["types"][-1] - src = token = None - - if "textContent" in deviation: - if not self.commit_journal: - return None - journal = deviation["textContent"] - journal["html"] = journal["html"]["markup"] - src = self.commit_journal(deviation, journal)[1] - - elif target["t"] == "gif": - src = target["b"] - token = media["token"][0] - - elif "download" in extended and self.original: - target = extended["download"] - src = target["url"] - del target["url"] - - elif target["t"] == "video": - # select largest video - target = max(media["types"], - key=lambda x: text.parse_int(x.get("q", "")[:-1])) - src = target["b"] - - elif target["t"] == "flash": - src = target["s"] - if src.startswith("https://sandbox.deviantart.com"): - # extract SWF file from "sandbox" - src = text.extract( - self.request(src).text, 'id="sandboxembed" src="', '"')[0] - - else: - src = media["baseUri"] - if "token" in media: - token = media["token"][0] - - if "c" in target: - src += "/" + target["c"].replace( - "<prettyName>", media["prettyName"]) - if src.startswith("https://images-wixmp-"): - if deviation["index"] <= 790677560: - # https://github.com/r888888888/danbooru/issues/4069 - intermediary, count = re.subn( - r"(/f/[^/]+/[^/]+)/v\d+/.*", r"/intermediary\1", src) - if count and self._check_url(intermediary): - src = intermediary - if self.quality: - src = re.sub(r"q_\d+", self.quality, src) - - # filename and extension metadata - alphabet = "0123456789abcdefghijklmnopqrstuvwxyz" - sub = re.compile(r"\W").sub - deviation["filename"] = "".join(( - sub("_", deviation["title"].lower()), "_by_", - sub("_", deviation["author"]["username"].lower()), "-d", - util.bencode(deviation["index"], alphabet), - )) - if "extension" not in deviation: - deviation["extension"] = text.ext_from_url(src) - - if token: - src = src + "?token=" + token - target["src"] = src - deviation["target"] = target - return deviation - - def _pagination(self, url, params, headers=None): - while True: - data = self.request(url, params=params, headers=headers).json() - yield from data["results"] +############################################################################### +# Eclipse ##################################################################### - if not data["hasMore"]: - return - params["offset"] = data["nextOffset"] - - -class DeviantartDeviationExtractor(DeviantartExtractorV2): +class DeviantartDeviationExtractor(DeviantartExtractor): """Extractor for single deviations""" subcategory = "deviation" archive_fmt = "{index}.{extension}" @@ -784,16 +684,13 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2): test = ( (("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), { "options": (("original", 0),), - # "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", + "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", }), ("https://www.deviantart.com/zzz/art/zzz-1234567890", { - "count": 0, + "exception": exception.NotFoundError, }), (("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), { - # "pattern": (r"https://www.deviantart.com/download/261986576" - # r"/[\w-]+\.jpg\?token=\w+&ts=\d+"), - "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" - r"/intermediary/f/[^/]+/[^.]+\.jpg") + "pattern": r"https://api-da\.wixmp\.com/_api/download/file", }), # wixmp URL rewrite (("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), { @@ -809,10 +706,10 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2): "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" r"/f/[^/]+/[^.]+\.gif\?token="), }), - # external URLs from description (#302) + # sta.sh URLs from description (#302) (("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498"), { "options": (("extra", 1), ("original", 0)), - "pattern": r"https?://sta\.sh/\w+$", + "pattern": DeviantartStashExtractor.pattern, "range": "2-", "count": 4, }), @@ -823,33 +720,21 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2): "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5", "extension": "mp4", "target": { - "d": 306, - "f": 19367585, - "h": 720, - "q": "720p", - "t": "video", - "w": 1364, + "duration": 306, + "filesize": 19367585, + "quality": "720p", "src": str, }, } }), - # archive - ("https://www.deviantart.com/itsvenue/art/-brush-pngs-14-763300948", { - # "pattern": r"https://.+deviantart.com/download/763300948/.*rar", - "pattern": r"https://images-wixmp-\w+\.wixmp\.com/i/.*\.png" - }), - # swf - ("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", { - "pattern": r"https://images-wixmp-.*wixmp.com/f/.*\.swf", - }), # journal ("https://www.deviantart.com/shimoda7/journal/ARTility-583755752", { - "url": "f33f8127ab71819be7de849175b6d5f8b37bb629", + "url": "d34b2c9f873423e665a1b8ced20fcb75951694a3", "pattern": "text:<!DOCTYPE html>\n", }), # journal-like post with isJournal == False (#419) ("https://www.deviantart.com/gliitchlord/art/brashstrokes-812942668", { - "url": "1534d6ea0561247ab921d07505e57a9d663a833b", + "url": "e2e0044bd255304412179b6118536dbd9bb3bb0e", "pattern": "text:<!DOCTYPE html>\n", }), # old-style URLs @@ -863,19 +748,20 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2): skip = Extractor.skip def __init__(self, match): - DeviantartExtractorV2.__init__(self, match) + DeviantartExtractor.__init__(self, match) self.type = match.group(3) self.deviation_id = match.group(4) def deviations(self): - return ({ - "deviationId": self.deviation_id, - "author" : {"username": self.user}, - "isJournal" : self.type == "journal", - },) + deviation = DeviantartEclipseAPI(self).deviation_extended_fetch( + self.deviation_id, self.user, self.type) + if "error" in deviation: + raise exception.NotFoundError("deviation") + return (self.api.deviation( + deviation["deviation"]["extended"]["deviationUuid"]),) -class DeviantartScrapsExtractor(DeviantartExtractorV2): +class DeviantartScrapsExtractor(DeviantartExtractor): """Extractor for an artist's scraps""" subcategory = "scraps" directory_fmt = ("{category}", "{username}", "Scraps") @@ -888,24 +774,31 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2): ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"), ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"), ) + cookiedomain = ".deviantart.com" + cookienames = ("auth", "auth_secure", "userinfo") + _warning = True def deviations(self): - url = self.root + "/_napi/da-user-profile/api/gallery/contents" - params = { - "username" : self.user, - "offset" : self.offset, - "limit" : "24", - "scraps_folder": "true", - } - headers = { - "Referer": "{}/{}/gallery/scraps".format(self.root, self.user), - } + eclipse_api = DeviantartEclipseAPI(self) + if self._warning: + DeviantartScrapsExtractor._warning = False + if not self._check_cookies(self.cookienames): + self.log.warning( + "No session cookies set: Unable to fetch mature scraps.") + + for obj in eclipse_api.gallery_scraps(self.user, self.offset): + deviation = obj["deviation"] + deviation_uuid = eclipse_api.deviation_extended_fetch( + deviation["deviationId"], + deviation["author"]["username"], + "journal" if deviation["isJournal"] else "art", + )["deviation"]["extended"]["deviationUuid"] - for obj in self._pagination(url, params, headers): - yield obj["deviation"] + yield self.api.deviation(deviation_uuid) -class DeviantartFollowingExtractor(DeviantartExtractorV2): +class DeviantartFollowingExtractor(DeviantartExtractor): + """Extractor for user's watched users""" subcategory = "following" pattern = BASE_PATTERN + "/about#watching$" test = ("https://www.deviantart.com/shimoda7/about#watching", { @@ -915,30 +808,19 @@ class DeviantartFollowingExtractor(DeviantartExtractorV2): }) def items(self): - url = "{}/_napi/da-user-profile/api/module/watching".format(self.root) - params = { - "username": self.user, - "moduleid": self._module_id(self.user), - "offset" : "0", - "limit" : "24", - } + eclipse_api = DeviantartEclipseAPI(self) yield Message.Version, 1 - for user in self._pagination(url, params): + for user in eclipse_api.user_watching(self.user, self.offset): url = "{}/{}".format(self.root, user["username"]) yield Message.Queue, url, user - def _module_id(self, username): - url = "{}/{}/about".format(self.root, username) - page = self.request(url).text - pos = page.find('\\"type\\":\\"watching\\"') - if pos < 0: - raise exception.NotFoundError("module") - return text.rextract(page, '\\"id\\":', ',', pos)[0].strip('" ') +############################################################################### +# API Interfaces ############################################################## -class DeviantartAPI(): - """Minimal interface for the DeviantArt API +class DeviantartOAuthAPI(): + """Interface for the DeviantArt OAuth API Ref: https://www.deviantart.com/developers/http/v1/20160316 """ @@ -1029,31 +911,6 @@ class DeviantartAPI(): params = {"mature_content": self.mature} return self._call(endpoint, params) - def deviation_extended_fetch(self, deviation_id, user, kind): - url = ("https://www.deviantart.com/_napi/da-browse/shared_api" - "/deviation/extended_fetch") - headers = {"Referer": "https://www.deviantart.com/"} - params = { - "deviationid" : deviation_id, - "username" : user, - "type" : kind, - "include_session": "false", - } - response = self.extractor.request( - url, headers=headers, params=params, fatal=None) - code = response.status_code - - if code == 404: - raise exception.StopExtraction( - "Your account must use the Eclipse interface.") - elif code == 403 and b"Request blocked." in response.content: - raise exception.StopExtraction( - "Requests to deviantart.com blocked due to too much traffic.") - try: - return response.json() - except Exception: - return {"error": response.text} - def deviation_metadata(self, deviations): """ Fetch deviation metadata for a set of deviations""" if not deviations: @@ -1225,11 +1082,84 @@ class DeviantartAPI(): return dmap +class DeviantartEclipseAPI(): + """Interface to the DeviantArt Eclipse API""" + + def __init__(self, extractor): + self.extractor = extractor + self.log = extractor.log + + def deviation_extended_fetch(self, deviation_id, user=None, kind=None): + endpoint = "da-browse/shared_api/deviation/extended_fetch" + params = { + "deviationid" : deviation_id, + "username" : user, + "type" : kind, + "include_session": "false", + } + return self._call(endpoint, params) + + def gallery_scraps(self, user, offset=None): + endpoint = "da-user-profile/api/gallery/contents" + params = { + "username" : user, + "offset" : offset, + "limit" : "24", + "scraps_folder": "true", + } + return self._pagination(endpoint, params) + + def user_watching(self, user, offset=None): + endpoint = "da-user-profile/api/module/watching" + params = { + "username": user, + "moduleid": self._module_id_watching(user), + "offset" : None, + "limit" : "24", + } + return self._pagination(endpoint, params) + + def _call(self, endpoint, params=None): + url = "https://www.deviantart.com/_napi/" + endpoint + headers = {"Referer": "https://www.deviantart.com/"} + + response = self.extractor._limited_request( + url, params=params, headers=headers, fatal=None) + + if response.status_code == 404: + raise exception.StopExtraction( + "Your account must use the Eclipse interface.") + try: + return response.json() + except Exception: + return {"error": response.text} + + def _pagination(self, endpoint, params=None): + while True: + data = self._call(endpoint, params) + yield from data["results"] + + if not data["hasMore"]: + return + params["offset"] = data["nextOffset"] + + def _module_id_watching(self, user): + url = "{}/{}/about".format(self.extractor.root, user) + page = self.extractor._limited_request(url).text + pos = page.find('\\"type\\":\\"watching\\"') + if pos < 0: + raise exception.NotFoundError("module") + return text.rextract(page, '\\"id\\":', ',', pos)[0].strip('" ') + + @cache(maxage=10*365*24*3600, keyarg=0) def _refresh_token_cache(original_token, new_token=None): return new_token or original_token +############################################################################### +# Journal Formats ############################################################# + SHADOW_TEMPLATE = """ <span class="shadow"> <img src="{src}" class="smshadow" width="{width}" height="{height}"> |
