diff options
Diffstat (limited to 'gallery_dl/extractor/deviantart.py')
| -rw-r--r-- | gallery_dl/extractor/deviantart.py | 306 |
1 files changed, 154 insertions, 152 deletions
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index eeee74a..604966f 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -29,7 +29,7 @@ BASE_PATTERN = ( class DeviantartExtractor(Extractor): """Base class for deviantart extractors using the OAuth API""" category = "deviantart" - directory_fmt = ("{category}", "{author[username]!l}") + directory_fmt = ("{category}", "{username}") filename_fmt = "{category}_{index}_{title}.{extension}" root = "https://www.deviantart.com" @@ -47,6 +47,12 @@ class DeviantartExtractor(Extractor): if self.quality: self.quality = "q_{}".format(self.quality) + if self.original != "image": + self._update_content = self._update_content_default + else: + self._update_content = self._update_content_image + self.original = True + self.commit_journal = { "html": self._commit_journal_html, "text": self._commit_journal_text, @@ -62,6 +68,7 @@ class DeviantartExtractor(Extractor): self.group = not profile if self.group: self.subcategory = "group-" + self.subcategory + self.user = self.user.lower() else: self.user = profile["user"]["username"] @@ -95,8 +102,7 @@ class DeviantartExtractor(Extractor): yield self.commit(deviation, content) elif deviation["is_downloadable"]: - content = {} - self._update_content(deviation, content) + content = self.api.deviation_download(deviation["deviationid"]) yield self.commit(deviation, content) if "videos" in deviation: @@ -127,8 +133,14 @@ class DeviantartExtractor(Extractor): deviation["url"].rpartition("-")[2]) except KeyError: deviation["index"] = 0 + if self.user: deviation["username"] = self.user + deviation["_username"] = self.user.lower() + else: + deviation["username"] = deviation["author"]["username"] + deviation["_username"] = deviation["username"].lower() + deviation["da_category"] = deviation["category"] deviation["published_time"] = text.parse_int( deviation["published_time"]) @@ -238,81 +250,51 @@ class DeviantartExtractor(Extractor): url = "{}/{}/{}/0/".format(self.root, self.user, category) return [(url + folder["name"], folder) for folder in folders] - def _update_content(self, deviation, content): - try: - data = self.api.deviation_extended_fetch( - deviation["index"], - deviation["author"]["username"], - "journal" if "excerpt" in deviation else "art", - ) - download = data["deviation"]["extended"]["download"] - download["src"] = download["url"] - except Exception as e: - self.log.warning( - "Unable to fetch original download URL for ID %s ('%s: %s')", - deviation["index"], e.__class__.__name__, e, - ) - self.log.debug("Server response: %s", data) - else: - if self.original == "image": - url = data["src"].partition("?")[0] - mtype = mimetypes.guess_type(url, False)[0] - if not mtype or not mtype.startswith("image/"): - return - del download["url"] - content.update(download) + def _update_content_default(self, deviation, content): + content.update(self.api.deviation_download(deviation["deviationid"])) + def _update_content_image(self, deviation, content): + data = self.api.deviation_download(deviation["deviationid"]) + url = data["src"].partition("?")[0] + mtype = mimetypes.guess_type(url, False)[0] + if mtype and mtype.startswith("image/"): + content.update(data) -class DeviantartUserExtractor(Extractor): + +class DeviantartUserExtractor(DeviantartExtractor): """Extractor for an artist's user profile""" - category = "deviantart" subcategory = "user" pattern = BASE_PATTERN + r"/?$" test = ( ("https://www.deviantart.com/shimoda7", { - "options": (("include", "gsjf"),), - "pattern": r"/shimoda7/(gallery(/scraps)?|posts|favourites)", + "pattern": r"/shimoda7/gallery$", + }), + ("https://www.deviantart.com/shimoda7", { + "options": (("include", "all"),), + "pattern": r"/shimoda7/(gallery(/scraps)?|posts|favourites)$", "count": 4, }), ("https://shimoda7.deviantart.com/"), ) - def __init__(self, match): - Extractor.__init__(self, match) - self.user = match.group(1) or match.group(2) - - incl = self.config("include") or "g" - if isinstance(incl, list): - incl = "".join(item[0] for item in incl if item) - self.include = incl.lower() - def items(self): - base = "https://www.deviantart.com/{}/".format(self.user) - incl = self.include - data = {} - - if "g" in incl: - data["_extractor"] = DeviantartGalleryExtractor - yield Message.Queue, base + "gallery", data - if "s" in incl: - data["_extractor"] = DeviantartScrapsExtractor - yield Message.Queue, base + "gallery/scraps", data - if "j" in incl: - data["_extractor"] = DeviantartJournalExtractor - yield Message.Queue, base + "posts", data - if "f" in incl: - data["_extractor"] = DeviantartFavoriteExtractor - yield Message.Queue, base + "favourites", data + base = "{}/{}/".format(self.root, self.user) + return self._dispatch_extractors(( + (DeviantartGalleryExtractor , base + "gallery"), + (DeviantartScrapsExtractor , base + "gallery/scraps"), + (DeviantartJournalExtractor , base + "posts"), + (DeviantartFavoriteExtractor, base + "favourites"), + ), ("gallery",)) class DeviantartGalleryExtractor(DeviantartExtractor): """Extractor for all deviations from an artist's gallery""" subcategory = "gallery" - archive_fmt = "g_{username}_{index}.{extension}" + archive_fmt = "g_{_username}_{index}.{extension}" pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$" test = ( ("https://www.deviantart.com/shimoda7/gallery/", { - "pattern": r"https://(www.deviantart.com/download/\d+/" + "pattern": r"https://(api-da\.wixmp\.com/_api/download/file" r"|images-wixmp-[^.]+.wixmp.com/f/.+/.+.jpg\?token=.+)", "count": ">= 30", "keyword": { @@ -398,7 +380,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor): class DeviantartFolderExtractor(DeviantartExtractor): """Extractor for deviations inside an artist's gallery folder""" subcategory = "folder" - directory_fmt = ("{category}", "{folder[owner]}", "{folder[title]}") + directory_fmt = ("{category}", "{username}", "{folder[title]}") archive_fmt = "F_{folder[uuid]}_{index}.{extension}" pattern = BASE_PATTERN + r"/gallery/(\d+)/([^/?&#]+)" test = ( @@ -418,14 +400,19 @@ class DeviantartFolderExtractor(DeviantartExtractor): def __init__(self, match): DeviantartExtractor.__init__(self, match) - self.fname = match.group(4) - self.folder = {"owner": self.user, "index": match.group(3)} + self.folder = None + self.folder_id = match.group(3) + self.folder_name = match.group(4) def deviations(self): folders = self.api.gallery_folders(self.user) - folder = self._find_folder(folders, self.fname) - self.folder["title"] = folder["name"] - self.folder["uuid"] = folder["folderid"] + folder = self._find_folder(folders, self.folder_name) + self.folder = { + "title": folder["name"], + "uuid" : folder["folderid"], + "index": self.folder_id, + "owner": self.user, + } return self.api.gallery(self.user, folder["folderid"], self.offset) def prepare(self, deviation): @@ -440,7 +427,8 @@ class DeviantartStashExtractor(DeviantartExtractor): pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)" test = ( ("https://sta.sh/022c83odnaxc", { - "pattern": r"https://sta.sh/download/7549925030122512/.+\?token=", + "pattern": r"https://api-da\.wixmp\.com/_api/download/file", + "content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f", "count": 1, }), # multiple stash items @@ -450,7 +438,7 @@ class DeviantartStashExtractor(DeviantartExtractor): }), # downloadable, but no "content" field (#307) ("https://sta.sh/024t4coz16mi", { - "pattern": r"https://sta.sh/download/7800709982190282/.+\?token=", + "pattern": r"https://api-da\.wixmp\.com/_api/download/file", "count": 1, }), ("https://sta.sh/abcdefghijkl", { @@ -468,41 +456,25 @@ class DeviantartStashExtractor(DeviantartExtractor): def deviations(self): url = "https://sta.sh/" + self.stash_id page = self.request(url).text - deviation_id, pos = text.extract(page, '//deviation/', '"') + deviation_id = text.extract(page, '//deviation/', '"')[0] if deviation_id: - deviation = self.api.deviation(deviation_id) - pos = page.find("dev-page-download", pos) - if pos >= 0: - deviation["_download"] = { - "width" : text.parse_int(text.extract( - page, 'data-download_width="' , '"', pos)[0]), - "height": text.parse_int(text.extract( - page, 'data-download_height="', '"', pos)[0]), - "src" : text.unescape(text.extract( - page, 'data-download_url="' , '"', pos)[0]), - } - return (deviation,) + return (self.api.deviation(deviation_id),) + else: data = {"_extractor": DeviantartStashExtractor} - page = text.extract( - page, 'id="stash-body"', 'class="footer"', pos)[0] + page = text.extract(page, 'id="stash-body"', 'class="footer"')[0] return [ (url, data) for url in text.extract_iter(page, '<a href="', '"') ] - def _update_content(self, deviation, content): - if "_download" in deviation: - content.update(deviation["_download"]) - del deviation["_download"] - class DeviantartFavoriteExtractor(DeviantartExtractor): """Extractor for an artist's favorites""" subcategory = "favorite" directory_fmt = ("{category}", "{username}", "Favourites") - archive_fmt = "f_{username}_{index}.{extension}" + archive_fmt = "f_{_username}_{index}.{extension}" pattern = BASE_PATTERN + r"/favourites/?(?:\?catpath=/)?$" test = ( ("https://www.deviantart.com/h3813067/favourites/", { @@ -530,8 +502,8 @@ class DeviantartFavoriteExtractor(DeviantartExtractor): class DeviantartCollectionExtractor(DeviantartExtractor): """Extractor for a single favorite collection""" subcategory = "collection" - directory_fmt = ("{category}", "{collection[owner]}", - "Favourites", "{collection[title]}") + directory_fmt = ("{category}", "{username}", "Favourites", + "{collection[title]}") archive_fmt = "C_{collection[uuid]}_{index}.{extension}" pattern = BASE_PATTERN + r"/favourites/(\d+)/([^/?&#]+)" test = ( @@ -546,14 +518,19 @@ class DeviantartCollectionExtractor(DeviantartExtractor): def __init__(self, match): DeviantartExtractor.__init__(self, match) - _, _, cid, self.cname = match.groups() - self.collection = {"owner": self.user, "index": cid} + self.collection = None + self.collection_id = match.group(3) + self.collection_name = match.group(4) def deviations(self): folders = self.api.collections_folders(self.user) - folder = self._find_folder(folders, self.cname) - self.collection["title"] = folder["name"] - self.collection["uuid"] = folder["folderid"] + folder = self._find_folder(folders, self.collection_name) + self.collection = { + "title": folder["name"], + "uuid" : folder["folderid"], + "index": self.collection_id, + "owner": self.user, + } return self.api.collections(self.user, folder["folderid"], self.offset) def prepare(self, deviation): @@ -565,7 +542,7 @@ class DeviantartJournalExtractor(DeviantartExtractor): """Extractor for an artist's journals""" subcategory = "journal" directory_fmt = ("{category}", "{username}", "Journal") - archive_fmt = "j_{username}_{index}.{extension}" + archive_fmt = "j_{_username}_{index}.{extension}" pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$" test = ( ("https://www.deviantart.com/angrywhitewanker/posts/journals/", { @@ -635,8 +612,18 @@ class DeviantartPopularExtractor(DeviantartExtractor): class DeviantartExtractorV2(DeviantartExtractor): """Base class for deviantart extractors using the NAPI""" + cookiedomain = ".deviantart.com" + cookienames = ("auth", "auth_secure", "userinfo") + _warning = True def items(self): + if self.original and not self._check_cookies(self.cookienames): + self.original = False + if self._warning: + DeviantartExtractorV2._warning = False + self.log.warning("No session cookies set: " + "Disabling original file downloads.") + yield Message.Version, 1 for deviation in self.deviations(): data = self.api.deviation_extended_fetch( @@ -646,10 +633,14 @@ class DeviantartExtractorV2(DeviantartExtractor): ) if "deviation" not in data: - self.log.warning("Skipping ID %s", deviation["deviationId"]) + self.log.warning("Unable to fetch deviation ID %s", + deviation["deviationId"]) self.log.debug("Server response: %s", data) continue + deviation = self._extract(data) + if not deviation: + continue yield Message.Directory, deviation yield Message.Url, deviation["target"]["src"], deviation @@ -662,13 +653,14 @@ class DeviantartExtractorV2(DeviantartExtractor): def _extract(self, data): deviation = data["deviation"] extended = deviation["extended"] - files = deviation["files"] + media = deviation["media"] del deviation["extended"] - del deviation["files"] + del deviation["media"] # prepare deviation metadata deviation["description"] = extended.get("description", "") - deviation["username"] = self.user.lower() + deviation["username"] = deviation["author"]["username"] + deviation["_username"] = deviation["username"].lower() deviation["stats"] = extended["stats"] deviation["stats"]["comments"] = data["comments"]["total"] deviation["index"] = deviation["deviationId"] @@ -682,53 +674,69 @@ class DeviantartExtractorV2(DeviantartExtractor): ) # extract download target - target = files[-1] + target = media["types"][-1] + src = token = None - if "textContent" in deviation and self.commit_journal: + if "textContent" in deviation: + if not self.commit_journal: + return None journal = deviation["textContent"] journal["html"] = journal["html"]["markup"] - target["src"] = self.commit_journal(deviation, journal)[1] - elif target["type"] == "gif": - pass - elif target["type"] == "video": - # select largest video - target = max( - files, key=lambda x: text.parse_int(x.get("quality", "")[:-1])) - elif target["type"] == "flash": - if target["src"].startswith("https://sandbox.deviantart.com"): - # extract SWF file from "sandbox" - target["src"] = text.extract( - self.request(target["src"]).text, - 'id="sandboxembed" src="', '"', - )[0] - elif "download" in extended: + src = self.commit_journal(deviation, journal)[1] + + elif target["t"] == "gif": + src = target["b"] + token = media["token"][0] + + elif "download" in extended and self.original: target = extended["download"] - target["src"] = target["url"] + src = target["url"] del target["url"] - elif target["src"].startswith("https://images-wixmp-"): - if deviation["index"] <= 790677560: - # https://github.com/r888888888/danbooru/issues/4069 - target["src"] = re.sub( - r"(/f/[^/]+/[^/]+)/v\d+/.*", - r"/intermediary\1", target["src"]) - if self.quality: - target["src"] = re.sub( - r"q_\d+", self.quality, target["src"]) + + elif target["t"] == "video": + # select largest video + target = max(media["types"], + key=lambda x: text.parse_int(x.get("q", "")[:-1])) + src = target["s"] + + elif target["t"] == "flash": + src = target["s"] + if src.startswith("https://sandbox.deviantart.com"): + # extract SWF file from "sandbox" + src = text.extract( + self.request(src).text, 'id="sandboxembed" src="', '"')[0] + + else: + src = media["baseUri"] + if "token" in media: + token = media["token"][0] + + if "c" in target: + src += "/" + target["c"].replace( + "<prettyName>", media["prettyName"]) + if src.startswith("https://images-wixmp-"): + if deviation["index"] <= 790677560: + # https://github.com/r888888888/danbooru/issues/4069 + src = re.sub( + r"(/f/[^/]+/[^/]+)/v\d+/.*", r"/intermediary\1", src) + if self.quality: + src = re.sub(r"q_\d+", self.quality, src) # filename and extension metadata alphabet = "0123456789abcdefghijklmnopqrstuvwxyz" sub = re.compile(r"\W").sub - deviation["filename"] = target["filename"] = "".join(( + deviation["filename"] = "".join(( sub("_", deviation["title"].lower()), "_by_", sub("_", deviation["author"]["username"].lower()), "-d", util.bencode(deviation["index"], alphabet), )) if "extension" not in deviation: - deviation["extension"] = target["extension"] = ( - text.ext_from_url(target["src"]) - ) - deviation["target"] = target + deviation["extension"] = text.ext_from_url(src) + if token: + src = src + "?token=" + token + target["src"] = src + deviation["target"] = target return deviation @@ -740,19 +748,21 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2): test = ( (("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), { "options": (("original", 0),), - "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", + # "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", }), ("https://www.deviantart.com/zzz/art/zzz-1234567890", { "count": 0, }), (("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), { - "pattern": (r"https://www.deviantart.com/download/261986576" - r"/[\w-]+\.jpg\?token=\w+&ts=\d+"), + # "pattern": (r"https://www.deviantart.com/download/261986576" + # r"/[\w-]+\.jpg\?token=\w+&ts=\d+"), + "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" + r"/intermediary/f/[^/]+/[^.]+\.jpg") }), # wixmp URL rewrite (("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), { "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" - r"/intermediary/f/[^/]+/[^.]+\.jpg$") + r"/intermediary/f/[^/]+/[^.]+\.jpg") }), # wixmp URL rewrite v2 (#369) (("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), { @@ -774,20 +784,21 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2): ("https://www.deviantart.com/chi-u/art/-VIDEO-Brushes-330774593", { "url": "3b6e6e761d2d393fa61a4dc3ed6e7db51b14d07b", "keyword": { + "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5", + "extension": "mp4", "target": { - "duration": 306, - "extension": "mp4", - "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5", - "filesize": 9963639, - "quality": "1080p", + "d": 306, + "f": 9963639, + "q": "1080p", + "t": "video", "src": str, - "type": "video", }, } }), # archive ("https://www.deviantart.com/itsvenue/art/-brush-pngs-14-763300948", { - "pattern": r"https://.+deviantart.com/download/763300948/.*\.rar", + # "pattern": r"https://.+deviantart.com/download/763300948/.*rar", + "pattern": r"https://images-wixmp-\w+\.wixmp\.com/i/.*\.png" }), # swf ("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", { @@ -830,7 +841,7 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2): """Extractor for an artist's scraps""" subcategory = "scraps" directory_fmt = ("{category}", "{username}", "Scraps") - archive_fmt = "s_{username}_{index}.{extension}" + archive_fmt = "s_{_username}_{index}.{extension}" pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b" test = ( ("https://www.deviantart.com/shimoda7/gallery/scraps", { @@ -841,14 +852,6 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2): ) def deviations(self): - # copy self.session - session = self.session.__class__() - for attr in session.__attrs__: - setattr(session, attr, getattr(self.session, attr, None)) - - # reset cookies in the original session object - self.session.cookies = session.cookies.__class__() - url = self.root + "/_napi/da-user-profile/api/gallery/contents" params = { "username" : self.user, @@ -861,8 +864,7 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2): } while True: - data = self.request( - url, session=session, params=params, headers=headers).json() + data = self.request(url, params=params, headers=headers).json() for obj in data["results"]: yield obj["deviation"] |
