diff options
| author | 2019-08-26 19:34:45 -0400 | |
|---|---|---|
| committer | 2019-08-26 19:34:45 -0400 | |
| commit | b75d158d014d6c43d7d785c46c9372a9cf84d144 (patch) | |
| tree | 7dca4a7e61fe8b6e2bff2142fc19891e783a7d6d /gallery_dl/extractor/deviantart.py | |
| parent | 64ad8e7bd15df71ab1116eede414558631bcad32 (diff) | |
New upstream version 1.10.2 (upstream/1.10.2)
Diffstat (limited to 'gallery_dl/extractor/deviantart.py')
| -rw-r--r-- | gallery_dl/extractor/deviantart.py | 394 |
1 files changed, 266 insertions, 128 deletions
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 63e2913..bd1299b 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -27,7 +27,7 @@ BASE_PATTERN = ( class DeviantartExtractor(Extractor): - """Base class for deviantart extractors""" + """Base class for deviantart extractors using the OAuth API""" category = "deviantart" directory_fmt = ("{category}", "{author[username]!l}") filename_fmt = "{category}_{index}_{title}.{extension}" @@ -38,11 +38,15 @@ class DeviantartExtractor(Extractor): self.offset = 0 self.flat = self.config("flat", True) self.extra = self.config("extra", False) + self.quality = self.config("quality", "100") self.original = self.config("original", True) self.user = match.group(1) or match.group(2) self.group = False self.api = DeviantartAPI(self) + if self.quality: + self.quality = "q_{}".format(self.quality) + if self.original != "image": self._update_content = self._update_content_default else: @@ -81,12 +85,15 @@ class DeviantartExtractor(Extractor): text.ext_from_url(content["src"]) != "gif": self._update_content(deviation, content) - if deviation["index"] <= 790677560 and \ - content["src"].startswith("https://images-wixmp-"): - # https://github.com/r888888888/danbooru/issues/4069 - content["src"] = re.sub( - r"(/f/[^/]+/[^/]+)/v\d+/.*", - r"/intermediary\1", content["src"]) + if content["src"].startswith("https://images-wixmp-"): + if deviation["index"] <= 790677560: + # https://github.com/r888888888/danbooru/issues/4069 + content["src"] = re.sub( + r"(/f/[^/]+/[^/]+)/v\d+/.*", + r"/intermediary\1", content["src"]) + if self.quality: + content["src"] = re.sub( + r"q_\d+", self.quality, content["src"]) yield self.commit(deviation, content) @@ -133,8 +140,16 @@ class DeviantartExtractor(Extractor): @staticmethod def commit(deviation, target): url = target["src"] - deviation["target"] = text.nameext_from_url(url, target.copy()) - deviation["extension"] = 
deviation["target"]["extension"] + thumb = deviation["thumbs"][0]["src"] if "thumbs" in deviation else url + target = text.nameext_from_url(thumb, target.copy()) + if target["filename"].endswith("-150"): + target["filename"] = target["filename"][:-4] + if not target["filename"].count("-"): + name, _, hid = target["filename"].rpartition("_") + target["filename"] = name + "-" + hid + deviation["target"] = target + deviation["filename"] = target["filename"] + deviation["extension"] = target["extension"] = text.ext_from_url(url) return Message.Url, url, deviation def _commit_journal_html(self, deviation, journal): @@ -225,14 +240,6 @@ class DeviantartExtractor(Extractor): if mtype and mtype.startswith("image/"): content.update(data) - def _html_request(self, url, **kwargs): - cookies = {"userinfo": ( - '__167217c8e6aac1a3331f;{"username":"","uniqueid":"ab2e8b184471bf0' - 'e3f8ed3ee7a3220aa","vd":"Bc7vEx,BdC7Fy,A,J,A,,B,A,B,BdC7Fy,BdC7XU' - ',J,J,A,BdC7XU,13,A,B,A,,A,A,B,A,A,,A","attr":56}' - )} - return self.request(url, cookies=cookies, **kwargs) - class DeviantartGalleryExtractor(DeviantartExtractor): """Extractor for all deviations from an artist's gallery""" @@ -360,68 +367,6 @@ class DeviantartFolderExtractor(DeviantartExtractor): deviation["folder"] = self.folder -class DeviantartDeviationExtractor(DeviantartExtractor): - """Extractor for single deviations""" - subcategory = "deviation" - archive_fmt = "{index}.{extension}" - pattern = BASE_PATTERN + r"/((?:art|journal)/[^/?&#]+-\d+)" - test = ( - (("https://www.deviantart.com/shimoda7/art/" - "For-the-sake-of-a-memory-10073852"), { - "options": (("original", 0),), - "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", - }), - ("https://www.deviantart.com/zzz/art/zzz-1234567890", { - "exception": exception.NotFoundError, - }), - (("https://www.deviantart.com/myria-moon/art/" - "Aime-Moi-part-en-vadrouille-261986576"), { - "pattern": (r"https?://s3\.amazonaws\.com/origin-orig\." 
- r"deviantart\.net/a383/f/2013/135/e/7/[^.]+\.jpg\?"), - }), - # wixmp URL rewrite - (("https://www.deviantart.com/citizenfresh/art/" - "Hverarond-14-the-beauty-of-the-earth-789295466"), { - "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" - r"/intermediary/f/[^/]+/[^.]+\.jpg$") - }), - # non-download URL for GIFs (#242) - (("https://www.deviantart.com/skatergators/art/" - "COM-Monique-Model-781571783"), { - "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" - r"/f/[^/]+/[^.]+\.gif\?token="), - }), - # external URLs from description (#302) - (("https://www.deviantart.com/uotapo/art/" - "INANAKI-Memorial-Humane7-590297498"), { - "options": (("extra", 1), ("original", 0)), - "pattern": r"https?://sta\.sh/\w+$", - "range": "2-", - "count": 4, - }), - # old-style URLs - ("https://shimoda7.deviantart.com" - "/art/For-the-sake-of-a-memory-10073852"), - ("https://myria-moon.deviantart.com" - "/art/Aime-Moi-part-en-vadrouille-261986576"), - ("https://zzz.deviantart.com/art/zzz-1234567890"), - ) - - skip = Extractor.skip - - def __init__(self, match): - DeviantartExtractor.__init__(self, match) - self.path = match.group(3) - - def deviations(self): - url = "{}/{}/{}".format(self.root, self.user, self.path) - response = self._html_request(url, fatal=False) - deviation_id = text.extract(response.text, '//deviation/', '"')[0] - if response.status_code >= 400 or not deviation_id: - raise exception.NotFoundError("image") - return (self.api.deviation(deviation_id),) - - class DeviantartStashExtractor(DeviantartExtractor): """Extractor for sta.sh-ed deviations""" subcategory = "stash" @@ -558,54 +503,6 @@ class DeviantartJournalExtractor(DeviantartExtractor): return self.api.browse_user_journals(self.user, self.offset) -class DeviantartScrapsExtractor(DeviantartExtractor): - """Extractor for an artist's scraps""" - subcategory = "scraps" - directory_fmt = ("{category}", "{username}", "Scraps") - archive_fmt = "s_{username}_{index}.{extension}" - pattern = BASE_PATTERN + 
r"/gallery/\?catpath=scraps\b" - test = ( - ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps", { - "count": 12, - "options": (("original", False),), - }), - ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"), - ) - - def deviations(self): - url = "{}/{}/gallery/?catpath=scraps".format(self.root, self.user) - page = self._html_request(url).text - csrf, pos = text.extract(page, '"csrf":"', '"') - iid , pos = text.extract(page, '"requestid":"', '"', pos) - - url = "https://www.deviantart.com/dapi/v1/gallery/0" - data = { - "username": self.user, - "offset": self.offset, - "limit": "24", - "catpath": "scraps", - "_csrf": csrf, - "dapiIid": iid + "-jsok7403-1.1" - } - - while True: - content = self.request( - url, method="POST", data=data).json()["content"] - - for item in content["results"]: - if item["html"].startswith('<div class="ad-container'): - continue - deviation_url = text.extract(item["html"], 'href="', '"')[0] - page = self._html_request(deviation_url).text - deviation_id = text.extract(page, '//deviation/', '"')[0] - if deviation_id: - yield self.api.deviation(deviation_id) - - if not content["has_more"]: - return - data["offset"] = content["next_offset"] - - class DeviantartPopularExtractor(DeviantartExtractor): """Extractor for popular deviations""" subcategory = "popular" @@ -649,6 +546,247 @@ class DeviantartPopularExtractor(DeviantartExtractor): deviation["popular"] = self.popular +class DeviantartExtractorV2(Extractor): + """Base class for deviantart extractors using the NAPI""" + category = "deviantart" + directory_fmt = ("{category}", "{author[username]!l}") + filename_fmt = "{category}_{index}_{title}.{extension}" + root = "https://www.deviantart.com" + + def __init__(self, match=None): + Extractor.__init__(self, match) + self.offset = 0 + self.extra = self.config("extra", False) + self.quality = self.config("quality", "100") + self.user = match.group(1) or match.group(2) + + if self.quality: + self.quality = 
"q_{}".format(self.quality) + + def items(self): + url = ( + self.root + "/_napi/da-browse/shared_api/deviation/extended_fetch" + ) + params = { + "deviationid" : None, + "username" : None, + "type" : None, + "include_session": "false", + } + headers = { + "Referer": self.root, + } + + yield Message.Version, 1 + for deviation in self.deviations(): + params["deviationid"] = deviation["deviationId"] + params["username"] = deviation["author"]["username"] + params["type"] = "journal" if deviation["isJournal"] else "art" + data = self.request(url, params=params, headers=headers).json() + + if "deviation" not in data: + self.log.warning("Skipping %s", params["deviationid"]) + continue + deviation = self._extract(data) + + yield Message.Directory, deviation + yield Message.Url, deviation["target"]["src"], deviation + if self.extra: + for match in DeviantartStashExtractor.pattern.finditer( + deviation["description"]): + deviation["_extractor"] = DeviantartStashExtractor + yield Message.Queue, match.group(0), deviation + + def _extract(self, data): + deviation = data["deviation"] + extended = deviation["extended"] + files = deviation["files"] + del deviation["extended"] + del deviation["files"] + + # prepare deviation metadata + deviation["description"] = extended.get("description", "") + deviation["username"] = self.user.lower() + deviation["stats"] = extended["stats"] + deviation["stats"]["comments"] = data["comments"]["total"] + deviation["index"] = deviation["deviationId"] + deviation["tags"] = [t["name"] for t in extended.get("tags") or ()] + deviation["date"] = text.parse_datetime( + deviation["publishedTime"]) + deviation["category_path"] = "/".join( + extended[key]["displayNameEn"] + for key in ("typeFacet", "contentFacet", "categoryFacet") + if key in extended + ) + + # extract download target + target = files[-1] + name = files[0]["src"] + + if target["type"] == "gif": + pass + elif target["type"] == "video": + # select largest video + target = max( + files, 
key=lambda x: text.parse_int(x.get("quality", "")[:-1])) + name = target["src"] + elif target["type"] == "flash": + if target["src"].startswith("https://sandbox.deviantart.com"): + # extract SWF file from "sandbox" + target["src"] = text.extract( + self.request(target["src"]).text, + 'id="sandboxembed" src="', '"', + )[0] + elif "download" in extended: + target = extended["download"] + target["src"] = target["url"] + del target["url"] + + # url rewrites + if target["src"].startswith("https://images-wixmp-"): + if deviation["index"] <= 790677560: + # https://github.com/r888888888/danbooru/issues/4069 + target["src"] = re.sub( + r"(/f/[^/]+/[^/]+)/v\d+/.*", + r"/intermediary\1", target["src"]) + if self.quality: + target["src"] = re.sub( + r"q_\d+", self.quality, target["src"]) + + text.nameext_from_url(name, target) + if target["filename"].endswith("-150"): + target["filename"] = target["filename"][:-4] + if not target["filename"].count("-"): + name, _, hid = target["filename"].rpartition("_") + target["filename"] = name + "-" + hid + deviation["target"] = target + deviation["filename"] = target["filename"] + deviation["extension"] = target["extension"] = ( + text.ext_from_url(target["src"])) + return deviation + + +class DeviantartDeviationExtractor(DeviantartExtractorV2): + """Extractor for single deviations""" + subcategory = "deviation" + archive_fmt = "{index}.{extension}" + pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?&#]+-)?(\d+)" + test = ( + (("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), { + "options": (("original", 0),), + "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", + }), + ("https://www.deviantart.com/zzz/art/zzz-1234567890", { + "count": 0, + }), + (("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), { + "pattern": (r"https://www.deviantart.com/download/261986576" + r"/[\w-]+\.jpg\?token=\w+&ts=\d+"), + }), + # wixmp URL rewrite + (("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), { + 
"pattern": (r"https://images-wixmp-\w+\.wixmp\.com" + r"/intermediary/f/[^/]+/[^.]+\.jpg$") + }), + # wixmp URL rewrite v2 (#369) + (("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), { + "pattern": r"https://images-wixmp-\w+\.wixmp\.com/.*,q_100," + }), + # non-download URL for GIFs (#242) + (("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), { + "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" + r"/f/[^/]+/[^.]+\.gif\?token="), + }), + # external URLs from description (#302) + (("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498"), { + "options": (("extra", 1), ("original", 0)), + "pattern": r"https?://sta\.sh/\w+$", + "range": "2-", + "count": 4, + }), + # video + ("https://www.deviantart.com/chi-u/art/-VIDEO-Brushes-330774593", { + "url": "3b6e6e761d2d393fa61a4dc3ed6e7db51b14d07b", + "keyword": { + "target": { + "duration": 306, + "extension": "mp4", + "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5", + "filesize": 9963639, + "quality": "1080p", + "src": str, + "type": "video", + }, + } + }), + # archive + ("https://www.deviantart.com/itsvenue/art/-brush-pngs-14-763300948", { + "pattern": r"https://.+deviantart.com/download/763300948/.*\.rar", + }), + # swf + ("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", { + "pattern": r"https://images-wixmp-.*wixmp.com/f/.*\.swf", + }), + # old-style URLs + ("https://shimoda7.deviantart.com" + "/art/For-the-sake-of-a-memory-10073852"), + ("https://myria-moon.deviantart.com" + "/art/Aime-Moi-part-en-vadrouille-261986576"), + ("https://zzz.deviantart.com/art/zzz-1234567890"), + ) + + skip = Extractor.skip + + def __init__(self, match): + DeviantartExtractorV2.__init__(self, match) + self.type = match.group(3) + self.deviation_id = match.group(4) + + def deviations(self): + return ({ + "deviationId": self.deviation_id, + "author" : {"username": self.user}, + "isJournal" : self.type == "journal", + },) + + +class 
DeviantartScrapsExtractor(DeviantartExtractorV2): + """Extractor for an artist's scraps""" + subcategory = "scraps" + directory_fmt = ("{category}", "{username}", "Scraps") + archive_fmt = "s_{username}_{index}.{extension}" + pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b" + test = ( + ("https://www.deviantart.com/shimoda7/gallery/scraps", { + "count": 12, + }), + ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"), + ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"), + ) + + def deviations(self): + url = self.root + "/_napi/da-user-profile/api/gallery/contents" + params = { + "username" : self.user, + "offset" : self.offset, + "limit" : "24", + "scraps_folder": "true", + } + headers = { + "Referer": "{}/{}/gallery/scraps".format(self.root, self.user), + } + + while True: + data = self.request(url, params=params, headers=headers).json() + + for obj in data["results"]: + yield obj["deviation"] + + if not data["hasMore"]: + return + params["offset"] = data["nextOffset"] + + class DeviantartAPI(): """Minimal interface for the DeviantArt API @@ -805,7 +943,7 @@ class DeviantartAPI(): self.authenticate(None if public else self.refresh_token) response = self.extractor.request( - url, headers=self.headers, params=params, fatal=False) + url, headers=self.headers, params=params, fatal=None) data = response.json() status = response.status_code |
