summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/deviantart.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@ubuntu.com> 2019-08-26 19:34:45 -0400
committer: Unit 193 <unit193@ubuntu.com> 2019-08-26 19:34:45 -0400
commit: b75d158d014d6c43d7d785c46c9372a9cf84d144 (patch)
tree: 7dca4a7e61fe8b6e2bff2142fc19891e783a7d6d /gallery_dl/extractor/deviantart.py
parent: 64ad8e7bd15df71ab1116eede414558631bcad32 (diff)
New upstream version 1.10.2 [upstream/1.10.2]
Diffstat (limited to 'gallery_dl/extractor/deviantart.py')
-rw-r--r-- gallery_dl/extractor/deviantart.py | 394
1 file changed, 266 insertions, 128 deletions
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 63e2913..bd1299b 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -27,7 +27,7 @@ BASE_PATTERN = (
class DeviantartExtractor(Extractor):
- """Base class for deviantart extractors"""
+ """Base class for deviantart extractors using the OAuth API"""
category = "deviantart"
directory_fmt = ("{category}", "{author[username]!l}")
filename_fmt = "{category}_{index}_{title}.{extension}"
@@ -38,11 +38,15 @@ class DeviantartExtractor(Extractor):
self.offset = 0
self.flat = self.config("flat", True)
self.extra = self.config("extra", False)
+ self.quality = self.config("quality", "100")
self.original = self.config("original", True)
self.user = match.group(1) or match.group(2)
self.group = False
self.api = DeviantartAPI(self)
+ if self.quality:
+ self.quality = "q_{}".format(self.quality)
+
if self.original != "image":
self._update_content = self._update_content_default
else:
@@ -81,12 +85,15 @@ class DeviantartExtractor(Extractor):
text.ext_from_url(content["src"]) != "gif":
self._update_content(deviation, content)
- if deviation["index"] <= 790677560 and \
- content["src"].startswith("https://images-wixmp-"):
- # https://github.com/r888888888/danbooru/issues/4069
- content["src"] = re.sub(
- r"(/f/[^/]+/[^/]+)/v\d+/.*",
- r"/intermediary\1", content["src"])
+ if content["src"].startswith("https://images-wixmp-"):
+ if deviation["index"] <= 790677560:
+ # https://github.com/r888888888/danbooru/issues/4069
+ content["src"] = re.sub(
+ r"(/f/[^/]+/[^/]+)/v\d+/.*",
+ r"/intermediary\1", content["src"])
+ if self.quality:
+ content["src"] = re.sub(
+ r"q_\d+", self.quality, content["src"])
yield self.commit(deviation, content)
@@ -133,8 +140,16 @@ class DeviantartExtractor(Extractor):
@staticmethod
def commit(deviation, target):
url = target["src"]
- deviation["target"] = text.nameext_from_url(url, target.copy())
- deviation["extension"] = deviation["target"]["extension"]
+ thumb = deviation["thumbs"][0]["src"] if "thumbs" in deviation else url
+ target = text.nameext_from_url(thumb, target.copy())
+ if target["filename"].endswith("-150"):
+ target["filename"] = target["filename"][:-4]
+ if not target["filename"].count("-"):
+ name, _, hid = target["filename"].rpartition("_")
+ target["filename"] = name + "-" + hid
+ deviation["target"] = target
+ deviation["filename"] = target["filename"]
+ deviation["extension"] = target["extension"] = text.ext_from_url(url)
return Message.Url, url, deviation
def _commit_journal_html(self, deviation, journal):
@@ -225,14 +240,6 @@ class DeviantartExtractor(Extractor):
if mtype and mtype.startswith("image/"):
content.update(data)
- def _html_request(self, url, **kwargs):
- cookies = {"userinfo": (
- '__167217c8e6aac1a3331f;{"username":"","uniqueid":"ab2e8b184471bf0'
- 'e3f8ed3ee7a3220aa","vd":"Bc7vEx,BdC7Fy,A,J,A,,B,A,B,BdC7Fy,BdC7XU'
- ',J,J,A,BdC7XU,13,A,B,A,,A,A,B,A,A,,A","attr":56}'
- )}
- return self.request(url, cookies=cookies, **kwargs)
-
class DeviantartGalleryExtractor(DeviantartExtractor):
"""Extractor for all deviations from an artist's gallery"""
@@ -360,68 +367,6 @@ class DeviantartFolderExtractor(DeviantartExtractor):
deviation["folder"] = self.folder
-class DeviantartDeviationExtractor(DeviantartExtractor):
- """Extractor for single deviations"""
- subcategory = "deviation"
- archive_fmt = "{index}.{extension}"
- pattern = BASE_PATTERN + r"/((?:art|journal)/[^/?&#]+-\d+)"
- test = (
- (("https://www.deviantart.com/shimoda7/art/"
- "For-the-sake-of-a-memory-10073852"), {
- "options": (("original", 0),),
- "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
- }),
- ("https://www.deviantart.com/zzz/art/zzz-1234567890", {
- "exception": exception.NotFoundError,
- }),
- (("https://www.deviantart.com/myria-moon/art/"
- "Aime-Moi-part-en-vadrouille-261986576"), {
- "pattern": (r"https?://s3\.amazonaws\.com/origin-orig\."
- r"deviantart\.net/a383/f/2013/135/e/7/[^.]+\.jpg\?"),
- }),
- # wixmp URL rewrite
- (("https://www.deviantart.com/citizenfresh/art/"
- "Hverarond-14-the-beauty-of-the-earth-789295466"), {
- "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
- r"/intermediary/f/[^/]+/[^.]+\.jpg$")
- }),
- # non-download URL for GIFs (#242)
- (("https://www.deviantart.com/skatergators/art/"
- "COM-Monique-Model-781571783"), {
- "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
- r"/f/[^/]+/[^.]+\.gif\?token="),
- }),
- # external URLs from description (#302)
- (("https://www.deviantart.com/uotapo/art/"
- "INANAKI-Memorial-Humane7-590297498"), {
- "options": (("extra", 1), ("original", 0)),
- "pattern": r"https?://sta\.sh/\w+$",
- "range": "2-",
- "count": 4,
- }),
- # old-style URLs
- ("https://shimoda7.deviantart.com"
- "/art/For-the-sake-of-a-memory-10073852"),
- ("https://myria-moon.deviantart.com"
- "/art/Aime-Moi-part-en-vadrouille-261986576"),
- ("https://zzz.deviantart.com/art/zzz-1234567890"),
- )
-
- skip = Extractor.skip
-
- def __init__(self, match):
- DeviantartExtractor.__init__(self, match)
- self.path = match.group(3)
-
- def deviations(self):
- url = "{}/{}/{}".format(self.root, self.user, self.path)
- response = self._html_request(url, fatal=False)
- deviation_id = text.extract(response.text, '//deviation/', '"')[0]
- if response.status_code >= 400 or not deviation_id:
- raise exception.NotFoundError("image")
- return (self.api.deviation(deviation_id),)
-
-
class DeviantartStashExtractor(DeviantartExtractor):
"""Extractor for sta.sh-ed deviations"""
subcategory = "stash"
@@ -558,54 +503,6 @@ class DeviantartJournalExtractor(DeviantartExtractor):
return self.api.browse_user_journals(self.user, self.offset)
-class DeviantartScrapsExtractor(DeviantartExtractor):
- """Extractor for an artist's scraps"""
- subcategory = "scraps"
- directory_fmt = ("{category}", "{username}", "Scraps")
- archive_fmt = "s_{username}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/gallery/\?catpath=scraps\b"
- test = (
- ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps", {
- "count": 12,
- "options": (("original", False),),
- }),
- ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"),
- )
-
- def deviations(self):
- url = "{}/{}/gallery/?catpath=scraps".format(self.root, self.user)
- page = self._html_request(url).text
- csrf, pos = text.extract(page, '"csrf":"', '"')
- iid , pos = text.extract(page, '"requestid":"', '"', pos)
-
- url = "https://www.deviantart.com/dapi/v1/gallery/0"
- data = {
- "username": self.user,
- "offset": self.offset,
- "limit": "24",
- "catpath": "scraps",
- "_csrf": csrf,
- "dapiIid": iid + "-jsok7403-1.1"
- }
-
- while True:
- content = self.request(
- url, method="POST", data=data).json()["content"]
-
- for item in content["results"]:
- if item["html"].startswith('<div class="ad-container'):
- continue
- deviation_url = text.extract(item["html"], 'href="', '"')[0]
- page = self._html_request(deviation_url).text
- deviation_id = text.extract(page, '//deviation/', '"')[0]
- if deviation_id:
- yield self.api.deviation(deviation_id)
-
- if not content["has_more"]:
- return
- data["offset"] = content["next_offset"]
-
-
class DeviantartPopularExtractor(DeviantartExtractor):
"""Extractor for popular deviations"""
subcategory = "popular"
@@ -649,6 +546,247 @@ class DeviantartPopularExtractor(DeviantartExtractor):
deviation["popular"] = self.popular
+class DeviantartExtractorV2(Extractor):
+ """Base class for deviantart extractors using the NAPI"""
+ category = "deviantart"
+ directory_fmt = ("{category}", "{author[username]!l}")
+ filename_fmt = "{category}_{index}_{title}.{extension}"
+ root = "https://www.deviantart.com"
+
+ def __init__(self, match=None):
+ Extractor.__init__(self, match)
+ self.offset = 0
+ self.extra = self.config("extra", False)
+ self.quality = self.config("quality", "100")
+ self.user = match.group(1) or match.group(2)
+
+ if self.quality:
+ self.quality = "q_{}".format(self.quality)
+
+ def items(self):
+ url = (
+ self.root + "/_napi/da-browse/shared_api/deviation/extended_fetch"
+ )
+ params = {
+ "deviationid" : None,
+ "username" : None,
+ "type" : None,
+ "include_session": "false",
+ }
+ headers = {
+ "Referer": self.root,
+ }
+
+ yield Message.Version, 1
+ for deviation in self.deviations():
+ params["deviationid"] = deviation["deviationId"]
+ params["username"] = deviation["author"]["username"]
+ params["type"] = "journal" if deviation["isJournal"] else "art"
+ data = self.request(url, params=params, headers=headers).json()
+
+ if "deviation" not in data:
+ self.log.warning("Skipping %s", params["deviationid"])
+ continue
+ deviation = self._extract(data)
+
+ yield Message.Directory, deviation
+ yield Message.Url, deviation["target"]["src"], deviation
+ if self.extra:
+ for match in DeviantartStashExtractor.pattern.finditer(
+ deviation["description"]):
+ deviation["_extractor"] = DeviantartStashExtractor
+ yield Message.Queue, match.group(0), deviation
+
+ def _extract(self, data):
+ deviation = data["deviation"]
+ extended = deviation["extended"]
+ files = deviation["files"]
+ del deviation["extended"]
+ del deviation["files"]
+
+ # prepare deviation metadata
+ deviation["description"] = extended.get("description", "")
+ deviation["username"] = self.user.lower()
+ deviation["stats"] = extended["stats"]
+ deviation["stats"]["comments"] = data["comments"]["total"]
+ deviation["index"] = deviation["deviationId"]
+ deviation["tags"] = [t["name"] for t in extended.get("tags") or ()]
+ deviation["date"] = text.parse_datetime(
+ deviation["publishedTime"])
+ deviation["category_path"] = "/".join(
+ extended[key]["displayNameEn"]
+ for key in ("typeFacet", "contentFacet", "categoryFacet")
+ if key in extended
+ )
+
+ # extract download target
+ target = files[-1]
+ name = files[0]["src"]
+
+ if target["type"] == "gif":
+ pass
+ elif target["type"] == "video":
+ # select largest video
+ target = max(
+ files, key=lambda x: text.parse_int(x.get("quality", "")[:-1]))
+ name = target["src"]
+ elif target["type"] == "flash":
+ if target["src"].startswith("https://sandbox.deviantart.com"):
+ # extract SWF file from "sandbox"
+ target["src"] = text.extract(
+ self.request(target["src"]).text,
+ 'id="sandboxembed" src="', '"',
+ )[0]
+ elif "download" in extended:
+ target = extended["download"]
+ target["src"] = target["url"]
+ del target["url"]
+
+ # url rewrites
+ if target["src"].startswith("https://images-wixmp-"):
+ if deviation["index"] <= 790677560:
+ # https://github.com/r888888888/danbooru/issues/4069
+ target["src"] = re.sub(
+ r"(/f/[^/]+/[^/]+)/v\d+/.*",
+ r"/intermediary\1", target["src"])
+ if self.quality:
+ target["src"] = re.sub(
+ r"q_\d+", self.quality, target["src"])
+
+ text.nameext_from_url(name, target)
+ if target["filename"].endswith("-150"):
+ target["filename"] = target["filename"][:-4]
+ if not target["filename"].count("-"):
+ name, _, hid = target["filename"].rpartition("_")
+ target["filename"] = name + "-" + hid
+ deviation["target"] = target
+ deviation["filename"] = target["filename"]
+ deviation["extension"] = target["extension"] = (
+ text.ext_from_url(target["src"]))
+ return deviation
+
+
+class DeviantartDeviationExtractor(DeviantartExtractorV2):
+ """Extractor for single deviations"""
+ subcategory = "deviation"
+ archive_fmt = "{index}.{extension}"
+ pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?&#]+-)?(\d+)"
+ test = (
+ (("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
+ "options": (("original", 0),),
+ "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
+ }),
+ ("https://www.deviantart.com/zzz/art/zzz-1234567890", {
+ "count": 0,
+ }),
+ (("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), {
+ "pattern": (r"https://www.deviantart.com/download/261986576"
+ r"/[\w-]+\.jpg\?token=\w+&ts=\d+"),
+ }),
+ # wixmp URL rewrite
+ (("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
+ "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
+ r"/intermediary/f/[^/]+/[^.]+\.jpg$")
+ }),
+ # wixmp URL rewrite v2 (#369)
+ (("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), {
+ "pattern": r"https://images-wixmp-\w+\.wixmp\.com/.*,q_100,"
+ }),
+ # non-download URL for GIFs (#242)
+ (("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), {
+ "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
+ r"/f/[^/]+/[^.]+\.gif\?token="),
+ }),
+ # external URLs from description (#302)
+ (("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498"), {
+ "options": (("extra", 1), ("original", 0)),
+ "pattern": r"https?://sta\.sh/\w+$",
+ "range": "2-",
+ "count": 4,
+ }),
+ # video
+ ("https://www.deviantart.com/chi-u/art/-VIDEO-Brushes-330774593", {
+ "url": "3b6e6e761d2d393fa61a4dc3ed6e7db51b14d07b",
+ "keyword": {
+ "target": {
+ "duration": 306,
+ "extension": "mp4",
+ "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5",
+ "filesize": 9963639,
+ "quality": "1080p",
+ "src": str,
+ "type": "video",
+ },
+ }
+ }),
+ # archive
+ ("https://www.deviantart.com/itsvenue/art/-brush-pngs-14-763300948", {
+ "pattern": r"https://.+deviantart.com/download/763300948/.*\.rar",
+ }),
+ # swf
+ ("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", {
+ "pattern": r"https://images-wixmp-.*wixmp.com/f/.*\.swf",
+ }),
+ # old-style URLs
+ ("https://shimoda7.deviantart.com"
+ "/art/For-the-sake-of-a-memory-10073852"),
+ ("https://myria-moon.deviantart.com"
+ "/art/Aime-Moi-part-en-vadrouille-261986576"),
+ ("https://zzz.deviantart.com/art/zzz-1234567890"),
+ )
+
+ skip = Extractor.skip
+
+ def __init__(self, match):
+ DeviantartExtractorV2.__init__(self, match)
+ self.type = match.group(3)
+ self.deviation_id = match.group(4)
+
+ def deviations(self):
+ return ({
+ "deviationId": self.deviation_id,
+ "author" : {"username": self.user},
+ "isJournal" : self.type == "journal",
+ },)
+
+
+class DeviantartScrapsExtractor(DeviantartExtractorV2):
+ """Extractor for an artist's scraps"""
+ subcategory = "scraps"
+ directory_fmt = ("{category}", "{username}", "Scraps")
+ archive_fmt = "s_{username}_{index}.{extension}"
+ pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
+ test = (
+ ("https://www.deviantart.com/shimoda7/gallery/scraps", {
+ "count": 12,
+ }),
+ ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"),
+ ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"),
+ )
+
+ def deviations(self):
+ url = self.root + "/_napi/da-user-profile/api/gallery/contents"
+ params = {
+ "username" : self.user,
+ "offset" : self.offset,
+ "limit" : "24",
+ "scraps_folder": "true",
+ }
+ headers = {
+ "Referer": "{}/{}/gallery/scraps".format(self.root, self.user),
+ }
+
+ while True:
+ data = self.request(url, params=params, headers=headers).json()
+
+ for obj in data["results"]:
+ yield obj["deviation"]
+
+ if not data["hasMore"]:
+ return
+ params["offset"] = data["nextOffset"]
+
+
class DeviantartAPI():
"""Minimal interface for the DeviantArt API
@@ -805,7 +943,7 @@ class DeviantartAPI():
self.authenticate(None if public else self.refresh_token)
response = self.extractor.request(
- url, headers=self.headers, params=params, fatal=False)
+ url, headers=self.headers, params=params, fatal=None)
data = response.json()
status = response.status_code