summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/deviantart.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/deviantart.py')
-rw-r--r-- gallery_dl/extractor/deviantart.py 306
1 files changed, 154 insertions, 152 deletions
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index eeee74a..604966f 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -29,7 +29,7 @@ BASE_PATTERN = (
class DeviantartExtractor(Extractor):
"""Base class for deviantart extractors using the OAuth API"""
category = "deviantart"
- directory_fmt = ("{category}", "{author[username]!l}")
+ directory_fmt = ("{category}", "{username}")
filename_fmt = "{category}_{index}_{title}.{extension}"
root = "https://www.deviantart.com"
@@ -47,6 +47,12 @@ class DeviantartExtractor(Extractor):
if self.quality:
self.quality = "q_{}".format(self.quality)
+ if self.original != "image":
+ self._update_content = self._update_content_default
+ else:
+ self._update_content = self._update_content_image
+ self.original = True
+
self.commit_journal = {
"html": self._commit_journal_html,
"text": self._commit_journal_text,
@@ -62,6 +68,7 @@ class DeviantartExtractor(Extractor):
self.group = not profile
if self.group:
self.subcategory = "group-" + self.subcategory
+ self.user = self.user.lower()
else:
self.user = profile["user"]["username"]
@@ -95,8 +102,7 @@ class DeviantartExtractor(Extractor):
yield self.commit(deviation, content)
elif deviation["is_downloadable"]:
- content = {}
- self._update_content(deviation, content)
+ content = self.api.deviation_download(deviation["deviationid"])
yield self.commit(deviation, content)
if "videos" in deviation:
@@ -127,8 +133,14 @@ class DeviantartExtractor(Extractor):
deviation["url"].rpartition("-")[2])
except KeyError:
deviation["index"] = 0
+
if self.user:
deviation["username"] = self.user
+ deviation["_username"] = self.user.lower()
+ else:
+ deviation["username"] = deviation["author"]["username"]
+ deviation["_username"] = deviation["username"].lower()
+
deviation["da_category"] = deviation["category"]
deviation["published_time"] = text.parse_int(
deviation["published_time"])
@@ -238,81 +250,51 @@ class DeviantartExtractor(Extractor):
url = "{}/{}/{}/0/".format(self.root, self.user, category)
return [(url + folder["name"], folder) for folder in folders]
- def _update_content(self, deviation, content):
- try:
- data = self.api.deviation_extended_fetch(
- deviation["index"],
- deviation["author"]["username"],
- "journal" if "excerpt" in deviation else "art",
- )
- download = data["deviation"]["extended"]["download"]
- download["src"] = download["url"]
- except Exception as e:
- self.log.warning(
- "Unable to fetch original download URL for ID %s ('%s: %s')",
- deviation["index"], e.__class__.__name__, e,
- )
- self.log.debug("Server response: %s", data)
- else:
- if self.original == "image":
- url = data["src"].partition("?")[0]
- mtype = mimetypes.guess_type(url, False)[0]
- if not mtype or not mtype.startswith("image/"):
- return
- del download["url"]
- content.update(download)
+ def _update_content_default(self, deviation, content):
+ content.update(self.api.deviation_download(deviation["deviationid"]))
+ def _update_content_image(self, deviation, content):
+ data = self.api.deviation_download(deviation["deviationid"])
+ url = data["src"].partition("?")[0]
+ mtype = mimetypes.guess_type(url, False)[0]
+ if mtype and mtype.startswith("image/"):
+ content.update(data)
-class DeviantartUserExtractor(Extractor):
+
+class DeviantartUserExtractor(DeviantartExtractor):
"""Extractor for an artist's user profile"""
- category = "deviantart"
subcategory = "user"
pattern = BASE_PATTERN + r"/?$"
test = (
("https://www.deviantart.com/shimoda7", {
- "options": (("include", "gsjf"),),
- "pattern": r"/shimoda7/(gallery(/scraps)?|posts|favourites)",
+ "pattern": r"/shimoda7/gallery$",
+ }),
+ ("https://www.deviantart.com/shimoda7", {
+ "options": (("include", "all"),),
+ "pattern": r"/shimoda7/(gallery(/scraps)?|posts|favourites)$",
"count": 4,
}),
("https://shimoda7.deviantart.com/"),
)
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.user = match.group(1) or match.group(2)
-
- incl = self.config("include") or "g"
- if isinstance(incl, list):
- incl = "".join(item[0] for item in incl if item)
- self.include = incl.lower()
-
def items(self):
- base = "https://www.deviantart.com/{}/".format(self.user)
- incl = self.include
- data = {}
-
- if "g" in incl:
- data["_extractor"] = DeviantartGalleryExtractor
- yield Message.Queue, base + "gallery", data
- if "s" in incl:
- data["_extractor"] = DeviantartScrapsExtractor
- yield Message.Queue, base + "gallery/scraps", data
- if "j" in incl:
- data["_extractor"] = DeviantartJournalExtractor
- yield Message.Queue, base + "posts", data
- if "f" in incl:
- data["_extractor"] = DeviantartFavoriteExtractor
- yield Message.Queue, base + "favourites", data
+ base = "{}/{}/".format(self.root, self.user)
+ return self._dispatch_extractors((
+ (DeviantartGalleryExtractor , base + "gallery"),
+ (DeviantartScrapsExtractor , base + "gallery/scraps"),
+ (DeviantartJournalExtractor , base + "posts"),
+ (DeviantartFavoriteExtractor, base + "favourites"),
+ ), ("gallery",))
class DeviantartGalleryExtractor(DeviantartExtractor):
"""Extractor for all deviations from an artist's gallery"""
subcategory = "gallery"
- archive_fmt = "g_{username}_{index}.{extension}"
+ archive_fmt = "g_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$"
test = (
("https://www.deviantart.com/shimoda7/gallery/", {
- "pattern": r"https://(www.deviantart.com/download/\d+/"
+ "pattern": r"https://(api-da\.wixmp\.com/_api/download/file"
r"|images-wixmp-[^.]+.wixmp.com/f/.+/.+.jpg\?token=.+)",
"count": ">= 30",
"keyword": {
@@ -398,7 +380,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
class DeviantartFolderExtractor(DeviantartExtractor):
"""Extractor for deviations inside an artist's gallery folder"""
subcategory = "folder"
- directory_fmt = ("{category}", "{folder[owner]}", "{folder[title]}")
+ directory_fmt = ("{category}", "{username}", "{folder[title]}")
archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
pattern = BASE_PATTERN + r"/gallery/(\d+)/([^/?&#]+)"
test = (
@@ -418,14 +400,19 @@ class DeviantartFolderExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
- self.fname = match.group(4)
- self.folder = {"owner": self.user, "index": match.group(3)}
+ self.folder = None
+ self.folder_id = match.group(3)
+ self.folder_name = match.group(4)
def deviations(self):
folders = self.api.gallery_folders(self.user)
- folder = self._find_folder(folders, self.fname)
- self.folder["title"] = folder["name"]
- self.folder["uuid"] = folder["folderid"]
+ folder = self._find_folder(folders, self.folder_name)
+ self.folder = {
+ "title": folder["name"],
+ "uuid" : folder["folderid"],
+ "index": self.folder_id,
+ "owner": self.user,
+ }
return self.api.gallery(self.user, folder["folderid"], self.offset)
def prepare(self, deviation):
@@ -440,7 +427,8 @@ class DeviantartStashExtractor(DeviantartExtractor):
pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)"
test = (
("https://sta.sh/022c83odnaxc", {
- "pattern": r"https://sta.sh/download/7549925030122512/.+\?token=",
+ "pattern": r"https://api-da\.wixmp\.com/_api/download/file",
+ "content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f",
"count": 1,
}),
# multiple stash items
@@ -450,7 +438,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
}),
# downloadable, but no "content" field (#307)
("https://sta.sh/024t4coz16mi", {
- "pattern": r"https://sta.sh/download/7800709982190282/.+\?token=",
+ "pattern": r"https://api-da\.wixmp\.com/_api/download/file",
"count": 1,
}),
("https://sta.sh/abcdefghijkl", {
@@ -468,41 +456,25 @@ class DeviantartStashExtractor(DeviantartExtractor):
def deviations(self):
url = "https://sta.sh/" + self.stash_id
page = self.request(url).text
- deviation_id, pos = text.extract(page, '//deviation/', '"')
+ deviation_id = text.extract(page, '//deviation/', '"')[0]
if deviation_id:
- deviation = self.api.deviation(deviation_id)
- pos = page.find("dev-page-download", pos)
- if pos >= 0:
- deviation["_download"] = {
- "width" : text.parse_int(text.extract(
- page, 'data-download_width="' , '"', pos)[0]),
- "height": text.parse_int(text.extract(
- page, 'data-download_height="', '"', pos)[0]),
- "src" : text.unescape(text.extract(
- page, 'data-download_url="' , '"', pos)[0]),
- }
- return (deviation,)
+ return (self.api.deviation(deviation_id),)
+
else:
data = {"_extractor": DeviantartStashExtractor}
- page = text.extract(
- page, 'id="stash-body"', 'class="footer"', pos)[0]
+ page = text.extract(page, 'id="stash-body"', 'class="footer"')[0]
return [
(url, data)
for url in text.extract_iter(page, '<a href="', '"')
]
- def _update_content(self, deviation, content):
- if "_download" in deviation:
- content.update(deviation["_download"])
- del deviation["_download"]
-
class DeviantartFavoriteExtractor(DeviantartExtractor):
"""Extractor for an artist's favorites"""
subcategory = "favorite"
directory_fmt = ("{category}", "{username}", "Favourites")
- archive_fmt = "f_{username}_{index}.{extension}"
+ archive_fmt = "f_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/favourites/?(?:\?catpath=/)?$"
test = (
("https://www.deviantart.com/h3813067/favourites/", {
@@ -530,8 +502,8 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
class DeviantartCollectionExtractor(DeviantartExtractor):
"""Extractor for a single favorite collection"""
subcategory = "collection"
- directory_fmt = ("{category}", "{collection[owner]}",
- "Favourites", "{collection[title]}")
+ directory_fmt = ("{category}", "{username}", "Favourites",
+ "{collection[title]}")
archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
pattern = BASE_PATTERN + r"/favourites/(\d+)/([^/?&#]+)"
test = (
@@ -546,14 +518,19 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
- _, _, cid, self.cname = match.groups()
- self.collection = {"owner": self.user, "index": cid}
+ self.collection = None
+ self.collection_id = match.group(3)
+ self.collection_name = match.group(4)
def deviations(self):
folders = self.api.collections_folders(self.user)
- folder = self._find_folder(folders, self.cname)
- self.collection["title"] = folder["name"]
- self.collection["uuid"] = folder["folderid"]
+ folder = self._find_folder(folders, self.collection_name)
+ self.collection = {
+ "title": folder["name"],
+ "uuid" : folder["folderid"],
+ "index": self.collection_id,
+ "owner": self.user,
+ }
return self.api.collections(self.user, folder["folderid"], self.offset)
def prepare(self, deviation):
@@ -565,7 +542,7 @@ class DeviantartJournalExtractor(DeviantartExtractor):
"""Extractor for an artist's journals"""
subcategory = "journal"
directory_fmt = ("{category}", "{username}", "Journal")
- archive_fmt = "j_{username}_{index}.{extension}"
+ archive_fmt = "j_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$"
test = (
("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
@@ -635,8 +612,18 @@ class DeviantartPopularExtractor(DeviantartExtractor):
class DeviantartExtractorV2(DeviantartExtractor):
"""Base class for deviantart extractors using the NAPI"""
+ cookiedomain = ".deviantart.com"
+ cookienames = ("auth", "auth_secure", "userinfo")
+ _warning = True
def items(self):
+ if self.original and not self._check_cookies(self.cookienames):
+ self.original = False
+ if self._warning:
+ DeviantartExtractorV2._warning = False
+ self.log.warning("No session cookies set: "
+ "Disabling original file downloads.")
+
yield Message.Version, 1
for deviation in self.deviations():
data = self.api.deviation_extended_fetch(
@@ -646,10 +633,14 @@ class DeviantartExtractorV2(DeviantartExtractor):
)
if "deviation" not in data:
- self.log.warning("Skipping ID %s", deviation["deviationId"])
+ self.log.warning("Unable to fetch deviation ID %s",
+ deviation["deviationId"])
self.log.debug("Server response: %s", data)
continue
+
deviation = self._extract(data)
+ if not deviation:
+ continue
yield Message.Directory, deviation
yield Message.Url, deviation["target"]["src"], deviation
@@ -662,13 +653,14 @@ class DeviantartExtractorV2(DeviantartExtractor):
def _extract(self, data):
deviation = data["deviation"]
extended = deviation["extended"]
- files = deviation["files"]
+ media = deviation["media"]
del deviation["extended"]
- del deviation["files"]
+ del deviation["media"]
# prepare deviation metadata
deviation["description"] = extended.get("description", "")
- deviation["username"] = self.user.lower()
+ deviation["username"] = deviation["author"]["username"]
+ deviation["_username"] = deviation["username"].lower()
deviation["stats"] = extended["stats"]
deviation["stats"]["comments"] = data["comments"]["total"]
deviation["index"] = deviation["deviationId"]
@@ -682,53 +674,69 @@ class DeviantartExtractorV2(DeviantartExtractor):
)
# extract download target
- target = files[-1]
+ target = media["types"][-1]
+ src = token = None
- if "textContent" in deviation and self.commit_journal:
+ if "textContent" in deviation:
+ if not self.commit_journal:
+ return None
journal = deviation["textContent"]
journal["html"] = journal["html"]["markup"]
- target["src"] = self.commit_journal(deviation, journal)[1]
- elif target["type"] == "gif":
- pass
- elif target["type"] == "video":
- # select largest video
- target = max(
- files, key=lambda x: text.parse_int(x.get("quality", "")[:-1]))
- elif target["type"] == "flash":
- if target["src"].startswith("https://sandbox.deviantart.com"):
- # extract SWF file from "sandbox"
- target["src"] = text.extract(
- self.request(target["src"]).text,
- 'id="sandboxembed" src="', '"',
- )[0]
- elif "download" in extended:
+ src = self.commit_journal(deviation, journal)[1]
+
+ elif target["t"] == "gif":
+ src = target["b"]
+ token = media["token"][0]
+
+ elif "download" in extended and self.original:
target = extended["download"]
- target["src"] = target["url"]
+ src = target["url"]
del target["url"]
- elif target["src"].startswith("https://images-wixmp-"):
- if deviation["index"] <= 790677560:
- # https://github.com/r888888888/danbooru/issues/4069
- target["src"] = re.sub(
- r"(/f/[^/]+/[^/]+)/v\d+/.*",
- r"/intermediary\1", target["src"])
- if self.quality:
- target["src"] = re.sub(
- r"q_\d+", self.quality, target["src"])
+
+ elif target["t"] == "video":
+ # select largest video
+ target = max(media["types"],
+ key=lambda x: text.parse_int(x.get("q", "")[:-1]))
+ src = target["s"]
+
+ elif target["t"] == "flash":
+ src = target["s"]
+ if src.startswith("https://sandbox.deviantart.com"):
+ # extract SWF file from "sandbox"
+ src = text.extract(
+ self.request(src).text, 'id="sandboxembed" src="', '"')[0]
+
+ else:
+ src = media["baseUri"]
+ if "token" in media:
+ token = media["token"][0]
+
+ if "c" in target:
+ src += "/" + target["c"].replace(
+ "<prettyName>", media["prettyName"])
+ if src.startswith("https://images-wixmp-"):
+ if deviation["index"] <= 790677560:
+ # https://github.com/r888888888/danbooru/issues/4069
+ src = re.sub(
+ r"(/f/[^/]+/[^/]+)/v\d+/.*", r"/intermediary\1", src)
+ if self.quality:
+ src = re.sub(r"q_\d+", self.quality, src)
# filename and extension metadata
alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
sub = re.compile(r"\W").sub
- deviation["filename"] = target["filename"] = "".join((
+ deviation["filename"] = "".join((
sub("_", deviation["title"].lower()), "_by_",
sub("_", deviation["author"]["username"].lower()), "-d",
util.bencode(deviation["index"], alphabet),
))
if "extension" not in deviation:
- deviation["extension"] = target["extension"] = (
- text.ext_from_url(target["src"])
- )
- deviation["target"] = target
+ deviation["extension"] = text.ext_from_url(src)
+ if token:
+ src = src + "?token=" + token
+ target["src"] = src
+ deviation["target"] = target
return deviation
@@ -740,19 +748,21 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2):
test = (
(("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
"options": (("original", 0),),
- "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
+ # "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}),
("https://www.deviantart.com/zzz/art/zzz-1234567890", {
"count": 0,
}),
(("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), {
- "pattern": (r"https://www.deviantart.com/download/261986576"
- r"/[\w-]+\.jpg\?token=\w+&ts=\d+"),
+ # "pattern": (r"https://www.deviantart.com/download/261986576"
+ # r"/[\w-]+\.jpg\?token=\w+&ts=\d+"),
+ "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
+ r"/intermediary/f/[^/]+/[^.]+\.jpg")
}),
# wixmp URL rewrite
(("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
"pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
- r"/intermediary/f/[^/]+/[^.]+\.jpg$")
+ r"/intermediary/f/[^/]+/[^.]+\.jpg")
}),
# wixmp URL rewrite v2 (#369)
(("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), {
@@ -774,20 +784,21 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2):
("https://www.deviantart.com/chi-u/art/-VIDEO-Brushes-330774593", {
"url": "3b6e6e761d2d393fa61a4dc3ed6e7db51b14d07b",
"keyword": {
+ "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5",
+ "extension": "mp4",
"target": {
- "duration": 306,
- "extension": "mp4",
- "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5",
- "filesize": 9963639,
- "quality": "1080p",
+ "d": 306,
+ "f": 9963639,
+ "q": "1080p",
+ "t": "video",
"src": str,
- "type": "video",
},
}
}),
# archive
("https://www.deviantart.com/itsvenue/art/-brush-pngs-14-763300948", {
- "pattern": r"https://.+deviantart.com/download/763300948/.*\.rar",
+ # "pattern": r"https://.+deviantart.com/download/763300948/.*rar",
+ "pattern": r"https://images-wixmp-\w+\.wixmp\.com/i/.*\.png"
}),
# swf
("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", {
@@ -830,7 +841,7 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2):
"""Extractor for an artist's scraps"""
subcategory = "scraps"
directory_fmt = ("{category}", "{username}", "Scraps")
- archive_fmt = "s_{username}_{index}.{extension}"
+ archive_fmt = "s_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
test = (
("https://www.deviantart.com/shimoda7/gallery/scraps", {
@@ -841,14 +852,6 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2):
)
def deviations(self):
- # copy self.session
- session = self.session.__class__()
- for attr in session.__attrs__:
- setattr(session, attr, getattr(self.session, attr, None))
-
- # reset cookies in the original session object
- self.session.cookies = session.cookies.__class__()
-
url = self.root + "/_napi/da-user-profile/api/gallery/contents"
params = {
"username" : self.user,
@@ -861,8 +864,7 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2):
}
while True:
- data = self.request(
- url, session=session, params=params, headers=headers).json()
+ data = self.request(url, params=params, headers=headers).json()
for obj in data["results"]:
yield obj["deviation"]