summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/artstation.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net>, 2025-07-31 01:22:01 -0400
committer: Unit 193 <unit193@unit193.net>, 2025-07-31 01:22:01 -0400
commit: a6e995c093de8aae2e91a0787281bb34c0b871eb (patch)
tree: 2d79821b05300d34d8871eb6c9662b359a2de85d /gallery_dl/extractor/artstation.py
parent: 7672a750cb74bf31e21d76aad2776367fd476155 (diff)
New upstream version 1.30.2. (tag: upstream/1.30.2)
Diffstat (limited to 'gallery_dl/extractor/artstation.py')
-rw-r--r-- gallery_dl/extractor/artstation.py 167
1 files changed, 86 insertions, 81 deletions
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index f448710..fdb92c4 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2023 Mike Fährmann
+# Copyright 2018-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -25,22 +25,22 @@ class ArtstationExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.user = match.group(1) or match.group(2)
+ self.user = match[1] or match[2]
def _init(self):
self.session.headers["Cache-Control"] = "max-age=0"
+ self.mviews = self.config("mviews", True)
+ self.videos = self.config("videos", True)
+ self.external = self.config("external", False)
+ self.previews = self.config("previews", False)
+ self.max_posts = self.config("max-posts")
def items(self):
- videos = self.config("videos", True)
- previews = self.config("previews", False)
- external = self.config("external", False)
- max_posts = self.config("max-posts")
-
data = self.metadata()
projects = self.projects()
- if max_posts:
- projects = itertools.islice(projects, max_posts)
+ if self.max_posts:
+ projects = itertools.islice(projects, self.max_posts)
for project in projects:
for num, asset in enumerate(
self.get_project_assets(project["hash_id"]), 1):
@@ -50,28 +50,10 @@ class ArtstationExtractor(Extractor):
yield Message.Directory, asset
if adict["has_embedded_player"]:
- player = adict["player_embedded"]
- url = (text.extr(player, 'src="', '"') or
- text.extr(player, "src='", "'"))
- if url.startswith(self.root):
- # video clip hosted on artstation
- if videos:
- page = self.request(url).text
- url = text.extr(page, ' src="', '"')
- text.nameext_from_url(url, asset)
- yield Message.Url, url, asset
- elif url:
- # external URL
- if external:
- asset["extension"] = "mp4"
- yield Message.Url, "ytdl:" + url, asset
- else:
- self.log.debug(player)
- self.log.warning(
- "Failed to extract embedded player URL (%s)",
- adict.get("id"))
-
- if not previews:
+ if url := self._extract_embed(asset):
+ text.nameext_from_url(url, asset)
+ yield Message.Url, url, asset
+ if not self.previews:
continue
if adict["has_image"]:
@@ -79,19 +61,50 @@ class ArtstationExtractor(Extractor):
text.nameext_from_url(url, asset)
url = self._no_cache(url)
- if "/video_clips/" not in url:
+ if "/images/images/" in url:
lhs, _, rhs = url.partition("/large/")
if rhs:
- url = lhs + "/4k/" + rhs
+ url = f"{lhs}/4k/{rhs}"
asset["_fallback"] = self._image_fallback(lhs, rhs)
yield Message.Url, url, asset
- @staticmethod
- def _image_fallback(lhs, rhs):
- yield lhs + "/large/" + rhs
- yield lhs + "/medium/" + rhs
- yield lhs + "/small/" + rhs
+ def _extract_embed(self, asset):
+ adict = asset["asset"]
+ player = adict["player_embedded"]
+ url = (text.extr(player, 'src="', '"') or
+ text.extr(player, "src='", "'"))
+
+ if url.startswith(self.root):
+ # embed or video clip hosted on artstation
+ type = text.extr(adict.get("image_url", ""), "/assets/", "/")
+ if type == "marmosets":
+ if not self.mviews:
+ return
+ page = self.request(url).text
+ return text.extr(page, "marmoset.embed(", '",').strip("\"' ")
+
+ elif type:
+ if not self.videos:
+ return
+ page = self.request(url).text
+ return text.extr(page, ' src="', '"')
+
+ if url:
+ # external URL
+ if not self.external:
+ return
+ asset["extension"] = "mp4"
+ return f"ytdl:{url}"
+
+ self.log.debug(player)
+ self.log.warning("Failed to extract embedded player URL (%s)",
+ adict.get("id"))
+
+ def _image_fallback(self, lhs, rhs):
+ yield f"{lhs}/large/{rhs}"
+ yield f"{lhs}/medium/{rhs}"
+ yield f"{lhs}/small/{rhs}"
def metadata(self):
"""Return general metadata"""
@@ -102,10 +115,10 @@ class ArtstationExtractor(Extractor):
def get_project_assets(self, project_id):
"""Return all assets associated with 'project_id'"""
- url = "{}/projects/{}.json".format(self.root, project_id)
+ url = f"{self.root}/projects/{project_id}.json"
try:
- data = self.request(url).json()
+ data = self.request_json(url)
except exception.HttpError as exc:
self.log.warning(exc)
return
@@ -130,7 +143,7 @@ class ArtstationExtractor(Extractor):
def get_user_info(self, username):
"""Return metadata for a specific user"""
- url = "{}/users/{}/quick.json".format(self.root, username.lower())
+ url = f"{self.root}/users/{username.lower()}/quick.json"
response = self.request(url, notfound="user")
return response.json()
@@ -153,7 +166,7 @@ class ArtstationExtractor(Extractor):
params["page"] = 1
while True:
- data = self.request(url, **kwargs).json()
+ data = self.request_json(url, **kwargs)
yield from data["data"]
total += len(data["data"])
@@ -168,12 +181,10 @@ class ArtstationExtractor(Extractor):
"Accept" : "*/*",
"Origin" : self.root,
}
- return self.request(
- url, method="POST", headers=headers, json={},
- ).json()["public_csrf_token"]
+ return self.request_json(
+ url, method="POST", headers=headers, json={})["public_csrf_token"]
- @staticmethod
- def _no_cache(url):
+ def _no_cache(self, url):
"""Cause a cache miss to prevent Cloudflare 'optimizations'
Cloudflare's 'Polish' optimization strips image metadata and may even
@@ -199,7 +210,7 @@ class ArtstationUserExtractor(ArtstationExtractor):
example = "https://www.artstation.com/USER"
def projects(self):
- url = "{}/users/{}/projects.json".format(self.root, self.user)
+ url = f"{self.root}/users/{self.user}/projects.json"
params = {"album_id": "all"}
return self._pagination(url, params)
@@ -217,7 +228,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
- self.album_id = text.parse_int(match.group(3))
+ self.album_id = text.parse_int(match[3])
def metadata(self):
userinfo = self.get_user_info(self.user)
@@ -235,7 +246,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
}
def projects(self):
- url = "{}/users/{}/projects.json".format(self.root, self.user)
+ url = f"{self.root}/users/{self.user}/projects.json"
params = {"album_id": self.album_id}
return self._pagination(url, params)
@@ -250,7 +261,7 @@ class ArtstationLikesExtractor(ArtstationExtractor):
example = "https://www.artstation.com/USER/likes"
def projects(self):
- url = "{}/users/{}/likes.json".format(self.root, self.user)
+ url = f"{self.root}/users/{self.user}/likes.json"
return self._pagination(url)
@@ -266,19 +277,17 @@ class ArtstationCollectionExtractor(ArtstationExtractor):
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
- self.collection_id = match.group(2)
+ self.collection_id = match[2]
def metadata(self):
- url = "{}/collections/{}.json".format(
- self.root, self.collection_id)
+ url = f"{self.root}/collections/{self.collection_id}.json"
params = {"username": self.user}
- collection = self.request(
- url, params=params, notfound="collection").json()
+ collection = self.request_json(
+ url, params=params, notfound="collection")
return {"collection": collection, "user": self.user}
def projects(self):
- url = "{}/collections/{}/projects.json".format(
- self.root, self.collection_id)
+ url = f"{self.root}/collections/{self.collection_id}/projects.json"
params = {"collection_id": self.collection_id}
return self._pagination(url, params)
@@ -294,10 +303,9 @@ class ArtstationCollectionsExtractor(ArtstationExtractor):
url = self.root + "/collections.json"
params = {"username": self.user}
- for collection in self.request(
- url, params=params, notfound="collections").json():
- url = "{}/{}/collections/{}".format(
- self.root, self.user, collection["id"])
+ for collection in self.request_json(
+ url, params=params, notfound="collections"):
+ url = f"{self.root}/{self.user}/collections/{collection['id']}"
collection["_extractor"] = ArtstationCollectionExtractor
yield Message.Queue, url, collection
@@ -316,18 +324,16 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
- self.challenge_id = match.group(1)
- self.sorting = match.group(2) or "popular"
+ self.challenge_id = match[1]
+ self.sorting = match[2] or "popular"
def items(self):
- challenge_url = "{}/contests/_/challenges/{}.json".format(
- self.root, self.challenge_id)
- submission_url = "{}/contests/_/challenges/{}/submissions.json".format(
- self.root, self.challenge_id)
- update_url = "{}/contests/submission_updates.json".format(
- self.root)
-
- challenge = self.request(challenge_url).json()
+ base = f"{self.root}/contests/_/challenges/{self.challenge_id}"
+ challenge_url = f"{base}.json"
+ submission_url = f"{base}/submissions.json"
+ update_url = f"{self.root}/contests/submission_updates.json"
+
+ challenge = self.request_json(challenge_url)
yield Message.Directory, {"challenge": challenge}
params = {"sorting": self.sorting}
@@ -344,8 +350,7 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
text.nameext_from_url(url, update)
yield Message.Url, self._no_cache(url), update
- @staticmethod
- def _id_from_url(url):
+ def _id_from_url(self, url):
"""Get an image's submission ID from its URL"""
parts = url.split("/")
return text.parse_int("".join(parts[7:10]))
@@ -362,7 +367,7 @@ class ArtstationSearchExtractor(ArtstationExtractor):
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
- self.params = query = text.parse_query(match.group(1))
+ self.params = query = text.parse_query(match[1])
self.query = text.unquote(query.get("query") or query.get("q", ""))
self.sorting = query.get("sort_by", "relevance").lower()
self.tags = query.get("tags", "").split(",")
@@ -384,7 +389,7 @@ class ArtstationSearchExtractor(ArtstationExtractor):
"value" : value.split(","),
})
- url = "{}/api/v2/search/projects.json".format(self.root)
+ url = f"{self.root}/api/v2/search/projects.json"
data = {
"query" : self.query,
"page" : None,
@@ -409,13 +414,13 @@ class ArtstationArtworkExtractor(ArtstationExtractor):
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
- self.query = text.parse_query(match.group(1))
+ self.query = text.parse_query(match[1])
def metadata(self):
return {"artwork": self.query}
def projects(self):
- url = "{}/projects.json".format(self.root)
+ url = f"{self.root}/projects.json"
return self._pagination(url, self.query.copy())
@@ -429,7 +434,7 @@ class ArtstationImageExtractor(ArtstationExtractor):
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
- self.project_id = match.group(1)
+ self.project_id = match[1]
self.assets = None
def metadata(self):
@@ -456,8 +461,8 @@ class ArtstationFollowingExtractor(ArtstationExtractor):
example = "https://www.artstation.com/USER/following"
def items(self):
- url = "{}/users/{}/following.json".format(self.root, self.user)
+ url = f"{self.root}/users/{self.user}/following.json"
for user in self._pagination(url):
- url = "{}/{}".format(self.root, user["username"])
+ url = f"{self.root}/{user['username']}"
user["_extractor"] = ArtstationUserExtractor
yield Message.Queue, url, user