diff options
Diffstat (limited to 'gallery_dl/extractor/artstation.py')
| -rw-r--r-- | gallery_dl/extractor/artstation.py | 97 |
1 files changed, 82 insertions, 15 deletions
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index b58b3d3..49fde7b 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -29,11 +29,13 @@ class ArtstationExtractor(Extractor): self.user = match.group(1) or match.group(2) def items(self): - data = self.metadata() - - projects = self.projects() + videos = self.config("videos", True) + previews = self.config("previews", False) external = self.config("external", False) max_posts = self.config("max-posts") + + data = self.metadata() + projects = self.projects() if max_posts: projects = itertools.islice(projects, max_posts) @@ -45,13 +47,29 @@ class ArtstationExtractor(Extractor): asset["num"] = num yield Message.Directory, asset - if adict["has_embedded_player"] and external: + if adict["has_embedded_player"]: player = adict["player_embedded"] url = (text.extr(player, 'src="', '"') or text.extr(player, "src='", "'")) - if url and not url.startswith(self.root): - asset["extension"] = None - yield Message.Url, "ytdl:" + url, asset + if url.startswith(self.root): + # video clip hosted on artstation + if videos: + page = self.request(url).text + url = text.extr(page, ' src="', '"') + text.nameext_from_url(url, asset) + yield Message.Url, url, asset + elif url: + # external URL + if external: + asset["extension"] = "mp4" + yield Message.Url, "ytdl:" + url, asset + else: + self.log.debug(player) + self.log.warning( + "Failed to extract embedded player URL (%s)", + adict.get("id")) + + if not previews: continue if adict["has_image"]: @@ -59,10 +77,11 @@ class ArtstationExtractor(Extractor): text.nameext_from_url(url, asset) url = self._no_cache(url) - lhs, _, rhs = url.partition("/large/") - if rhs: - url = lhs + "/4k/" + rhs - asset["_fallback"] = self._image_fallback(lhs, rhs) + if "/video_clips/" not in url: + lhs, _, rhs = url.partition("/large/") + if rhs: + url = lhs + "/4k/" + rhs + asset["_fallback"] = self._image_fallback(lhs, rhs) yield Message.Url, url, asset @@ -175,7 +194,7 @@ class ArtstationUserExtractor(ArtstationExtractor): subcategory = "user" pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com" r"/(?!artwork|projects|search)([^/?#]+)(?:/albums/all)?" - r"|((?!www)\w+)\.artstation\.com(?:/projects)?)/?$") + r"|((?!www)[\w-]+)\.artstation\.com(?:/projects)?)/?$") example = "https://www.artstation.com/USER" def projects(self): @@ -192,7 +211,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor): archive_fmt = "a_{album[id]}_{asset[id]}" pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com" r"/(?!artwork|projects|search)([^/?#]+)" - r"|((?!www)\w+)\.artstation\.com)/albums/(\d+)") + r"|((?!www)[\w-]+)\.artstation\.com)/albums/(\d+)") example = "https://www.artstation.com/USER/albums/12345" def __init__(self, match): @@ -226,7 +245,7 @@ class ArtstationLikesExtractor(ArtstationExtractor): directory_fmt = ("{category}", "{userinfo[username]}", "Likes") archive_fmt = "f_{userinfo[id]}_{asset[id]}" pattern = (r"(?:https?://)?(?:www\.)?artstation\.com" - r"/(?!artwork|projects|search)([^/?#]+)/likes/?") + r"/(?!artwork|projects|search)([^/?#]+)/likes") example = "https://www.artstation.com/USER/likes" def projects(self): @@ -234,6 +253,54 @@ class ArtstationLikesExtractor(ArtstationExtractor): return self._pagination(url) +class ArtstationCollectionExtractor(ArtstationExtractor): + """Extractor for an artstation collection""" + subcategory = "collection" + directory_fmt = ("{category}", "{user}", + "{collection[id]} {collection[name]}") + archive_fmt = "c_{collection[id]}_{asset[id]}" + pattern = (r"(?:https?://)?(?:www\.)?artstation\.com" + r"/(?!artwork|projects|search)([^/?#]+)/collections/(\d+)") + example = "https://www.artstation.com/USER/collections/12345" + + def __init__(self, match): + ArtstationExtractor.__init__(self, match) + self.collection_id = match.group(2) + + def metadata(self): + url = "{}/collections/{}.json".format( + self.root, self.collection_id) + params = {"username": self.user} + collection = self.request( + url, params=params, notfound="collection").json() + return {"collection": collection, "user": self.user} + + def projects(self): + url = "{}/collections/{}/projects.json".format( + self.root, self.collection_id) + params = {"collection_id": self.collection_id} + return self._pagination(url, params) + + +class ArtstationCollectionsExtractor(ArtstationExtractor): + """Extractor for an artstation user's collections""" + subcategory = "collections" + pattern = (r"(?:https?://)?(?:www\.)?artstation\.com" + r"/(?!artwork|projects|search)([^/?#]+)/collections/?$") + example = "https://www.artstation.com/USER/collections" + + def items(self): + url = self.root + "/collections.json" + params = {"username": self.user} + + for collection in self.request( + url, params=params, notfound="collections").json(): + url = "{}/{}/collections/{}".format( + self.root, self.user, collection["id"]) + collection["_extractor"] = ArtstationCollectionExtractor + yield Message.Queue, url, collection + + class ArtstationChallengeExtractor(ArtstationExtractor): """Extractor for submissions of artstation challenges""" subcategory = "challenge" @@ -355,7 +422,7 @@ class ArtstationImageExtractor(ArtstationExtractor): """Extractor for images from a single artstation project""" subcategory = "image" pattern = (r"(?:https?://)?(?:" - r"(?:\w+\.)?artstation\.com/(?:artwork|projects|search)" + r"(?:[\w-]+\.)?artstation\.com/(?:artwork|projects|search)" r"|artstn\.co/p)/(\w+)") example = "https://www.artstation.com/artwork/abcde" |
