diff options
Diffstat (limited to 'gallery_dl/extractor/vsco.py')
| -rw-r--r-- | gallery_dl/extractor/vsco.py | 91 |
1 files changed, 85 insertions, 6 deletions
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py index 14e3c7b..41141c6 100644 --- a/gallery_dl/extractor/vsco.py +++ b/gallery_dl/extractor/vsco.py @@ -11,8 +11,8 @@ from .common import Extractor, Message from .. import text, util - -BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co/([^/]+)" +BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co" +USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)" class VscoExtractor(Extractor): @@ -115,7 +115,7 @@ class VscoExtractor(Extractor): class VscoUserExtractor(VscoExtractor): """Extractor for images from a user on vsco.co""" subcategory = "user" - pattern = BASE_PATTERN + r"(?:/gallery|/images(?:/\d+)?)?/?(?:$|[?#])" + pattern = USER_PATTERN + r"(?:/gallery|/images(?:/\d+)?)?/?(?:$|[?#])" example = "https://vsco.co/USER/gallery" def images(self): @@ -139,8 +139,8 @@ class VscoCollectionExtractor(VscoExtractor): subcategory = "collection" directory_fmt = ("{category}", "{user}", "collection") archive_fmt = "c_{user}_{id}" - pattern = BASE_PATTERN + r"/collection/" - example = "https://vsco.co/USER/collection/12345" + pattern = USER_PATTERN + r"/collection" + example = "https://vsco.co/USER/collection/1" def images(self): url = "{}/{}/collection/1".format(self.root, self.user) @@ -159,10 +159,89 @@ class VscoCollectionExtractor(VscoExtractor): )) +class VscoSpaceExtractor(VscoExtractor): + """Extractor for a vsco.co space""" + subcategory = "space" + directory_fmt = ("{category}", "space", "{user}") + archive_fmt = "s_{user}_{id}" + pattern = BASE_PATTERN + r"/spaces/([^/?#]+)" + example = "https://vsco.co/spaces/a1b2c3d4e5f" + + def images(self): + url = "{}/spaces/{}".format(self.root, self.user) + data = self._extract_preload_state(url) + + tkn = data["users"]["currentUser"]["tkn"] + sid = self.user + + posts = data["entities"]["posts"] + images = data["entities"]["postImages"] + for post in posts.values(): + post["image"] = images[post["image"]] + + space = data["spaces"]["byId"][sid] + space["postsList"] = [posts[pid] for pid in space["postsList"]] + + url = "{}/grpc/spaces/{}/posts".format(self.root, sid) + params = {} + return self._pagination(url, params, tkn, space) + + def _pagination(self, url, params, token, data): + headers = { + "Accept" : "application/json", + "Referer" : "{}/spaces/{}".format(self.root, self.user), + "Content-Type" : "application/json", + "Authorization": "Bearer " + token, + } + + while True: + for post in data["postsList"]: + post = self._transform_media(post["image"]) + post["upload_date"] = post["upload_date"]["sec"] * 1000 + yield post + + cursor = data["cursor"] + if cursor.get("atEnd"): + return + params["cursor"] = cursor["postcursorcontext"]["postId"] + + data = self.request(url, params=params, headers=headers).json() + + +class VscoSpacesExtractor(VscoExtractor): + """Extractor for a vsco.co user's spaces""" + subcategory = "spaces" + pattern = USER_PATTERN + r"/spaces" + example = "https://vsco.co/USER/spaces" + + def items(self): + url = "{}/{}/spaces".format(self.root, self.user) + data = self._extract_preload_state(url) + + tkn = data["users"]["currentUser"]["tkn"] + uid = data["sites"]["siteByUsername"][self.user]["site"]["userId"] + + headers = { + "Accept" : "application/json", + "Referer" : url, + "Content-Type" : "application/json", + "Authorization": "Bearer " + tkn, + } + # this would theoretically need to be paginated + url = "{}/grpc/spaces/user/{}".format(self.root, uid) + data = self.request(url, headers=headers).json() + + for space in data["spacesWithRoleList"]: + space = space["space"] + url = "{}/spaces/{}".format(self.root, space["id"]) + space["_extractor"] = VscoSpaceExtractor + yield Message.Queue, url, space + + class VscoImageExtractor(VscoExtractor): """Extractor for individual images on vsco.co""" subcategory = "image" - pattern = BASE_PATTERN + r"/media/([0-9a-fA-F]+)" + pattern = USER_PATTERN + r"/media/([0-9a-fA-F]+)" example = "https://vsco.co/USER/media/0123456789abcdef" def __init__(self, match): |
