diff options
| author | 2025-04-27 20:34:08 -0400 | |
|---|---|---|
| committer | 2025-04-27 20:34:08 -0400 | |
| commit | 4a18b5837c1dd82f5964afcfc3fecc53cd97e79c (patch) | |
| tree | 44019190a44fd449daa0efd07c65bbe524688c33 /gallery_dl/extractor/scrolller.py | |
| parent | b830dc03b3b7c9dd119648e1be9c1145d56e096c (diff) | |
New upstream version 1.29.5. (tag: upstream/1.29.5)
Diffstat (limited to 'gallery_dl/extractor/scrolller.py')
| -rw-r--r-- | gallery_dl/extractor/scrolller.py | 218 |
1 file changed, 165 insertions, 53 deletions
diff --git a/gallery_dl/extractor/scrolller.py b/gallery_dl/extractor/scrolller.py index c818c98..f97fa14 100644 --- a/gallery_dl/extractor/scrolller.py +++ b/gallery_dl/extractor/scrolller.py @@ -20,8 +20,8 @@ class ScrolllerExtractor(Extractor): category = "scrolller" root = "https://scrolller.com" directory_fmt = ("{category}", "{subredditTitle}") - filename_fmt = "{id}{title:? //}.{extension}" - archive_fmt = "{id}" + filename_fmt = "{id}{num:?_//>03}{title:? //}.{extension}" + archive_fmt = "{id}_{num}" request_interval = (0.5, 1.5) def _init(self): @@ -31,23 +31,36 @@ class ScrolllerExtractor(Extractor): self.login() for post in self.posts(): - - media_sources = post.get("mediaSources") - if not media_sources: - self.log.warning("%s: No media files", post.get("id")) - continue - - src = max(media_sources, key=self._sort_key) - post.update(src) - url = src["url"] - text.nameext_from_url(url, post) + files = self._extract_files(post) + post["count"] = len(files) yield Message.Directory, post - yield Message.Url, url, post + for file in files: + url = file["url"] + post.update(file) + yield Message.Url, url, text.nameext_from_url(url, post) def posts(self): return () + def _extract_files(self, post): + album = post.pop("albumContent", None) + if not album: + sources = post.get("mediaSources") + if not sources: + self.log.warning("%s: No media files", post.get("id")) + return () + src = max(sources, key=self._sort_key) + src["num"] = 0 + return (src,) + + files = [] + for num, media in enumerate(album, 1): + src = max(media["mediaSources"], key=self._sort_key) + src["num"] = num + files.append(src) + return files + def login(self): username, password = self._get_auth_info() if username: @@ -63,7 +76,7 @@ class ScrolllerExtractor(Extractor): } try: - data = self._request_graphql("LoginQuery", variables) + data = self._request_graphql("LoginQuery", variables, False) except exception.HttpError as exc: if exc.status == 403: raise exception.AuthenticationError() @@ 
-71,10 +84,9 @@ class ScrolllerExtractor(Extractor): return data["login"]["token"] - def _request_graphql(self, opname, variables): - url = "https://api.scrolller.com/api/v2/graphql" + def _request_graphql(self, opname, variables, admin=True): headers = { - "Content-Type" : "text/plain;charset=UTF-8", + "Content-Type" : None, "Origin" : self.root, "Sec-Fetch-Dest": "empty", "Sec-Fetch-Mode": "cors", @@ -85,14 +97,23 @@ class ScrolllerExtractor(Extractor): "variables" : variables, "authorization": self.auth_token, } + + if admin: + url = "https://api.scrolller.com/admin" + headers["Content-Type"] = "application/json" + else: + url = "https://api.scrolller.com/api/v2/graphql" + headers["Content-Type"] = "text/plain;charset=UTF-8" + return self.request( url, method="POST", headers=headers, data=util.json_dumps(data), ).json()["data"] - def _pagination(self, opname, variables): - while True: + def _pagination(self, opname, variables, data=None): + if data is None: data = self._request_graphql(opname, variables) + while True: while "items" not in data: data = data.popitem()[1] yield from data["items"] @@ -101,6 +122,8 @@ class ScrolllerExtractor(Extractor): return variables["iterator"] = data["iterator"] + data = self._request_graphql(opname, variables) + def _sort_key(self, src): return src["width"], not src["isOptimized"] @@ -114,6 +137,7 @@ class ScrolllerSubredditExtractor(ScrolllerExtractor): def posts(self): url, query = self.groups filter = None + sort = "RANDOM" if query: params = text.parse_query(query) @@ -121,12 +145,24 @@ class ScrolllerSubredditExtractor(ScrolllerExtractor): filter = params["filter"].upper().rstrip("S") variables = { - "url" : url, - "iterator" : None, - "filter" : filter, - "hostsDown": None, + "url" : url, + "filter": filter, + "sortBy": sort, + "limit" : 50, } - return self._pagination("SubredditQuery", variables) + subreddit = self._request_graphql( + "SubredditQuery", variables)["getSubreddit"] + + variables = { + "subredditId": 
subreddit["id"], + "iterator": None, + "filter" : filter, + "sortBy" : sort, + "limit" : 50, + "isNsfw" : subreddit["isNsfw"], + } + return self._pagination( + "SubredditChildrenQuery", variables, subreddit["children"]) class ScrolllerFollowingExtractor(ScrolllerExtractor): @@ -142,11 +178,14 @@ class ScrolllerFollowingExtractor(ScrolllerExtractor): raise exception.AuthorizationError("Login required") variables = { - "iterator" : None, - "hostsDown": None, + "iterator": None, + "filter" : None, + "limit" : 10, + "isNsfw" : False, + "sortBy" : "RANDOM", } - for subreddit in self._pagination("FollowingQuery", variables): + for subreddit in self._pagination("GetFollowingSubreddits", variables): url = self.root + subreddit["url"] subreddit["_extractor"] = ScrolllerSubredditExtractor yield Message.Queue, url, subreddit @@ -156,39 +195,62 @@ class ScrolllerPostExtractor(ScrolllerExtractor): """Extractor for media from a single scrolller post""" subcategory = "post" pattern = BASE_PATTERN + r"/(?!r/|following$)([^/?#]+)" - example = "https://scrolller.com/title-slug-a1b2c3d4f5" + example = "https://scrolller.com/TITLE-SLUG-a1b2c3d4f5" def posts(self): - url = "{}/{}".format(self.root, self.groups[0]) - page = self.request(url).text - data = util.json_loads(text.extr( - page, '<script>window.scrolllerConfig="', '"</script>') - .replace('\\"', '"')) - return (data["item"],) + variables = {"url": "/" + self.groups[0]} + data = self._request_graphql("SubredditPostQuery", variables) + return (data["getPost"],) QUERIES = { + "SubredditPostQuery": """\ +query SubredditPostQuery( + $url: String! 
+) { + getPost( + data: { url: $url } + ) { + __typename id url title subredditId subredditTitle subredditUrl + redditPath isNsfw hasAudio fullLengthSource gfycatSource redgifsSource + ownerAvatar username displayName favoriteCount isPaid tags + commentsCount commentsRepliesCount isFavorite + albumContent { mediaSources { url width height isOptimized } } + mediaSources { url width height isOptimized } + blurredMediaSources { url width height isOptimized } + } +} +""", + "SubredditQuery": """\ query SubredditQuery( $url: String! - $filter: SubredditPostFilter $iterator: String + $sortBy: GallerySortBy + $filter: GalleryFilter + $limit: Int! ) { getSubreddit( - url: $url + data: { + url: $url, + iterator: $iterator, + filter: $filter, + limit: $limit, + sortBy: $sortBy + } ) { - children( - limit: 50 - iterator: $iterator - filter: $filter - disabledHosts: null - ) { + __typename id url title secondaryTitle description createdAt isNsfw + subscribers isComplete itemCount videoCount pictureCount albumCount + isPaid username tags isFollowing + banner { url width height isOptimized } + children { iterator items { - __typename id url title subredditId subredditTitle - subredditUrl redditPath isNsfw albumUrl hasAudio - fullLengthSource gfycatSource redgifsSource ownerAvatar - username displayName isPaid tags isFavorite + __typename id url title subredditId subredditTitle subredditUrl + redditPath isNsfw hasAudio fullLengthSource gfycatSource + redgifsSource ownerAvatar username displayName favoriteCount + isPaid tags commentsCount commentsRepliesCount isFavorite + albumContent { mediaSources { url width height isOptimized } } mediaSources { url width height isOptimized } blurredMediaSources { url width height isOptimized } } @@ -197,19 +259,59 @@ query SubredditQuery( } """, - "FollowingQuery": """\ -query FollowingQuery( + "SubredditChildrenQuery": """\ +query SubredditChildrenQuery( + $subredditId: Int! 
$iterator: String + $filter: GalleryFilter + $sortBy: GallerySortBy + $limit: Int! + $isNsfw: Boolean ) { - getFollowing( - limit: 10 - iterator: $iterator + getSubredditChildren( + data: { + subredditId: $subredditId, + iterator: $iterator, + filter: $filter, + sortBy: $sortBy, + limit: $limit, + isNsfw: $isNsfw + }, + ) { + iterator items { + __typename id url title subredditId subredditTitle subredditUrl + redditPath isNsfw hasAudio fullLengthSource gfycatSource + redgifsSource ownerAvatar username displayName favoriteCount isPaid + tags commentsCount commentsRepliesCount isFavorite + albumContent { mediaSources { url width height isOptimized } } + mediaSources { url width height isOptimized } + blurredMediaSources { url width height isOptimized } + } + } +} +""", + + "GetFollowingSubreddits": """\ +query GetFollowingSubreddits( + $iterator: String, + $limit: Int!, + $filter: GalleryFilter, + $isNsfw: Boolean, + $sortBy: GallerySortBy +) { + getFollowingSubreddits( + data: { + isNsfw: $isNsfw + limit: $limit + filter: $filter + iterator: $iterator + sortBy: $sortBy + } ) { iterator items { __typename id url title secondaryTitle description createdAt isNsfw subscribers isComplete itemCount videoCount pictureCount albumCount - isPaid username tags isFollowing - banner { url width height isOptimized } + isFollowing } } } @@ -229,4 +331,14 @@ query LoginQuery( } """, + "ItemTypeQuery": """\ +query ItemTypeQuery( + $url: String! +) { + getItemType( + url: $url + ) +} +""", + } |
