Diffstat (limited to 'gallery_dl/extractor/scrolller.py')
-rw-r--r--  gallery_dl/extractor/scrolller.py  227
1 file changed, 227 insertions, 0 deletions
diff --git a/gallery_dl/extractor/scrolller.py b/gallery_dl/extractor/scrolller.py
new file mode 100644
index 0000000..9f9f0c4
--- /dev/null
+++ b/gallery_dl/extractor/scrolller.py
@@ -0,0 +1,227 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://scrolller.com/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+from ..cache import cache
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?scrolller\.com"
+
+
+class ScrolllerExtractor(Extractor):
+    """Base class for scrolller extractors"""
+    category = "scrolller"
+    root = "https://scrolller.com"
+    directory_fmt = ("{category}", "{subredditTitle}")
+    filename_fmt = "{id}{title:? //}.{extension}"
+    archive_fmt = "{id}"
+    request_interval = (0.5, 1.5)
+
+    def _init(self):
+        self.auth_token = None
+
+    def items(self):
+        self.login()
+
+        for post in self.posts():
+
+            src = max(post["mediaSources"], key=self._sort_key)
+            post.update(src)
+            url = src["url"]
+            text.nameext_from_url(url, post)
+
+            yield Message.Directory, post
+            yield Message.Url, url, post
+
+    def posts(self):
+        return ()
+
+    def login(self):
+        username, password = self._get_auth_info()
+        if username:
+            self.auth_token = self._login_impl(username, password)
+
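+    # Cache the login token for 28 days so repeated runs can reuse it
+    # instead of re-authenticating.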
+    @cache(maxage=28*86400, keyarg=1)
+    def _login_impl(self, username, password):
+        self.log.info("Logging in as %s", username)
+
+        variables = {
+            "username": username,
+            "password": password,
+        }
+
+        try:
+            data = self._request_graphql("LoginQuery", variables)
+        except exception.HttpError as exc:
+            if exc.status == 403:
+                raise exception.AuthenticationError()
+            raise
+
+        return data["login"]["token"]
+
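+    # All API communication goes through a single GraphQL endpoint; the
+    # cached auth token, if available, is sent along in the request body.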
+    def _request_graphql(self, opname, variables):
+        url = "https://api.scrolller.com/api/v2/graphql"
+        headers = {
+            "Content-Type"  : "text/plain;charset=UTF-8",
+            "Origin"        : self.root,
+            "Sec-Fetch-Dest": "empty",
+            "Sec-Fetch-Mode": "cors",
+            "Sec-Fetch-Site": "same-site",
+        }
+        data = {
+            "query"        : QUERIES[opname],
+            "variables"    : variables,
+            "authorization": self.auth_token,
+        }
+        return self.request(
+            url, method="POST", headers=headers, data=util.json_dumps(data),
+        ).json()["data"]
+
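+    # Iterator-based pagination: descend into the nested GraphQL response
+    # until the object holding 'items' and 'iterator' is reached, then
+    # repeat the query with the returned iterator until it is exhausted.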
+    def _pagination(self, opname, variables):
+        while True:
+            data = self._request_graphql(opname, variables)
+
+            while "items" not in data:
+                data = data.popitem()[1]
+            yield from data["items"]
+
+            if not data["iterator"]:
+                return
+            variables["iterator"] = data["iterator"]
+
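+    # Prefer the widest media source; on equal width, prefer the original
+    # (non-'optimized') file.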
+    def _sort_key(self, src):
+        return src["width"], not src["isOptimized"]
+
+
+class ScrolllerSubredditExtractor(ScrolllerExtractor):
+    """Extractor for media from a scrolller subreddit"""
+    subcategory = "subreddit"
+    pattern = BASE_PATTERN + r"(/r/[^/?#]+)(?:/?\?([^#]+))?"
+    example = "https://scrolller.com/r/SUBREDDIT"
+
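+    # An optional '?filter=' query parameter is upper-cased and
+    # de-pluralized to match the API's SubredditPostFilter enum
+    # (e.g. 'pictures' -> 'PICTURE').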
+    def posts(self):
+        url, query = self.groups
+        filter = None
+
+        if query:
+            params = text.parse_query(query)
+            if "filter" in params:
+                filter = params["filter"].upper().rstrip("S")
+
+        variables = {
+            "url"      : url,
+            "iterator" : None,
+            "filter"   : filter,
+            "hostsDown": None,
+        }
+        return self._pagination("SubredditQuery", variables)
+
+
+class ScrolllerFollowingExtractor(ScrolllerExtractor):
+    """Extractor for followed scrolller subreddits"""
+    subcategory = "following"
+    pattern = BASE_PATTERN + r"/following"
+    example = "https://scrolller.com/following"
+
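+    # Listing followed subreddits requires a logged-in account; each
+    # result is handed off to ScrolllerSubredditExtractor via the queue.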
+    def items(self):
+        self.login()
+
+        if not self.auth_token:
+            raise exception.AuthorizationError("Login required")
+
+        variables = {
+            "iterator" : None,
+            "hostsDown": None,
+        }
+
+        for subreddit in self._pagination("FollowingQuery", variables):
+            url = self.root + subreddit["url"]
+            subreddit["_extractor"] = ScrolllerSubredditExtractor
+            yield Message.Queue, url, subreddit
+
+
+class ScrolllerPostExtractor(ScrolllerExtractor):
+    """Extractor for media from a single scrolller post"""
+    subcategory = "post"
+    pattern = BASE_PATTERN + r"/(?!r/|following$)([^/?#]+)"
+    example = "https://scrolller.com/title-slug-a1b2c3d4f5"
+
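+    # Single posts are not fetched via GraphQL; their data is embedded in
+    # the HTML page as an escaped JSON blob in 'window.scrolllerConfig'.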
+    def posts(self):
+        url = "{}/{}".format(self.root, self.groups[0])
+        page = self.request(url).text
+        data = util.json_loads(text.extr(
+            page, '<script>window.scrolllerConfig="', '"</script>')
+            .replace('\\"', '"'))
+        return (data["item"],)
+
+
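+# GraphQL documents sent by _request_graphql(); keys correspond to the
+# 'opname' argument.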
+QUERIES = {
+
+    "SubredditQuery": """\
+query SubredditQuery(
+    $url: String!
+    $filter: SubredditPostFilter
+    $iterator: String
+) {
+    getSubreddit(
+        url: $url
+    ) {
+        children(
+            limit: 50
+            iterator: $iterator
+            filter: $filter
+            disabledHosts: null
+        ) {
+            iterator items {
+                __typename id url title subredditId subredditTitle
+                subredditUrl redditPath isNsfw albumUrl hasAudio
+                fullLengthSource gfycatSource redgifsSource ownerAvatar
+                username displayName isPaid tags isFavorite
+                mediaSources { url width height isOptimized }
+                blurredMediaSources { url width height isOptimized }
+            }
+        }
+    }
+}
+""",
+
+    "FollowingQuery": """\
+query FollowingQuery(
+    $iterator: String
+) {
+    getFollowing(
+        limit: 10
+        iterator: $iterator
+    ) {
+        iterator items {
+            __typename id url title secondaryTitle description createdAt isNsfw
+            subscribers isComplete itemCount videoCount pictureCount albumCount
+            isPaid username tags isFollowing
+            banner { url width height isOptimized }
+        }
+    }
+}
+""",
+
+    "LoginQuery": """\
+query LoginQuery(
+    $username: String!,
+    $password: String!
+) {
+    login(
+        username: $username,
+        password: $password
+    ) {
+        username token expiresAt isAdmin status isPremium
+    }
+}
+""",
+
+}