summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/bluesky.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/bluesky.py')
-rw-r--r--gallery_dl/extractor/bluesky.py121
1 files changed, 82 insertions, 39 deletions
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index f8fef93..ec274b8 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -25,10 +25,6 @@ class BlueskyExtractor(Extractor):
archive_fmt = "{filename}"
root = "https://bsky.app"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.user = match.group(1)
-
def _init(self):
meta = self.config("metadata") or ()
if meta:
@@ -87,6 +83,22 @@ class BlueskyExtractor(Extractor):
def posts(self):
return ()
+ def _posts_records(self, actor, collection):
+ depth = self.config("depth", "0")
+
+ for record in self.api.list_records(actor, collection):
+ uri = None
+ try:
+ uri = record["value"]["subject"]["uri"]
+ if "/app.bsky.feed.post/" in uri:
+ yield from self.api.get_post_thread_uri(uri, depth)
+ except exception.StopExtraction:
+ pass # deleted post
+ except Exception as exc:
+ self.log.debug(record, exc_info=exc)
+ self.log.warning("Failed to extract %s (%s: %s)",
+ uri or "record", exc.__class__.__name__, exc)
+
def _pid(self, post):
return post["uri"].rpartition("/")[2]
@@ -203,7 +215,7 @@ class BlueskyUserExtractor(BlueskyExtractor):
pass
def items(self):
- base = "{}/profile/{}/".format(self.root, self.user)
+ base = "{}/profile/{}/".format(self.root, self.groups[0])
default = ("posts" if self.config("quoted", False) or
self.config("reposts", False) else "media")
return self._dispatch_extractors((
@@ -213,6 +225,7 @@ class BlueskyUserExtractor(BlueskyExtractor):
(BlueskyPostsExtractor , base + "posts"),
(BlueskyRepliesExtractor , base + "replies"),
(BlueskyMediaExtractor , base + "media"),
+ (BlueskyVideoExtractor , base + "video"),
(BlueskyLikesExtractor , base + "likes"),
), (default,))
@@ -223,7 +236,8 @@ class BlueskyPostsExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/posts"
def posts(self):
- return self.api.get_author_feed(self.user, "posts_and_author_threads")
+ return self.api.get_author_feed(
+ self.groups[0], "posts_and_author_threads")
class BlueskyRepliesExtractor(BlueskyExtractor):
@@ -232,7 +246,8 @@ class BlueskyRepliesExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/replies"
def posts(self):
- return self.api.get_author_feed(self.user, "posts_with_replies")
+ return self.api.get_author_feed(
+ self.groups[0], "posts_with_replies")
class BlueskyMediaExtractor(BlueskyExtractor):
@@ -241,7 +256,18 @@ class BlueskyMediaExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/media"
def posts(self):
- return self.api.get_author_feed(self.user, "posts_with_media")
+ return self.api.get_author_feed(
+ self.groups[0], "posts_with_media")
+
+
+class BlueskyVideoExtractor(BlueskyExtractor):
+ subcategory = "video"
+ pattern = USER_PATTERN + r"/video"
+ example = "https://bsky.app/profile/HANDLE/video"
+
+ def posts(self):
+ return self.api.get_author_feed(
+ self.groups[0], "posts_with_video")
class BlueskyLikesExtractor(BlueskyExtractor):
@@ -250,7 +276,9 @@ class BlueskyLikesExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/likes"
def posts(self):
- return self.api.get_actor_likes(self.user)
+ if self.config("endpoint") == "getActorLikes":
+ return self.api.get_actor_likes(self.groups[0])
+ return self._posts_records(self.groups[0], "app.bsky.feed.like")
class BlueskyFeedExtractor(BlueskyExtractor):
@@ -258,12 +286,9 @@ class BlueskyFeedExtractor(BlueskyExtractor):
pattern = USER_PATTERN + r"/feed/([^/?#]+)"
example = "https://bsky.app/profile/HANDLE/feed/NAME"
- def __init__(self, match):
- BlueskyExtractor.__init__(self, match)
- self.feed = match.group(2)
-
def posts(self):
- return self.api.get_feed(self.user, self.feed)
+ actor, feed = self.groups
+ return self.api.get_feed(actor, feed)
class BlueskyListExtractor(BlueskyExtractor):
@@ -271,12 +296,9 @@ class BlueskyListExtractor(BlueskyExtractor):
pattern = USER_PATTERN + r"/lists/([^/?#]+)"
example = "https://bsky.app/profile/HANDLE/lists/ID"
- def __init__(self, match):
- BlueskyExtractor.__init__(self, match)
- self.list = match.group(2)
-
def posts(self):
- return self.api.get_list_feed(self.user, self.list)
+ actor, list_id = self.groups
+ return self.api.get_list_feed(actor, list_id)
class BlueskyFollowingExtractor(BlueskyExtractor):
@@ -285,7 +307,7 @@ class BlueskyFollowingExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/follows"
def items(self):
- for user in self.api.get_follows(self.user):
+ for user in self.api.get_follows(self.groups[0]):
url = "https://bsky.app/profile/" + user["did"]
user["_extractor"] = BlueskyUserExtractor
yield Message.Queue, url, user
@@ -296,12 +318,9 @@ class BlueskyPostExtractor(BlueskyExtractor):
pattern = USER_PATTERN + r"/post/([^/?#]+)"
example = "https://bsky.app/profile/HANDLE/post/ID"
- def __init__(self, match):
- BlueskyExtractor.__init__(self, match)
- self.post_id = match.group(2)
-
def posts(self):
- return self.api.get_post_thread(self.user, self.post_id)
+ actor, post_id = self.groups
+ return self.api.get_post_thread(actor, post_id)
class BlueskyInfoExtractor(BlueskyExtractor):
@@ -311,7 +330,7 @@ class BlueskyInfoExtractor(BlueskyExtractor):
def items(self):
self._metadata_user = True
- self.api._did_from_actor(self.user)
+ self.api._did_from_actor(self.groups[0])
return iter(((Message.Directory, self._user),))
@@ -322,7 +341,7 @@ class BlueskyAvatarExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/avatar"
def posts(self):
- return self._make_post(self.user, "avatar")
+ return self._make_post(self.groups[0], "avatar")
class BlueskyBackgroundExtractor(BlueskyExtractor):
@@ -332,7 +351,7 @@ class BlueskyBackgroundExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/banner"
def posts(self):
- return self._make_post(self.user, "banner")
+ return self._make_post(self.groups[0], "banner")
class BlueskySearchExtractor(BlueskyExtractor):
@@ -341,7 +360,7 @@ class BlueskySearchExtractor(BlueskyExtractor):
example = "https://bsky.app/search?q=QUERY"
def posts(self):
- query = text.unquote(self.user.replace("+", " "))
+ query = text.unquote(self.groups[0].replace("+", " "))
return self.api.search_posts(query)
@@ -351,13 +370,14 @@ class BlueskyHashtagExtractor(BlueskyExtractor):
example = "https://bsky.app/hashtag/NAME"
def posts(self):
- return self.api.search_posts("#"+self.user, self.groups[1])
+ hashtag, order = self.groups
+ return self.api.search_posts("#"+hashtag, order)
class BlueskyAPI():
"""Interface for the Bluesky API
- https://www.docs.bsky.app/docs/category/http-reference
+ https://docs.bsky.app/docs/category/http-reference
"""
def __init__(self, extractor):
@@ -378,7 +398,7 @@ class BlueskyAPI():
"actor": self._did_from_actor(actor),
"limit": "100",
}
- return self._pagination(endpoint, params)
+ return self._pagination(endpoint, params, check_empty=True)
def get_author_feed(self, actor, filter="posts_and_author_threads"):
endpoint = "app.bsky.feed.getAuthorFeed"
@@ -416,11 +436,16 @@ class BlueskyAPI():
return self._pagination(endpoint, params)
def get_post_thread(self, actor, post_id):
+ uri = "at://{}/app.bsky.feed.post/{}".format(
+ self._did_from_actor(actor), post_id)
+ depth = self.extractor.config("depth", "0")
+ return self.get_post_thread_uri(uri, depth)
+
+ def get_post_thread_uri(self, uri, depth="0"):
endpoint = "app.bsky.feed.getPostThread"
params = {
- "uri": "at://{}/app.bsky.feed.post/{}".format(
- self._did_from_actor(actor), post_id),
- "depth" : self.extractor.config("depth", "0"),
+ "uri" : uri,
+ "depth" : depth,
"parentHeight": "0",
}
@@ -443,6 +468,18 @@ class BlueskyAPI():
params = {"actor": did}
return self._call(endpoint, params)
+ def list_records(self, actor, collection):
+ endpoint = "com.atproto.repo.listRecords"
+ actor_did = self._did_from_actor(actor)
+ params = {
+ "repo" : actor_did,
+ "collection": collection,
+ "limit" : "100",
+ # "reverse" : "false",
+ }
+ return self._pagination(endpoint, params, "records",
+ self.service_endpoint(actor_did))
+
@memcache(keyarg=1)
def resolve_handle(self, handle):
endpoint = "com.atproto.identity.resolveHandle"
@@ -523,8 +560,10 @@ class BlueskyAPI():
_refresh_token_cache.update(self.username, data["refreshJwt"])
return "Bearer " + data["accessJwt"]
- def _call(self, endpoint, params):
- url = "{}/xrpc/{}".format(self.root, endpoint)
+ def _call(self, endpoint, params, root=None):
+ if root is None:
+ root = self.root
+ url = "{}/xrpc/{}".format(root, endpoint)
while True:
self.authenticate()
@@ -549,9 +588,13 @@ class BlueskyAPI():
self.extractor.log.debug("Server response: %s", response.text)
raise exception.StopExtraction(msg)
- def _pagination(self, endpoint, params, key="feed"):
+ def _pagination(self, endpoint, params,
+ key="feed", root=None, check_empty=False):
while True:
- data = self._call(endpoint, params)
+ data = self._call(endpoint, params, root)
+
+ if check_empty and not data[key]:
+ return
yield from data[key]
cursor = data.get("cursor")