summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/bluesky.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/bluesky.py')
-rw-r--r-- gallery_dl/extractor/bluesky.py 49
1 files changed, 43 insertions, 6 deletions
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index a1a488e..bbff17c 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -12,7 +12,8 @@ from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
-BASE_PATTERN = r"(?:https?://)?bsky\.app"
+BASE_PATTERN = (r"(?:https?://)?"
+ r"(?:(?:www\.)?(?:c|[fv]x)?bs[ky]y[ex]?\.app|main\.bsky\.dev)")
USER_PATTERN = BASE_PATTERN + r"/profile/([^/?#]+)"
@@ -60,8 +61,10 @@ class BlueskyExtractor(Extractor):
yield Message.Directory, post
if files:
- base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
- "?did={}&cid=".format(post["author"]["did"]))
+ did = post["author"]["did"]
+ base = (
+ "{}/xrpc/com.atproto.sync.getBlob?did={}&cid=".format(
+ self.api.service_endpoint(did), did))
for post["num"], file in enumerate(files, 1):
post.update(file)
yield Message.Url, base + file["filename"], post
@@ -84,7 +87,14 @@ class BlueskyExtractor(Extractor):
def _pid(self, post):
return post["uri"].rpartition("/")[2]
+ @memcache(keyarg=1)
+ def _instance(self, handle):
+ return ".".join(handle.rsplit(".", 2)[-2:])
+
def _prepare(self, post):
+ author = post["author"]
+ author["instance"] = self._instance(author["handle"])
+
if self._metadata_facets:
if "facets" in post:
post["hashtags"] = tags = []
@@ -102,7 +112,7 @@ class BlueskyExtractor(Extractor):
post["hashtags"] = post["mentions"] = post["uris"] = ()
if self._metadata_user:
- post["user"] = self._user or post["author"]
+ post["user"] = self._user or author
post["instance"] = self.instance
post["post_id"] = self._pid(post)
@@ -317,6 +327,15 @@ class BlueskySearchExtractor(BlueskyExtractor):
return self.api.search_posts(self.user)
+class BlueskyHashtagExtractor(BlueskyExtractor):
+ subcategory = "hashtag"
+ pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)(?:/(top|latest))?"
+ example = "https://bsky.app/hashtag/NAME"
+
+ def posts(self):
+ return self.api.search_posts("#"+self.user, self.groups[1])
+
+
class BlueskyAPI():
"""Interface for the Bluesky API
@@ -412,11 +431,28 @@ class BlueskyAPI():
params = {"handle": handle}
return self._call(endpoint, params)["did"]
- def search_posts(self, query):
+ @memcache(keyarg=1)
+ def service_endpoint(self, did):
+ if did.startswith('did:web:'):
+ url = "https://" + did[8:] + "/.well-known/did.json"
+ else:
+ url = "https://plc.directory/" + did
+
+ try:
+ data = self.extractor.request(url).json()
+ for service in data["service"]:
+ if service["type"] == "AtprotoPersonalDataServer":
+ return service["serviceEndpoint"]
+ except Exception:
+ pass
+ return "https://bsky.social"
+
+ def search_posts(self, query, sort=None):
endpoint = "app.bsky.feed.searchPosts"
params = {
"q" : query,
"limit": "100",
+ "sort" : sort,
}
return self._pagination(endpoint, params, "posts")
@@ -430,7 +466,8 @@ class BlueskyAPI():
if user_did and not extr.config("reposts", False):
extr._user_did = did
if extr._metadata_user:
- extr._user = self.get_profile(did)
+ extr._user = user = self.get_profile(did)
+ user["instance"] = extr._instance(user["handle"])
return did