diff options
Diffstat (limited to 'gallery_dl/extractor/bluesky.py')
| -rw-r--r-- | gallery_dl/extractor/bluesky.py | 49 |
1 files changed, 43 insertions, 6 deletions
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index a1a488e..bbff17c 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -12,7 +12,8 @@ from .common import Extractor, Message
 from .. import text, util, exception
 from ..cache import cache, memcache
 
-BASE_PATTERN = r"(?:https?://)?bsky\.app"
+BASE_PATTERN = (r"(?:https?://)?"
+                r"(?:(?:www\.)?(?:c|[fv]x)?bs[ky]y[ex]?\.app|main\.bsky\.dev)")
 USER_PATTERN = BASE_PATTERN + r"/profile/([^/?#]+)"
 
 
@@ -60,8 +61,10 @@ class BlueskyExtractor(Extractor):
             yield Message.Directory, post
 
             if files:
-                base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
-                        "?did={}&cid=".format(post["author"]["did"]))
+                did = post["author"]["did"]
+                base = (
+                    "{}/xrpc/com.atproto.sync.getBlob?did={}&cid=".format(
+                        self.api.service_endpoint(did), did))
                 for post["num"], file in enumerate(files, 1):
                     post.update(file)
                     yield Message.Url, base + file["filename"], post
@@ -84,7 +87,14 @@ class BlueskyExtractor(Extractor):
     def _pid(self, post):
         return post["uri"].rpartition("/")[2]
 
+    @memcache(keyarg=1)
+    def _instance(self, handle):
+        return ".".join(handle.rsplit(".", 2)[-2:])
+
     def _prepare(self, post):
+        author = post["author"]
+        author["instance"] = self._instance(author["handle"])
+
         if self._metadata_facets:
             if "facets" in post:
                 post["hashtags"] = tags = []
@@ -102,7 +112,7 @@ class BlueskyExtractor(Extractor):
                 post["hashtags"] = post["mentions"] = post["uris"] = ()
 
         if self._metadata_user:
-            post["user"] = self._user or post["author"]
+            post["user"] = self._user or author
 
         post["instance"] = self.instance
         post["post_id"] = self._pid(post)
@@ -317,6 +327,15 @@ class BlueskySearchExtractor(BlueskyExtractor):
         return self.api.search_posts(self.user)
 
 
+class BlueskyHashtagExtractor(BlueskyExtractor):
+    subcategory = "hashtag"
+    pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)(?:/(top|latest))?"
+    example = "https://bsky.app/hashtag/NAME"
+
+    def posts(self):
+        return self.api.search_posts("#"+self.user, self.groups[1])
+
+
 class BlueskyAPI():
     """Interface for the Bluesky API
 
@@ -412,11 +431,28 @@ class BlueskyAPI():
         params = {"handle": handle}
         return self._call(endpoint, params)["did"]
 
-    def search_posts(self, query):
+    @memcache(keyarg=1)
+    def service_endpoint(self, did):
+        if did.startswith('did:web:'):
+            url = "https://" + did[8:] + "/.well-known/did.json"
+        else:
+            url = "https://plc.directory/" + did
+
+        try:
+            data = self.extractor.request(url).json()
+            for service in data["service"]:
+                if service["type"] == "AtprotoPersonalDataServer":
+                    return service["serviceEndpoint"]
+        except Exception:
+            pass
+        return "https://bsky.social"
+
+    def search_posts(self, query, sort=None):
         endpoint = "app.bsky.feed.searchPosts"
         params = {
             "q"    : query,
             "limit": "100",
+            "sort" : sort,
         }
         return self._pagination(endpoint, params, "posts")
 
@@ -430,7 +466,8 @@ class BlueskyAPI():
         if user_did and not extr.config("reposts", False):
             extr._user_did = did
         if extr._metadata_user:
-            extr._user = self.get_profile(did)
+            extr._user = user = self.get_profile(did)
+            user["instance"] = extr._instance(user["handle"])
 
         return did
 
|
