diff options
| author | 2024-10-25 17:27:36 -0400 | |
|---|---|---|
| committer | 2024-10-25 17:27:36 -0400 | |
| commit | a46d8cec37ef1e7370a3127dd5bf3a47e7dc40de (patch) | |
| tree | 27382aedd6d14d1add2b1a37e6df2f3e52f0ac4e /gallery_dl/extractor/bluesky.py | |
| parent | e4f39ad7148b104ab522ee13e4af3d3003b65e0f (diff) | |
| parent | fc004701f923bb954a22c7fec2ae8d607e78cb2b (diff) | |
Update upstream source from tag 'upstream/1.27.7'
Update to upstream version '1.27.7'
with Debian dir f4e7d47b82b8fc4fb17fad4aa54873015dcc81c1
Diffstat (limited to 'gallery_dl/extractor/bluesky.py')
| -rw-r--r-- | gallery_dl/extractor/bluesky.py | 101 |
1 files changed, 58 insertions, 43 deletions
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py index 39c5635..a1a488e 100644 --- a/gallery_dl/extractor/bluesky.py +++ b/gallery_dl/extractor/bluesky.py @@ -42,62 +42,76 @@ class BlueskyExtractor(Extractor): self._user = self._user_did = None self.instance = self.root.partition("://")[2] self.videos = self.config("videos", True) + self.quoted = self.config("quoted", False) def items(self): for post in self.posts(): if "post" in post: post = post["post"] - - pid = post["uri"].rpartition("/")[2] if self._user_did and post["author"]["did"] != self._user_did: - self.log.debug("Skipping %s (repost)", pid) - continue - - post.update(post["record"]) - del post["record"] - - if self._metadata_facets: - if "facets" in post: - post["hashtags"] = tags = [] - post["mentions"] = dids = [] - post["uris"] = uris = [] - for facet in post["facets"]: - features = facet["features"][0] - if "tag" in features: - tags.append(features["tag"]) - elif "did" in features: - dids.append(features["did"]) - elif "uri" in features: - uris.append(features["uri"]) - else: - post["hashtags"] = post["mentions"] = post["uris"] = () - - if self._metadata_user: - post["user"] = self._user or post["author"] - - files = self._extract_files(post) - post["instance"] = self.instance - post["post_id"] = pid - post["count"] = len(files) - post["date"] = text.parse_datetime( - post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S") - - yield Message.Directory, post - - if not files: + self.log.debug("Skipping %s (repost)", self._pid(post)) continue - - base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob" - "?did={}&cid=".format(post["author"]["did"])) - for post["num"], file in enumerate(files, 1): - post.update(file) - yield Message.Url, base + file["filename"], post + embed = post.get("embed") + post.update(post.pop("record")) + + while True: + self._prepare(post) + files = self._extract_files(post) + + yield Message.Directory, post + if files: + base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob" + "?did={}&cid=".format(post["author"]["did"])) + for post["num"], file in enumerate(files, 1): + post.update(file) + yield Message.Url, base + file["filename"], post + + if not self.quoted or not embed or "record" not in embed: + break + + quote = embed["record"] + if "record" in quote: + quote = quote["record"] + quote["quote_id"] = self._pid(post) + quote["quote_by"] = post["author"] + embed = quote.get("embed") + quote.update(quote.pop("value")) + post = quote def posts(self): return () + def _pid(self, post): + return post["uri"].rpartition("/")[2] + + def _prepare(self, post): + if self._metadata_facets: + if "facets" in post: + post["hashtags"] = tags = [] + post["mentions"] = dids = [] + post["uris"] = uris = [] + for facet in post["facets"]: + features = facet["features"][0] + if "tag" in features: + tags.append(features["tag"]) + elif "did" in features: + dids.append(features["did"]) + elif "uri" in features: + uris.append(features["uri"]) + else: + post["hashtags"] = post["mentions"] = post["uris"] = () + + if self._metadata_user: + post["user"] = self._user or post["author"] + + post["instance"] = self.instance + post["post_id"] = self._pid(post) + post["date"] = text.parse_datetime( + post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S") + def _extract_files(self, post): if "embed" not in post: + post["count"] = 0 return () files = [] @@ -111,6 +125,7 @@ class BlueskyExtractor(Extractor): if "video" in media and self.videos: files.append(self._extract_media(media, "video")) + post["count"] = len(files) return files def _extract_media(self, media, key): |
