about summary refs log tree commit diff stats
path: root/gallery_dl/extractor/bluesky.py
diff options
context:
space:
mode:
author Libravatar Unit 193 <unit193@unit193.net> 2024-10-25 17:27:36 -0400
committer Libravatar Unit 193 <unit193@unit193.net> 2024-10-25 17:27:36 -0400
commit a46d8cec37ef1e7370a3127dd5bf3a47e7dc40de (patch)
tree 27382aedd6d14d1add2b1a37e6df2f3e52f0ac4e /gallery_dl/extractor/bluesky.py
parent e4f39ad7148b104ab522ee13e4af3d3003b65e0f (diff)
parent fc004701f923bb954a22c7fec2ae8d607e78cb2b (diff)
Update upstream source from tag 'upstream/1.27.7'
Update to upstream version '1.27.7' with Debian dir f4e7d47b82b8fc4fb17fad4aa54873015dcc81c1
Diffstat (limited to 'gallery_dl/extractor/bluesky.py')
-rw-r--r-- gallery_dl/extractor/bluesky.py 101
1 files changed, 58 insertions, 43 deletions
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index 39c5635..a1a488e 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -42,62 +42,76 @@ class BlueskyExtractor(Extractor):
self._user = self._user_did = None
self.instance = self.root.partition("://")[2]
self.videos = self.config("videos", True)
+ self.quoted = self.config("quoted", False)
def items(self):
for post in self.posts():
if "post" in post:
post = post["post"]
-
- pid = post["uri"].rpartition("/")[2]
if self._user_did and post["author"]["did"] != self._user_did:
- self.log.debug("Skipping %s (repost)", pid)
- continue
-
- post.update(post["record"])
- del post["record"]
-
- if self._metadata_facets:
- if "facets" in post:
- post["hashtags"] = tags = []
- post["mentions"] = dids = []
- post["uris"] = uris = []
- for facet in post["facets"]:
- features = facet["features"][0]
- if "tag" in features:
- tags.append(features["tag"])
- elif "did" in features:
- dids.append(features["did"])
- elif "uri" in features:
- uris.append(features["uri"])
- else:
- post["hashtags"] = post["mentions"] = post["uris"] = ()
-
- if self._metadata_user:
- post["user"] = self._user or post["author"]
-
- files = self._extract_files(post)
- post["instance"] = self.instance
- post["post_id"] = pid
- post["count"] = len(files)
- post["date"] = text.parse_datetime(
- post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
-
- yield Message.Directory, post
-
- if not files:
+ self.log.debug("Skipping %s (repost)", self._pid(post))
continue
-
- base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
- "?did={}&cid=".format(post["author"]["did"]))
- for post["num"], file in enumerate(files, 1):
- post.update(file)
- yield Message.Url, base + file["filename"], post
+ embed = post.get("embed")
+ post.update(post.pop("record"))
+
+ while True:
+ self._prepare(post)
+ files = self._extract_files(post)
+
+ yield Message.Directory, post
+ if files:
+ base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
+ "?did={}&cid=".format(post["author"]["did"]))
+ for post["num"], file in enumerate(files, 1):
+ post.update(file)
+ yield Message.Url, base + file["filename"], post
+
+ if not self.quoted or not embed or "record" not in embed:
+ break
+
+ quote = embed["record"]
+ if "record" in quote:
+ quote = quote["record"]
+ quote["quote_id"] = self._pid(post)
+ quote["quote_by"] = post["author"]
+ embed = quote.get("embed")
+ quote.update(quote.pop("value"))
+ post = quote
def posts(self):
return ()
+ def _pid(self, post):
+ return post["uri"].rpartition("/")[2]
+
+ def _prepare(self, post):
+ if self._metadata_facets:
+ if "facets" in post:
+ post["hashtags"] = tags = []
+ post["mentions"] = dids = []
+ post["uris"] = uris = []
+ for facet in post["facets"]:
+ features = facet["features"][0]
+ if "tag" in features:
+ tags.append(features["tag"])
+ elif "did" in features:
+ dids.append(features["did"])
+ elif "uri" in features:
+ uris.append(features["uri"])
+ else:
+ post["hashtags"] = post["mentions"] = post["uris"] = ()
+
+ if self._metadata_user:
+ post["user"] = self._user or post["author"]
+
+ post["instance"] = self.instance
+ post["post_id"] = self._pid(post)
+ post["date"] = text.parse_datetime(
+ post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
+
def _extract_files(self, post):
if "embed" not in post:
+ post["count"] = 0
return ()
files = []
@@ -111,6 +125,7 @@ class BlueskyExtractor(Extractor):
if "video" in media and self.videos:
files.append(self._extract_media(media, "video"))
+ post["count"] = len(files)
return files
def _extract_media(self, media, key):