New upstream version 1.27.0. (ref: upstream/1.27.0)

author: Unit 193 <unit193@unit193.net> 2024-06-06 02:40:15 -0400
committer: Unit 193 <unit193@unit193.net> 2024-06-06 02:40:15 -0400
commit: 1c28712d865e30ed752988ba0b6944882250b665 (patch)
tree: e5d5083a418f5c19616cb940c090c2dfb646d3cb /gallery_dl/extractor/reddit.py
parent: 6e662211019a89caec44de8a57c675872b0b5498 (diff)
1 file changed, 41 insertions, 8 deletions
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index e099c7e..ce602f6 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -74,8 +74,8 @@ class RedditExtractor(Extractor):
                         yield Message.Url, url, submission
 
                     elif "gallery_data" in media:
-                        for submission["num"], url in enumerate(
-                                self._extract_gallery(media), 1):
+                        for url in self._extract_gallery(media):
+                            submission["num"] += 1
                             text.nameext_from_url(url, submission)
                             yield Message.Url, url, submission
 
@@ -99,7 +99,10 @@ class RedditExtractor(Extractor):
                             urls.append((url, submission))
                     for comment in comments:
                         html = comment["body_html"] or ""
-                        if ' href="' in html:
+                        href = (' href="' in html)
+                        media = ("media_metadata" in comment)
+
+                        if media or href:
                             comment["date"] = text.parse_timestamp(
                                 comment["created_utc"])
                             if submission:
@@ -107,6 +110,14 @@ class RedditExtractor(Extractor):
                                 data["comment"] = comment
                             else:
                                 data = comment
+
+                        if media:
+                            for embed in self._extract_embed(comment):
+                                submission["num"] += 1
+                                text.nameext_from_url(embed, submission)
+                                yield Message.Url, embed, submission
+
+                        if href:
                             for url in text.extract_iter(html, ' href="', '"'):
                                 urls.append((url, data))
 
@@ -118,6 +129,7 @@ class RedditExtractor(Extractor):
                     if url.startswith((
                         "https://www.reddit.com/message/compose",
                         "https://reddit.com/message/compose",
+                        "https://preview.redd.it/",
                     )):
                         continue
 
@@ -172,6 +184,27 @@ class RedditExtractor(Extractor):
                     submission["id"], item["media_id"])
                 self.log.debug(src)
 
+    def _extract_embed(self, submission):
+        meta = submission["media_metadata"]
+        if not meta:
+            return
+
+        for mid, data in meta.items():
+            if data["status"] != "valid" or "s" not in data:
+                self.log.warning(
+                    "embed %s: skipping item %s (status: %s)",
+                    submission["id"], mid, data.get("status"))
+                continue
+            src = data["s"]
+            url = src.get("u") or src.get("gif") or src.get("mp4")
+            if url:
+                yield url.partition("?")[0].replace("/preview.", "/i.", 1)
+            else:
+                self.log.error(
+                    "embed %s: unable to fetch download URL for item %s",
+                    submission["id"], mid)
+                self.log.debug(src)
+
     def _extract_video_ytdl(self, submission):
         return "https://www.reddit.com" + submission["permalink"]
 
@@ -454,14 +487,14 @@ class RedditAPI():
 
             remaining = response.headers.get("x-ratelimit-remaining")
             if remaining and float(remaining) < 2:
-                if self._warn_429:
-                    self._warn_429 = False
+                self.log.warning("API rate limit exceeded")
+                if self._warn_429 and self.client_id == self.CLIENT_ID:
                     self.log.info(
                         "Register your own OAuth application and use its "
                         "credentials to prevent this error: "
-                        "https://github.com/mikf/gallery-dl/blob/master"
-                        "/docs/configuration.rst"
-                        "#extractorredditclient-id--user-agent")
+                        "https://gdl-org.github.io/docs/configuration.html"
+                        "#extractor-reddit-client-id-user-agent")
+                self._warn_429 = False
                 self.extractor.wait(
                     seconds=response.headers["x-ratelimit-reset"])
                 continue
author	Unit 193 <unit193@unit193.net>	2024-06-06 02:40:15 -0400
committer	Unit 193 <unit193@unit193.net>	2024-06-06 02:40:15 -0400
commit	1c28712d865e30ed752988ba0b6944882250b665 (patch)
tree	e5d5083a418f5c19616cb940c090c2dfb646d3cb /gallery_dl/extractor/reddit.py
parent	6e662211019a89caec44de8a57c675872b0b5498 (diff)