summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/reddit.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net>, 2024-06-06 02:40:15 -0400
committer: Unit 193 <unit193@unit193.net>, 2024-06-06 02:40:15 -0400
commit: 1c28712d865e30ed752988ba0b6944882250b665 (patch)
tree: e5d5083a418f5c19616cb940c090c2dfb646d3cb /gallery_dl/extractor/reddit.py
parent: 6e662211019a89caec44de8a57c675872b0b5498 (diff)
New upstream version 1.27.0. (ref: upstream/1.27.0)
Diffstat (limited to 'gallery_dl/extractor/reddit.py')
-rw-r--r-- gallery_dl/extractor/reddit.py | 49
1 files changed, 41 insertions, 8 deletions
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index e099c7e..ce602f6 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -74,8 +74,8 @@ class RedditExtractor(Extractor):
yield Message.Url, url, submission
elif "gallery_data" in media:
- for submission["num"], url in enumerate(
- self._extract_gallery(media), 1):
+ for url in self._extract_gallery(media):
+ submission["num"] += 1
text.nameext_from_url(url, submission)
yield Message.Url, url, submission
@@ -99,7 +99,10 @@ class RedditExtractor(Extractor):
urls.append((url, submission))
for comment in comments:
html = comment["body_html"] or ""
- if ' href="' in html:
+ href = (' href="' in html)
+ media = ("media_metadata" in comment)
+
+ if media or href:
comment["date"] = text.parse_timestamp(
comment["created_utc"])
if submission:
@@ -107,6 +110,14 @@ class RedditExtractor(Extractor):
data["comment"] = comment
else:
data = comment
+
+ if media:
+ for embed in self._extract_embed(comment):
+ submission["num"] += 1
+ text.nameext_from_url(embed, submission)
+ yield Message.Url, embed, submission
+
+ if href:
for url in text.extract_iter(html, ' href="', '"'):
urls.append((url, data))
@@ -118,6 +129,7 @@ class RedditExtractor(Extractor):
if url.startswith((
"https://www.reddit.com/message/compose",
"https://reddit.com/message/compose",
+ "https://preview.redd.it/",
)):
continue
@@ -172,6 +184,27 @@ class RedditExtractor(Extractor):
submission["id"], item["media_id"])
self.log.debug(src)
+ def _extract_embed(self, submission):
+ meta = submission["media_metadata"]
+ if not meta:
+ return
+
+ for mid, data in meta.items():
+ if data["status"] != "valid" or "s" not in data:
+ self.log.warning(
+ "embed %s: skipping item %s (status: %s)",
+ submission["id"], mid, data.get("status"))
+ continue
+ src = data["s"]
+ url = src.get("u") or src.get("gif") or src.get("mp4")
+ if url:
+ yield url.partition("?")[0].replace("/preview.", "/i.", 1)
+ else:
+ self.log.error(
+ "embed %s: unable to fetch download URL for item %s",
+ submission["id"], mid)
+ self.log.debug(src)
+
def _extract_video_ytdl(self, submission):
return "https://www.reddit.com" + submission["permalink"]
@@ -454,14 +487,14 @@ class RedditAPI():
remaining = response.headers.get("x-ratelimit-remaining")
if remaining and float(remaining) < 2:
- if self._warn_429:
- self._warn_429 = False
+ self.log.warning("API rate limit exceeded")
+ if self._warn_429 and self.client_id == self.CLIENT_ID:
self.log.info(
"Register your own OAuth application and use its "
"credentials to prevent this error: "
- "https://github.com/mikf/gallery-dl/blob/master"
- "/docs/configuration.rst"
- "#extractorredditclient-id--user-agent")
+ "https://gdl-org.github.io/docs/configuration.html"
+ "#extractor-reddit-client-id-user-agent")
+ self._warn_429 = False
self.extractor.wait(
seconds=response.headers["x-ratelimit-reset"])
continue