diff options
Diffstat (limited to 'gallery_dl/extractor/reddit.py')
| -rw-r--r-- | gallery_dl/extractor/reddit.py | 163 |
1 files changed, 50 insertions, 113 deletions
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index 54b162b..8553312 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -19,7 +19,7 @@ class RedditExtractor(Extractor): directory_fmt = ("{category}", "{subreddit}") filename_fmt = "{id}{num:? //>02} {title[:220]}.{extension}" archive_fmt = "{filename}" - cookiedomain = ".reddit.com" + cookies_domain = ".reddit.com" request_interval = 0.6 def items(self): @@ -30,6 +30,7 @@ class RedditExtractor(Extractor): parentdir = self.config("parent-directory") max_depth = self.config("recursion", 0) + previews = self.config("previews", True) videos = self.config("videos", True) if videos: @@ -65,7 +66,10 @@ class RedditExtractor(Extractor): media = submission url = media["url"] - if url and url.startswith("https://i.redd.it/"): + if url and url.startswith(( + "https://i.redd.it/", + "https://preview.redd.it/", + )): text.nameext_from_url(url, submission) yield Message.Url, url, submission @@ -94,21 +98,38 @@ class RedditExtractor(Extractor): ' href="', '"'): urls.append((url, submission)) for comment in comments: - for url in text.extract_iter( - comment["body_html"] or "", ' href="', '"'): - urls.append((url, comment)) + html = comment["body_html"] or "" + if ' href="' in html: + comment["date"] = text.parse_timestamp( + comment["created_utc"]) + if submission: + data = submission.copy() + data["comment"] = comment + else: + data = comment + for url in text.extract_iter(html, ' href="', '"'): + urls.append((url, data)) for url, data in urls: if not url or url[0] == "#": continue if url[0] == "/": url = "https://www.reddit.com" + url + if url.startswith(( + "https://www.reddit.com/message/compose", + "https://reddit.com/message/compose", + )): + continue match = match_submission(url) if match: extra.append(match.group(1)) elif not match_user(url) and not match_subreddit(url): + if previews and "preview" in data: + data["_fallback"] = self._previews(data) yield Message.Queue, text.unescape(url), data + if "_fallback" in data: + del data["_fallback"] if not extra or depth == max_depth: return @@ -165,22 +186,30 @@ class RedditExtractor(Extractor): submission["_ytdl_extra"] = {"title": submission["title"]} return submission["url"] + def _previews(self, post): + try: + if "reddit_video_preview" in post["preview"]: + video = post["preview"]["reddit_video_preview"] + if "dash_url" in video: + yield "ytdl:" + video["dash_url"] + if "hls_url" in video: + yield "ytdl:" + video["hls_url"] + except Exception as exc: + self.log.debug("%s: %s", exc.__class__.__name__, exc) + + try: + for image in post["preview"]["images"]: + yield image["source"]["url"] + except Exception as exc: + self.log.debug("%s: %s", exc.__class__.__name__, exc) + class RedditSubredditExtractor(RedditExtractor): """Extractor for URLs from subreddits on reddit.com""" subcategory = "subreddit" pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com" r"(/r/[^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?(?:$|#)") - test = ( - ("https://www.reddit.com/r/lavaporn/", { - "range": "1-20", - "count": ">= 20", - }), - ("https://www.reddit.com/r/lavaporn/top/?sort=top&t=month"), - ("https://old.reddit.com/r/lavaporn/"), - ("https://np.reddit.com/r/lavaporn/"), - ("https://m.reddit.com/r/lavaporn/"), - ) + example = "https://www.reddit.com/r/SUBREDDIT/" def __init__(self, match): self.subreddit, sub, params = match.groups() @@ -198,13 +227,7 @@ class RedditHomeExtractor(RedditSubredditExtractor): subcategory = "home" pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com" r"((?:/([a-z]+))?)/?(?:\?([^#]*))?(?:$|#)") - test = ( - ("https://www.reddit.com/", { - "range": "1-20", - "count": ">= 20", - }), - ("https://old.reddit.com/top/?sort=top&t=month"), - ) + example = "https://www.reddit.com/" class RedditUserExtractor(RedditExtractor): @@ -212,14 +235,7 @@ class RedditUserExtractor(RedditExtractor): subcategory = "user" pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/u(?:ser)?/" r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?$") - test = ( - ("https://www.reddit.com/user/username/", { - "count": ">= 2", - }), - ("https://www.reddit.com/user/username/gilded/?sort=top&t=month"), - ("https://old.reddit.com/user/username/"), - ("https://www.reddit.com/u/username/"), - ) + example = "https://www.reddit.com/user/USER/" def __init__(self, match): self.user, sub, params = match.groups() @@ -238,71 +254,7 @@ class RedditSubmissionExtractor(RedditExtractor): pattern = (r"(?:https?://)?(?:" r"(?:\w+\.)?reddit\.com/(?:(?:r|u|user)/[^/?#]+" r"/comments|gallery)|redd\.it)/([a-z0-9]+)") - test = ( - ("https://www.reddit.com/r/lavaporn/comments/8cqhub/", { - "pattern": r"https://c2.staticflickr.com/8/7272/\w+_k.jpg", - "count": 1, - }), - ("https://www.reddit.com/r/lavaporn/comments/8cqhub/", { - "options": (("comments", 500),), - "pattern": r"https://", - "count": 3, - }), - ("https://www.reddit.com/gallery/hrrh23", { - "url": "25b91ede15459470274dd17291424b037ed8b0ae", - "content": "1e7dde4ee7d5f4c4b45749abfd15b2dbfa27df3f", - "count": 3, - }), - # video - ("https://www.reddit.com/r/aww/comments/90bu6w/", { - "pattern": r"ytdl:https://v.redd.it/gyh95hiqc0b11", - "count": 1, - }), - # video (ytdl) - ("https://www.reddit.com/r/aww/comments/90bu6w/", { - "options": (("videos", "ytdl"),), - "pattern": r"ytdl:https://www.reddit.com/r/aww/comments/90bu6w" - r"/heat_index_was_110_degrees_so_we_offered_him_a/", - "count": 1, - }), - # video (dash) - ("https://www.reddit.com/r/aww/comments/90bu6w/", { - "options": (("videos", "dash"),), - "pattern": r"ytdl:https://v.redd.it/gyh95hiqc0b11" - r"/DASHPlaylist.mpd\?a=", - "count": 1, - }), - # deleted gallery (#953) - ("https://www.reddit.com/gallery/icfgzv", { - "count": 0, - }), - # animated gallery items (#955) - ("https://www.reddit.com/r/araragi/comments/ib32hm", { - "pattern": r"https://i\.redd\.it/\w+\.gif", - "count": 2, - }), - # "failed" gallery item (#1127) - ("https://www.reddit.com/r/cosplay/comments/jvwaqr", { - "count": 1, - }), - # gallery with no 'media_metadata' (#2001) - ("https://www.reddit.com/r/kpopfap/comments/qjj04q/", { - "count": 0, - }), - # user page submission (#2301) - ("https://www.reddit.com/user/TheSpiritTree/comments/srilyf/", { - "pattern": r"https://i.redd.it/8fpgv17yqlh81.jpg", - "count": 1, - }), - # cross-posted video (#887, #3586, #3976) - ("https://www.reddit.com/r/kittengifs/comments/12m0b8d", { - "pattern": r"ytdl:https://v\.redd\.it/cvabpjacrvta1", - }), - ("https://old.reddit.com/r/lavaporn/comments/2a00np/"), - ("https://np.reddit.com/r/lavaporn/comments/2a00np/"), - ("https://m.reddit.com/r/lavaporn/comments/2a00np/"), - ("https://redd.it/2a00np/"), - ) + example = "https://www.reddit.com/r/SUBREDDIT/comments/id/" def __init__(self, match): RedditExtractor.__init__(self, match) @@ -319,22 +271,7 @@ class RedditImageExtractor(Extractor): archive_fmt = "{filename}" pattern = (r"(?:https?://)?((?:i|preview)\.redd\.it|i\.reddituploads\.com)" r"/([^/?#]+)(\?[^#]*)?") - test = ( - ("https://i.redd.it/upjtjcx2npzz.jpg", { - "url": "0de614900feef103e580b632190458c0b62b641a", - "content": "cc9a68cf286708d5ce23c68e79cd9cf7826db6a3", - }), - (("https://i.reddituploads.com/0f44f1b1fca2461f957c713d9592617d" - "?fit=max&h=1536&w=1536&s=e96ce7846b3c8e1f921d2ce2671fb5e2"), { - "url": "f24f25efcedaddeec802e46c60d77ef975dc52a5", - "content": "541dbcc3ad77aa01ee21ca49843c5e382371fae7", - }), - # preview.redd.it -> i.redd.it - (("https://preview.redd.it/00af44lpn0u51.jpg?width=960&crop=smart" - "&auto=webp&v=enabled&s=dbca8ab84033f4a433772d9c15dbe0429c74e8ac"), { - "pattern": r"^https://i\.redd\.it/00af44lpn0u51\.jpg$" - }), - ) + example = "https://i.redd.it/NAME.EXT" def __init__(self, match): Extractor.__init__(self, match) @@ -399,9 +336,9 @@ class RedditAPI(): if not self.refresh_token: # allow downloading from quarantined subreddits (#2180) - extractor._cookiejar.set( + extractor.cookies.set( "_options", '%7B%22pref_quarantine_optin%22%3A%20true%7D', - domain=extractor.cookiedomain) + domain=extractor.cookies_domain) def submission(self, submission_id): """Fetch the (submission, comments)=-tuple for a submission id""" |
