diff options
Diffstat (limited to 'gallery_dl/extractor/reddit.py')
| -rw-r--r-- | gallery_dl/extractor/reddit.py | 56 |
1 file changed, 32 insertions, 24 deletions
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index ecce003..656148e 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -60,14 +60,16 @@ class RedditExtractor(Extractor):
 
     def _urls(self, submissions):
         for submission, comments in submissions:
-            self._visited.add(submission["id"])
 
-            if not submission["is_self"]:
-                yield submission["url"], submission
+            if submission:
+                self._visited.add(submission["id"])
 
-            for url in text.extract_iter(
-                    submission["selftext_html"] or "", ' href="', '"'):
-                yield url, submission
+                if not submission["is_self"]:
+                    yield submission["url"], submission
+
+                for url in text.extract_iter(
+                        submission["selftext_html"] or "", ' href="', '"'):
+                    yield url, submission
 
             if comments:
                 for comment in comments:
@@ -130,15 +132,14 @@ class RedditSubmissionExtractor(RedditExtractor):
                r")/([a-z0-9]+)")
     test = (
         ("https://www.reddit.com/r/lavaporn/comments/8cqhub/", {
-            "pattern": r"https://",
-            "count": 3,
-        }),
-        # ignore submission comments (#429)
-        ("https://www.reddit.com/r/lavaporn/comments/8cqhub/", {
-            "options": (("comments", 0),),
             "pattern": r"https://c2.staticflickr.com/8/7272/\w+_k.jpg",
             "count": 1,
         }),
+        ("https://www.reddit.com/r/lavaporn/comments/8cqhub/", {
+            "options": (("comments", 500),),
+            "pattern": r"https://",
+            "count": 3,
+        }),
         ("https://old.reddit.com/r/lavaporn/comments/2a00np/"),
         ("https://np.reddit.com/r/lavaporn/comments/2a00np/"),
         ("https://m.reddit.com/r/lavaporn/comments/2a00np/"),
@@ -186,7 +187,7 @@ class RedditAPI():
 
     def __init__(self, extractor):
         self.extractor = extractor
-        self.comments = text.parse_int(extractor.config("comments", 500))
+        self.comments = text.parse_int(extractor.config("comments", 0))
         self.morecomments = extractor.config("morecomments", False)
         self.refresh_token = extractor.config("refresh-token")
         self.log = extractor.log
@@ -298,17 +299,24 @@ class RedditAPI():
         while True:
             data = self._call(endpoint, params)["data"]
 
-            for submission in data["children"]:
-                submission = submission["data"]
-                if (date_min <= submission["created_utc"] <= date_max and
-                        id_min <= self._decode(submission["id"]) <= id_max):
-                    if submission["num_comments"] and self.comments:
-                        try:
-                            yield self.submission(submission["id"])
-                        except exception.AuthorizationError:
-                            pass
-                    else:
-                        yield submission, None
+            for child in data["children"]:
+                kind = child["kind"]
+                post = child["data"]
+
+                if (date_min <= post["created_utc"] <= date_max and
+                        id_min <= self._decode(post["id"]) <= id_max):
+
+                    if kind == "t3":
+                        if post["num_comments"] and self.comments:
+                            try:
+                                yield self.submission(post["id"])
+                            except exception.AuthorizationError:
+                                pass
+                        else:
+                            yield post, None
+
+                    elif kind == "t1" and self.comments:
+                        yield None, (post,)
 
             if not data["after"]:
                 return
