diff options
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 149 |
1 files changed, 88 insertions, 61 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 7cabb8c..1e985e3 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -31,6 +31,7 @@ class TwitterExtractor(Extractor): self.retweets = self.config("retweets", True) self.replies = self.config("replies", True) self.twitpic = self.config("twitpic", False) + self.quoted = self.config("quoted", True) self.videos = self.config("videos", True) self._user_cache = {} @@ -41,8 +42,9 @@ class TwitterExtractor(Extractor): for tweet in self.tweets(): - if not self.retweets and "retweeted_status_id_str" in tweet or \ - not self.replies and "in_reply_to_user_id_str" in tweet: + if (not self.retweets and "retweeted_status_id_str" in tweet or + not self.replies and "in_reply_to_user_id_str" in tweet or + not self.quoted and "quoted" in tweet): continue if self.twitpic: @@ -60,7 +62,7 @@ class TwitterExtractor(Extractor): tdata["width"] = media["original_info"].get("width", 0) tdata["height"] = media["original_info"].get("height", 0) - if "video_info" in media and self.videos: + if "video_info" in media: if self.videos == "ytdl": url = "ytdl:{}/i/web/status/{}".format( @@ -68,7 +70,7 @@ class TwitterExtractor(Extractor): tdata["extension"] = None yield Message.Url, url, tdata - else: + elif self.videos: video_info = media["video_info"] variant = max( video_info["variants"], @@ -149,11 +151,10 @@ class TwitterExtractor(Extractor): if "in_reply_to_screen_name" in tweet: tdata["reply_to"] = tweet["in_reply_to_screen_name"] - if "full_text_quoted" in tweet: - tdata["content_quoted"] = tweet["full_text_quoted"] - if "author" in tweet: tdata["author"] = self._transform_user(tweet["author"]) + else: + tdata["author"] = tdata["user"] return tdata @@ -264,6 +265,27 @@ class TwitterMediaExtractor(TwitterExtractor): return TwitterAPI(self).timeline_media(self.user) +class TwitterLikesExtractor(TwitterExtractor): + """Extractor for liked tweets""" + subcategory = "likes" + pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" + r"/(?!search)([^/?&#]+)/likes(?!\w)") + test = ("https://twitter.com/supernaturepics/likes",) + + def tweets(self): + return TwitterAPI(self).timeline_favorites(self.user) + + +class TwitterBookmarkExtractor(TwitterExtractor): + """Extractor for bookmarked tweets""" + subcategory = "bookmark" + pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()" + test = ("https://twitter.com/i/bookmarks",) + + def tweets(self): + return TwitterAPI(self).timeline_bookmark() + + class TwitterSearchExtractor(TwitterExtractor): """Extractor for all images from a search timeline""" subcategory = "search" @@ -279,7 +301,7 @@ class TwitterSearchExtractor(TwitterExtractor): return {"search": text.unquote(self.user)} def tweets(self): - return TwitterAPI(self).search(self.user) + return TwitterAPI(self).search(text.unquote(self.user)) class TwitterTweetExtractor(TwitterExtractor): @@ -298,7 +320,6 @@ class TwitterTweetExtractor(TwitterExtractor): }), # video ("https://twitter.com/perrypumas/status/1065692031626829824", { - "options": (("videos", True),), "pattern": r"https://video.twimg.com/ext_tw_video/.+\.mp4\?tag=5", }), # content with emoji, newlines, hashtags (#338) @@ -310,23 +331,25 @@ class TwitterTweetExtractor(TwitterExtractor): "It’s our \\(Mystery\\) Gift to you, Trainers! \n\n❓🎁➡️ " )}, }), - # Reply to another tweet (#403) - ("https://twitter.com/tyson_hesse/status/1103767554424598528", { - "options": (("videos", "ytdl"),), - "pattern": r"ytdl:https://twitter.com/i/web.+/1103767554424598528", + # Reply to deleted tweet (#403, #838) + ("https://twitter.com/i/web/status/1170041925560258560", { + "pattern": r"https://pbs.twimg.com/media/EDzS7VrU0AAFL4_.jpg:orig", }), # 'replies' option (#705) - ("https://twitter.com/tyson_hesse/status/1103767554424598528", { + ("https://twitter.com/i/web/status/1170041925560258560", { "options": (("replies", False),), "count": 0, }), - # /i/web/ URL - ("https://twitter.com/i/web/status/1155074198240292865", { - "pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig", + # quoted tweet (#526, #854) + ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", { + "pattern": r"https://pbs\.twimg\.com/media/Ea[KG].+\.jpg", + "count": 8, }), - # quoted tweet (#526) - ("https://twitter.com/Pistachio/status/1222690391817932803", { - "pattern": r"https://pbs\.twimg\.com/media/EPfMfDUU8AAnByO\.jpg", + # "quoted" option (#854) + ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", { + "options": (("quoted", False),), + "pattern": r"https://pbs\.twimg\.com/media/EaK.+\.jpg", + "count": 4, }), # TwitPic embeds (#579) ("https://twitter.com/i/web/status/112900228289540096", { @@ -344,16 +367,6 @@ class TwitterTweetExtractor(TwitterExtractor): return TwitterAPI(self).tweet(self.tweet_id) -class TwitterBookmarkExtractor(TwitterExtractor): - """Extractor for bookmarked tweets""" - subcategory = "bookmark" - pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()" - test = ("https://twitter.com/i/bookmarks",) - - def tweets(self): - return TwitterAPI(self).bookmarks() - - class TwitterAPI(): def __init__(self, extractor): @@ -409,16 +422,21 @@ class TwitterAPI(): self.headers["x-twitter-auth-type"] = "OAuth2Session" else: # guest token - guest_token = _guest_token(self.extractor, self.headers) + guest_token = self._guest_token() self.headers["x-guest-token"] = guest_token cookies.set("gt", guest_token, domain=".twitter.com") def tweet(self, tweet_id): endpoint = "2/timeline/conversation/{}.json".format(tweet_id) + tweets = [] for tweet in self._pagination(endpoint): if tweet["id_str"] == tweet_id: - return (tweet,) - return () + tweets.append(tweet) + if "quoted_status_id_str" in tweet: + tweet_id = tweet["quoted_status_id_str"] + else: + break + return tweets def timeline_profile(self, screen_name): user = self.user_by_screen_name(screen_name) @@ -430,17 +448,26 @@ class TwitterAPI(): endpoint = "2/timeline/media/{}.json".format(user["rest_id"]) return self._pagination(endpoint) + def timeline_favorites(self, screen_name): + user = self.user_by_screen_name(screen_name) + endpoint = "2/timeline/favorites/{}.json".format(user["rest_id"]) + return self._pagination(endpoint) + + def timeline_bookmark(self): + endpoint = "2/timeline/bookmark.json" + return self._pagination(endpoint) + def search(self, query): endpoint = "2/search/adaptive.json" params = self.params.copy() - params["q"] = text.unquote(query) + params["q"] = query + params["tweet_search_mode"] = "live" + params["query_source"] = "typed_query" + params["pc"] = "1" + params["spelling_corrections"] = "1" return self._pagination( endpoint, params, "sq-I-t-", "sq-cursor-bottom") - def bookmarks(self): - endpoint = "2/timeline/bookmark.json" - return self._pagination(endpoint) - def user_by_screen_name(self, screen_name): endpoint = "graphql/-xfUfZsnR_zqjFd-IfrN5A/UserByScreenName" params = { @@ -449,10 +476,16 @@ class TwitterAPI(): } return self._call(endpoint, params)["data"]["user"] - def _call(self, endpoint, params): + @cache(maxage=3600) + def _guest_token(self): + endpoint = "1.1/guest/activate.json" + return self._call(endpoint, None, "POST")["guest_token"] + + def _call(self, endpoint, params, method="GET"): url = "https://api.twitter.com/" + endpoint response = self.extractor.request( - url, params=params, headers=self.headers, fatal=None) + url, method=method, params=params, headers=self.headers, + fatal=None) if response.status_code < 400: return response.json() if response.status_code == 429: @@ -479,28 +512,30 @@ class TwitterAPI(): for entry in instr[0]["addEntries"]["entries"]: if entry["entryId"].startswith(entry_tweet): - tid = entry["content"]["item"]["content"]["tweet"]["id"] - if tid not in tweets: + try: + tweet = tweets[ + entry["content"]["item"]["content"]["tweet"]["id"]] + except KeyError: self.extractor.log.debug( - "Skipping unavailable Tweet %s", tid) + "Skipping unavailable Tweet %s", + entry["entryId"][6:]) continue - tweet = tweets[tid] tweet["user"] = users[tweet["user_id_str"]] - if "quoted_status_id_str" in tweet: - quoted = tweets.get(tweet["quoted_status_id_str"]) - if quoted: - tweet["full_text_quoted"] = quoted["full_text"] - if "extended_entities" in quoted: - tweet["extended_entities"] = \ - quoted["extended_entities"] - elif "retweeted_status_id_str" in tweet: + if "retweeted_status_id_str" in tweet: retweet = tweets.get(tweet["retweeted_status_id_str"]) if retweet: tweet["author"] = users[retweet["user_id_str"]] - yield tweet + if "quoted_status_id_str" in tweet: + quoted = tweets.get(tweet["quoted_status_id_str"]) + if quoted: + quoted["author"] = users[quoted["user_id_str"]] + quoted["user"] = tweet["user"] + quoted["quoted"] = True + yield quoted + elif entry["entryId"].startswith(entry_cursor): cursor = entry["content"]["operation"]["cursor"] if not cursor.get("stopOnEmptyResponse"): @@ -515,11 +550,3 @@ class TwitterAPI(): if not cursor or not tweet: return params["cursor"] = cursor - - -@cache(maxage=3600) -def _guest_token(extr, headers): - return extr.request( - "https://api.twitter.com/1.1/guest/activate.json", - method="POST", headers=headers, - ).json().get("guest_token") |
