diff options
| author | 2020-01-09 22:31:29 -0500 | |
|---|---|---|
| committer | 2020-01-09 22:31:29 -0500 | |
| commit | bc435e826dbe37969d9cbe280f58810d054932cc (patch) | |
| tree | 4c12d1bd5d5fdaa4f6a65c38c85dd8f0ecdadc6b /gallery_dl/extractor/twitter.py | |
| parent | f9a1a9dcb7df977eeac9544786df9c0b93795815 (diff) | |
New upstream version 1.12.2 [upstream/1.12.2]
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 58 |
1 file changed, 46 insertions, 12 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 8ef966f..610e0ee 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -53,10 +53,12 @@ class TwitterExtractor(Extractor): if self.videos == "ytdl": data["extension"] = None - url = "ytdl:{}/{}/status/{}".format( - self.root, data["user"], data["tweet_id"]) + url = "ytdl:{}/i/web/status/{}".format( + self.root, data["tweet_id"]) else: url = self._video_from_tweet(data["tweet_id"]) + if not url: + continue ext = text.ext_from_url(url) if ext == "m3u8": url = "ytdl:" + url @@ -155,6 +157,16 @@ class TwitterExtractor(Extractor): cl, _, cr = content.rpartition("pic.twitter.com/") data["content"] = cl if cl and len(cr) < 16 else content + if extr('<div class="QuoteTweet', '>'): + data["retweet_id"] = text.parse_int(extr('data-item-id="', '"')) + data["retweeter"] = data["user"]["name"] + data["author"] = { + "name" : extr('data-screen-name="', '"'), + "id" : text.parse_int(extr('data-user-id="' , '"')), + "nick" : text.unescape(extr( + 'QuoteTweet-fullname', '<').partition('>')[2]), + } + return data def _video_from_tweet(self, tweet_id): @@ -173,19 +185,28 @@ class TwitterExtractor(Extractor): if self.logged_in: headers["x-twitter-auth-type"] = "OAuth2Session" else: - token = self._guest_token(headers) + token = _guest_token(self, headers) cookies = {"gt": token} headers["x-guest-token"] = token - data = self.request(url, cookies=cookies, headers=headers).json() - return data["track"]["playbackUrl"] + response = self.request( + url, cookies=cookies, headers=headers, fatal=None) + + if response.status_code == 429 or \ + response.headers.get("x-rate-limit-remaining") == "0": + if self.logged_in: + reset = response.headers.get("x-rate-limit-reset") + self.wait(until=reset, reason="rate limit reset") + else: + _guest_token.invalidate() + return self._video_from_tweet(tweet_id) - @memcache() - def _guest_token(self, headers): - return self.request( - 
"https://api.twitter.com/1.1/guest/activate.json", - method="POST", headers=headers, - ).json().get("guest_token") + elif response.status_code >= 400: + self.log.warning("Unable to fetch video data for %s ('%s %s')", + tweet_id, response.status_code, response.reason) + return None + + return response.json()["track"]["playbackUrl"] def _tweets_from_api(self, url, max_position=None): params = { @@ -313,12 +334,17 @@ class TwitterTweetExtractor(TwitterExtractor): # Reply to another tweet (#403) ("https://twitter.com/tyson_hesse/status/1103767554424598528", { "options": (("videos", "ytdl"),), - "pattern": r"ytdl:https://twitter.com/.+/1103767554424598528", + "pattern": r"ytdl:https://twitter.com/i/web.+/1103767554424598528", }), # /i/web/ URL ("https://twitter.com/i/web/status/1155074198240292865", { "pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig", }), + # quoted tweet (#526) + ("https://twitter.com/Meiyu_miu/status/1070693241413021696", { + "count": 4, + "keyword": "0c627af2b8cdccc7e0da8fd221155c4a4a3141a8", + }), ) def __init__(self, match): @@ -342,3 +368,11 @@ class TwitterTweetExtractor(TwitterExtractor): end = page.index('class="js-tweet-stats-container') beg = page.rindex('<div class="tweet ', 0, end) return (page[beg:end],) + + +@memcache() +def _guest_token(extr, headers): + return extr.request( + "https://api.twitter.com/1.1/guest/activate.json", + method="POST", headers=headers, + ).json().get("guest_token") |
