summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py | 58
1 files changed, 46 insertions, 12 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 8ef966f..610e0ee 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -53,10 +53,12 @@ class TwitterExtractor(Extractor):
if self.videos == "ytdl":
data["extension"] = None
- url = "ytdl:{}/{}/status/{}".format(
- self.root, data["user"], data["tweet_id"])
+ url = "ytdl:{}/i/web/status/{}".format(
+ self.root, data["tweet_id"])
else:
url = self._video_from_tweet(data["tweet_id"])
+ if not url:
+ continue
ext = text.ext_from_url(url)
if ext == "m3u8":
url = "ytdl:" + url
@@ -155,6 +157,16 @@ class TwitterExtractor(Extractor):
cl, _, cr = content.rpartition("pic.twitter.com/")
data["content"] = cl if cl and len(cr) < 16 else content
+ if extr('<div class="QuoteTweet', '>'):
+ data["retweet_id"] = text.parse_int(extr('data-item-id="', '"'))
+ data["retweeter"] = data["user"]["name"]
+ data["author"] = {
+ "name" : extr('data-screen-name="', '"'),
+ "id" : text.parse_int(extr('data-user-id="' , '"')),
+ "nick" : text.unescape(extr(
+ 'QuoteTweet-fullname', '<').partition('>')[2]),
+ }
+
return data
def _video_from_tweet(self, tweet_id):
@@ -173,19 +185,28 @@ class TwitterExtractor(Extractor):
if self.logged_in:
headers["x-twitter-auth-type"] = "OAuth2Session"
else:
- token = self._guest_token(headers)
+ token = _guest_token(self, headers)
cookies = {"gt": token}
headers["x-guest-token"] = token
- data = self.request(url, cookies=cookies, headers=headers).json()
- return data["track"]["playbackUrl"]
+ response = self.request(
+ url, cookies=cookies, headers=headers, fatal=None)
+
+ if response.status_code == 429 or \
+ response.headers.get("x-rate-limit-remaining") == "0":
+ if self.logged_in:
+ reset = response.headers.get("x-rate-limit-reset")
+ self.wait(until=reset, reason="rate limit reset")
+ else:
+ _guest_token.invalidate()
+ return self._video_from_tweet(tweet_id)
- @memcache()
- def _guest_token(self, headers):
- return self.request(
- "https://api.twitter.com/1.1/guest/activate.json",
- method="POST", headers=headers,
- ).json().get("guest_token")
+ elif response.status_code >= 400:
+ self.log.warning("Unable to fetch video data for %s ('%s %s')",
+ tweet_id, response.status_code, response.reason)
+ return None
+
+ return response.json()["track"]["playbackUrl"]
def _tweets_from_api(self, url, max_position=None):
params = {
@@ -313,12 +334,17 @@ class TwitterTweetExtractor(TwitterExtractor):
# Reply to another tweet (#403)
("https://twitter.com/tyson_hesse/status/1103767554424598528", {
"options": (("videos", "ytdl"),),
- "pattern": r"ytdl:https://twitter.com/.+/1103767554424598528",
+ "pattern": r"ytdl:https://twitter.com/i/web.+/1103767554424598528",
}),
# /i/web/ URL
("https://twitter.com/i/web/status/1155074198240292865", {
"pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig",
}),
+ # quoted tweet (#526)
+ ("https://twitter.com/Meiyu_miu/status/1070693241413021696", {
+ "count": 4,
+ "keyword": "0c627af2b8cdccc7e0da8fd221155c4a4a3141a8",
+ }),
)
def __init__(self, match):
@@ -342,3 +368,11 @@ class TwitterTweetExtractor(TwitterExtractor):
end = page.index('class="js-tweet-stats-container')
beg = page.rindex('<div class="tweet ', 0, end)
return (page[beg:end],)
+
+
+@memcache()
+def _guest_token(extr, headers):
+ return extr.request(
+ "https://api.twitter.com/1.1/guest/activate.json",
+ method="POST", headers=headers,
+ ).json().get("guest_token")