diff options
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 127 |
1 files changed, 73 insertions, 54 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index c2d8247..17a2202 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -26,6 +26,7 @@ class TwitterExtractor(Extractor): cookiedomain = ".twitter.com" cookienames = ("auth_token",) root = "https://twitter.com" + browser = "firefox" def __init__(self, match): Extractor.__init__(self, match) @@ -945,16 +946,31 @@ class TwitterAPI(): def __init__(self, extractor): self.extractor = extractor - self.root = "https://twitter.com/i/api" + self.root = "https://api.twitter.com" + cookies = extractor.session.cookies + cookiedomain = extractor.cookiedomain + + csrf = extractor.config("csrf") + if csrf is None or csrf == "cookies": + csrf_token = cookies.get("ct0", domain=cookiedomain) + else: + csrf_token = None + if not csrf_token: + csrf_token = util.generate_token() + cookies.set("ct0", csrf_token, domain=cookiedomain) + + auth_token = cookies.get("auth_token", domain=cookiedomain) + self.headers = { "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR" "COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu" "4FA33AGWWjCpTnA", "x-guest-token": None, - "x-twitter-auth-type": None, + "x-twitter-auth-type": "OAuth2Session" if auth_token else None, "x-twitter-client-language": "en", "x-twitter-active-user": "yes", - "x-csrf-token": None, + "x-csrf-token": csrf_token, + "Origin": "https://twitter.com", "Referer": "https://twitter.com/", } self.params = { @@ -967,24 +983,36 @@ class TwitterAPI(): "include_can_dm": "1", "include_can_media_tag": "1", "include_ext_has_nft_avatar": "1", + "include_ext_is_blue_verified": "1", + "include_ext_verified_type": "1", "skip_status": "1", "cards_platform": "Web-12", "include_cards": "1", "include_ext_alt_text": "true", + "include_ext_limited_action_results": "false", "include_quote_count": "true", "include_reply_count": "1", "tweet_mode": "extended", + "include_ext_collab_control": "true", + "include_ext_views": "true", "include_entities": "true", "include_user_entities": "true", "include_ext_media_color": "true", "include_ext_media_availability": "true", "include_ext_sensitive_media_warning": "true", + "include_ext_trusted_friends_metadata": "true", "send_error_codes": "true", "simple_quoted_tweet": "true", + "q": None, "count": "100", + "query_source": None, "cursor": None, - "ext": "mediaStats,highlightedLabel,hasNftAvatar," - "voiceInfo,superFollowMetadata", + "pc": None, + "spelling_corrections": None, + "include_ext_edit_control": "true", + "ext": "mediaStats,highlightedLabel,hasNftAvatar,voiceInfo," + "enrichments,superFollowMetadata,unmentionInfo,editControl," + "collab_control,vibe", } self.variables = { "includePromotedContent": False, @@ -1006,28 +1034,6 @@ class TwitterAPI(): self._syndication = self.extractor.syndication self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode - cookies = extractor.session.cookies - cookiedomain = extractor.cookiedomain - - csrf = extractor.config("csrf") - if csrf is None or csrf == "cookies": - csrf_token = cookies.get("ct0", domain=cookiedomain) - else: - csrf_token = None - if not csrf_token: - csrf_token = util.generate_token() - cookies.set("ct0", csrf_token, domain=cookiedomain) - self.headers["x-csrf-token"] = csrf_token - - if cookies.get("auth_token", domain=cookiedomain): - # logged in - self.headers["x-twitter-auth-type"] = "OAuth2Session" - else: - # guest - guest_token = self._guest_token() - cookies.set("gt", guest_token, domain=cookiedomain) - self.headers["x-guest-token"] = guest_token - def tweet_detail(self, tweet_id): endpoint = "/graphql/ItejhtHVxU7ksltgMmyaLA/TweetDetail" variables = { @@ -1183,17 +1189,26 @@ class TwitterAPI(): @cache(maxage=3600) def _guest_token(self): - root = "https://api.twitter.com" endpoint = "/1.1/guest/activate.json" - return str(self._call(endpoint, None, root, "POST")["guest_token"]) + self.extractor.log.info("Requesting guest token") + return str(self._call(endpoint, None, "POST", False)["guest_token"]) + + def _authenticate_guest(self): + guest_token = self._guest_token() + if guest_token != self.headers["x-guest-token"]: + self.headers["x-guest-token"] = guest_token + self.extractor.session.cookies.set( + "gt", guest_token, domain=self.extractor.cookiedomain) - def _call(self, endpoint, params, root=None, method="GET"): - if root is None: - root = self.root + def _call(self, endpoint, params, method="GET", auth=True): + url = self.root + endpoint while True: + if not self.headers["x-twitter-auth-type"] and auth: + self._authenticate_guest() + response = self.extractor.request( - root + endpoint, method=method, params=params, + url, method=method, params=params, headers=self.headers, fatal=None) # update 'x-csrf-token' header (#1170) @@ -1226,21 +1241,33 @@ class TwitterAPI(): def _pagination_legacy(self, endpoint, params): original_retweets = (self.extractor.retweets == "original") + bottom = ("cursor-bottom-", "sq-cursor-bottom") while True: data = self._call(endpoint, params) - instr = data["timeline"]["instructions"] - if not instr: + instructions = data["timeline"]["instructions"] + if not instructions: return tweets = data["globalObjects"]["tweets"] users = data["globalObjects"]["users"] tweet_id = cursor = None tweet_ids = [] + entries = () + + # process instructions + for instr in instructions: + if "addEntries" in instr: + entries = instr["addEntries"]["entries"] + elif "replaceEntry" in instr: + entry = instr["replaceEntry"]["entry"] + if entry["entryId"].startswith(bottom): + cursor = (entry["content"]["operation"] + ["cursor"]["value"]) # collect tweet IDs and cursor value - for entry in instr[0]["addEntries"]["entries"]: + for entry in entries: entry_startswith = entry["entryId"].startswith if entry_startswith(("tweet-", "sq-I-t-")): @@ -1252,7 +1279,7 @@ class TwitterAPI(): entry["content"]["timelineModule"]["metadata"] ["conversationMetadata"]["allTweetIds"][::-1]) - elif entry_startswith(("cursor-bottom-", "sq-cursor-bottom")): + elif entry_startswith(bottom): cursor = entry["content"]["operation"]["cursor"] if not cursor.get("stopOnEmptyResponse", True): # keep going even if there are no tweets @@ -1300,11 +1327,7 @@ class TwitterAPI(): quoted["quoted_by_id_str"] = tweet["id_str"] yield quoted - # update cursor value - if "replaceEntry" in instr[-1] : - cursor = (instr[-1]["replaceEntry"]["entry"] - ["content"]["operation"]["cursor"]["value"]) - + # stop on empty response if not cursor or (not tweets and not tweet_id): return params["cursor"] = cursor @@ -1346,12 +1369,8 @@ class TwitterAPI(): if user.get("blocked_by"): if self.headers["x-twitter-auth-type"] and \ extr.config("logout"): - guest_token = self._guest_token() - extr.session.cookies.set( - "gt", guest_token, domain=extr.cookiedomain) extr._cookiefile = None del extr.session.cookies["auth_token"] - self.headers["x-guest-token"] = guest_token self.headers["x-twitter-auth-type"] = None extr.log.info("Retrying API request as guest") continue @@ -1578,8 +1597,6 @@ def _login_impl(extr, username, password): "Login with email is no longer possible. " "You need to provide your username or phone number instead.") - extr.log.info("Logging in as %s", username) - def process(response): try: data = response.json() @@ -1598,8 +1615,10 @@ def _login_impl(extr, username, password): extr.session.cookies.clear() api = TwitterAPI(extr) + api._authenticate_guest() headers = api.headers - headers["Referer"] = "https://twitter.com/i/flow/login" + + extr.log.info("Logging in as %s", username) # init data = { @@ -1653,7 +1672,7 @@ def _login_impl(extr, username, password): "web_modal": 1, }, } - url = "https://twitter.com/i/api/1.1/onboarding/task.json?flow_name=login" + url = "https://api.twitter.com/1.1/onboarding/task.json?flow_name=login" response = extr.request(url, method="POST", headers=headers, json=data) data = { @@ -1668,7 +1687,7 @@ def _login_impl(extr, username, password): }, ], } - url = "https://twitter.com/i/api/1.1/onboarding/task.json" + url = "https://api.twitter.com/1.1/onboarding/task.json" response = extr.request( url, method="POST", headers=headers, json=data, fatal=None) @@ -1692,7 +1711,7 @@ def _login_impl(extr, username, password): }, ], } - # url = "https://twitter.com/i/api/1.1/onboarding/task.json" + # url = "https://api.twitter.com/1.1/onboarding/task.json" extr.sleep(random.uniform(2.0, 4.0), "login (username)") response = extr.request( url, method="POST", headers=headers, json=data, fatal=None) @@ -1710,7 +1729,7 @@ def _login_impl(extr, username, password): }, ], } - # url = "https://twitter.com/i/api/1.1/onboarding/task.json" + # url = "https://api.twitter.com/1.1/onboarding/task.json" extr.sleep(random.uniform(2.0, 4.0), "login (password)") response = extr.request( url, method="POST", headers=headers, json=data, fatal=None) @@ -1727,7 +1746,7 @@ def _login_impl(extr, username, password): }, ], } - # url = "https://twitter.com/i/api/1.1/onboarding/task.json" + # url = "https://api.twitter.com/1.1/onboarding/task.json" response = extr.request( url, method="POST", headers=headers, json=data, fatal=None) process(response) |
