diff options
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 167 |
1 files changed, 93 insertions, 74 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index a77ea06..b769912 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -239,30 +239,29 @@ class TwitterExtractor(Extractor): def _login_impl(self, username, password): self.log.info("Logging in as %s", username) - url = "https://mobile.twitter.com/i/nojs_router" - params = {"path": "/login"} - headers = {"Referer": self.root + "/", "Origin": self.root} - page = self.request( - url, method="POST", params=params, headers=headers, data={}).text + token = util.generate_csrf_token() + self.session.cookies.clear() + self.request(self.root + "/login") - pos = page.index('name="authenticity_token"') - token = text.extract(page, 'value="', '"', pos)[0] - - url = "https://mobile.twitter.com/sessions" + url = self.root + "/sessions" + cookies = { + "_mb_tk": token, + } data = { + "redirect_after_login" : "/", + "remember_me" : "1", "authenticity_token" : token, + "wfa" : "1", + "ui_metrics" : "{}", "session[username_or_email]": username, "session[password]" : password, - "remember_me" : "1", - "wfa" : "1", - "commit" : "+Log+in+", - "ui_metrics" : "", } - response = self.request(url, method="POST", data=data) + response = self.request( + url, method="POST", cookies=cookies, data=data) + cookies = { cookie.name: cookie.value for cookie in self.session.cookies - if cookie.domain == self.cookiedomain } if "/error" in response.url or "auth_token" not in cookies: @@ -464,15 +463,17 @@ class TwitterAPI(): def __init__(self, extractor): self.extractor = extractor + + self.root = "https://twitter.com/i/api" self.headers = { "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR" "COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu" "4FA33AGWWjCpTnA", "x-guest-token": None, + "x-twitter-auth-type": None, "x-twitter-client-language": "en", "x-twitter-active-user": "yes", "x-csrf-token": None, - "Origin": "https://twitter.com", "Referer": "https://twitter.com/", } self.params = { @@ -487,8 +488,8 @@ class TwitterAPI(): "skip_status": "1", "cards_platform": "Web-12", "include_cards": "1", - "include_composer_source": "true", "include_ext_alt_text": "true", + "include_quote_count": "true", "include_reply_count": "1", "tweet_mode": "extended", "include_entities": "true", @@ -497,11 +498,9 @@ class TwitterAPI(): "include_ext_media_availability": "true", "send_error_codes": "true", "simple_quoted_tweet": "true", - # "count": "20", "count": "100", "cursor": None, - "ext": "mediaStats,highlightedLabel,cameraMoment", - "include_quote_count": "true", + "ext": "mediaStats,highlightedLabel", } cookies = self.extractor.session.cookies @@ -516,17 +515,15 @@ class TwitterAPI(): if cookies.get("auth_token", domain=cookiedomain): # logged in - self.root = "https://twitter.com/i/api/" self.headers["x-twitter-auth-type"] = "OAuth2Session" else: # guest - self.root = "https://api.twitter.com/" guest_token = self._guest_token() cookies.set("gt", guest_token, domain=cookiedomain) self.headers["x-guest-token"] = guest_token def tweet(self, tweet_id): - endpoint = "2/timeline/conversation/{}.json".format(tweet_id) + endpoint = "/2/timeline/conversation/{}.json".format(tweet_id) tweets = [] for tweet in self._pagination(endpoint): if tweet["id_str"] == tweet_id or \ @@ -540,43 +537,46 @@ class TwitterAPI(): def timeline_profile(self, screen_name): user_id = self._user_id_by_screen_name(screen_name) - endpoint = "2/timeline/profile/{}.json".format(user_id) - return self._pagination(endpoint) + endpoint = "/2/timeline/profile/{}.json".format(user_id) + params = self.params.copy() + params["include_tweet_replies"] = "false" + return self._pagination(endpoint, params) def timeline_media(self, screen_name): user_id = self._user_id_by_screen_name(screen_name) - endpoint = "2/timeline/media/{}.json".format(user_id) + endpoint = "/2/timeline/media/{}.json".format(user_id) return self._pagination(endpoint) def timeline_favorites(self, screen_name): user_id = self._user_id_by_screen_name(screen_name) - endpoint = "2/timeline/favorites/{}.json".format(user_id) + endpoint = "/2/timeline/favorites/{}.json".format(user_id) + params = self.params.copy() + params["sorted_by_time"] = "true" return self._pagination(endpoint) def timeline_bookmark(self): - endpoint = "2/timeline/bookmark.json" + endpoint = "/2/timeline/bookmark.json" return self._pagination(endpoint) def timeline_list(self, list_id): - endpoint = "2/timeline/list.json" + endpoint = "/2/timeline/list.json" params = self.params.copy() params["list_id"] = list_id params["ranking_mode"] = "reverse_chronological" return self._pagination(endpoint, params) def search(self, query): - endpoint = "2/search/adaptive.json" + endpoint = "/2/search/adaptive.json" params = self.params.copy() params["q"] = query params["tweet_search_mode"] = "live" params["query_source"] = "typed_query" params["pc"] = "1" params["spelling_corrections"] = "1" - return self._pagination( - endpoint, params, "sq-I-t-", "sq-cursor-bottom") + return self._pagination(endpoint, params) def list_members(self, list_id): - endpoint = "graphql/M74V2EwlxxVYGB4DbyAphQ/ListMembers" + endpoint = "/graphql/3pV4YlpljXUTFAa1jVNWQw/ListMembers" variables = { "listId": list_id, "count" : 20, @@ -586,7 +586,7 @@ class TwitterAPI(): return self._pagination_members(endpoint, variables) def list_by_rest_id(self, list_id): - endpoint = "graphql/LXXTUytSX1QY-2p8Xp9BFA/ListByRestId" + endpoint = "/graphql/EhaI2uiCBJI97e28GN8WjQ/ListByRestId" params = {"variables": '{"listId":"' + list_id + '"' ',"withUserResult":false}'} try: @@ -595,7 +595,7 @@ class TwitterAPI(): raise exception.NotFoundError("list") def user_by_screen_name(self, screen_name): - endpoint = "graphql/jMaTS-_Ea8vh9rpKggJbCQ/UserByScreenName" + endpoint = "/graphql/ZRnOhhXPwue_JGILb9TNug/UserByScreenName" params = {"variables": '{"screen_name":"' + screen_name + '"' ',"withHighlightedLabel":true}'} try: @@ -610,14 +610,16 @@ class TwitterAPI(): @cache(maxage=3600) def _guest_token(self): - endpoint = "1.1/guest/activate.json" - return self._call(endpoint, None, "POST")["guest_token"] + root = "https://api.twitter.com" + endpoint = "/1.1/guest/activate.json" + return self._call(endpoint, None, root, "POST")["guest_token"] - def _call(self, endpoint, params, method="GET"): - url = self.root + endpoint + def _call(self, endpoint, params, root=None, method="GET"): + if root is None: + root = self.root response = self.extractor.request( - url, method=method, params=params, headers=self.headers, - fatal=None) + root + endpoint, method=method, params=params, + headers=self.headers, fatal=None) # update 'x-csrf-token' header (#1170) csrf_token = response.cookies.get("ct0") @@ -641,11 +643,11 @@ class TwitterAPI(): raise exception.StopExtraction( "%s %s (%s)", response.status_code, response.reason, msg) - def _pagination(self, endpoint, params=None, - entry_tweet="tweet-", entry_cursor="cursor-bottom-"): + def _pagination(self, endpoint, params=None): if params is None: params = self.params.copy() original_retweets = (self.extractor.retweets == "original") + pinned_tweet = True while True: cursor = tweet = None @@ -654,48 +656,65 @@ class TwitterAPI(): instr = data["timeline"]["instructions"] if not instr: return + tweet_ids = [] tweets = data["globalObjects"]["tweets"] users = data["globalObjects"]["users"] + if pinned_tweet: + if "pinEntry" in instr[-1]: + tweet_ids.append(instr[-1]["pinEntry"]["entry"]["content"] + ["item"]["content"]["tweet"]["id"]) + pinned_tweet = False + + # collect tweet IDs and cursor value for entry in instr[0]["addEntries"]["entries"]: + entry_startswith = entry["entryId"].startswith + + if entry_startswith(("tweet-", "sq-I-t-")): + tweet_ids.append( + entry["content"]["item"]["content"]["tweet"]["id"]) - if entry["entryId"].startswith(entry_tweet): - try: - tweet = tweets[ - entry["content"]["item"]["content"]["tweet"]["id"]] - except KeyError: - self.extractor.log.debug( - "Skipping %s (deleted)", - entry["entryId"][len(entry_tweet):]) - continue - - if "retweeted_status_id_str" in tweet: - retweet = tweets.get(tweet["retweeted_status_id_str"]) - if original_retweets: - if not retweet: - continue - retweet["_retweet_id_str"] = tweet["id_str"] - tweet = retweet - elif retweet: - tweet["author"] = users[retweet["user_id_str"]] - tweet["user"] = users[tweet["user_id_str"]] - yield tweet - - if "quoted_status_id_str" in tweet: - quoted = tweets.get(tweet["quoted_status_id_str"]) - if quoted: - quoted["author"] = users[quoted["user_id_str"]] - quoted["user"] = tweet["user"] - quoted["quoted"] = True - yield quoted - - elif entry["entryId"].startswith(entry_cursor): + elif entry_startswith("homeConversation-"): + tweet_ids.extend( + entry["content"]["timelineModule"]["metadata"] + ["conversationMetadata"]["allTweetIds"][::-1]) + + elif entry_startswith(("cursor-bottom-", "sq-cursor-bottom")): cursor = entry["content"]["operation"]["cursor"] if not cursor.get("stopOnEmptyResponse"): # keep going even if there are no tweets tweet = True cursor = cursor["value"] + # process tweets + for tweet_id in tweet_ids: + try: + tweet = tweets[tweet_id] + except KeyError: + self.extractor.log.debug("Skipping %s (deleted)", tweet_id) + continue + + if "retweeted_status_id_str" in tweet: + retweet = tweets.get(tweet["retweeted_status_id_str"]) + if original_retweets: + if not retweet: + continue + retweet["_retweet_id_str"] = tweet["id_str"] + tweet = retweet + elif retweet: + tweet["author"] = users[retweet["user_id_str"]] + tweet["user"] = users[tweet["user_id_str"]] + yield tweet + + if "quoted_status_id_str" in tweet: + quoted = tweets.get(tweet["quoted_status_id_str"]) + if quoted: + quoted["author"] = users[quoted["user_id_str"]] + quoted["user"] = tweet["user"] + quoted["quoted"] = True + yield quoted + + # update cursor value if "replaceEntry" in instr[-1] : cursor = (instr[-1]["replaceEntry"]["entry"] ["content"]["operation"]["cursor"]["value"]) |
