summary refs log tree commit diff stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py 127
1 files changed, 73 insertions, 54 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index c2d8247..17a2202 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -26,6 +26,7 @@ class TwitterExtractor(Extractor):
cookiedomain = ".twitter.com"
cookienames = ("auth_token",)
root = "https://twitter.com"
+ browser = "firefox"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -945,16 +946,31 @@ class TwitterAPI():
def __init__(self, extractor):
self.extractor = extractor
- self.root = "https://twitter.com/i/api"
+ self.root = "https://api.twitter.com"
+ cookies = extractor.session.cookies
+ cookiedomain = extractor.cookiedomain
+
+ csrf = extractor.config("csrf")
+ if csrf is None or csrf == "cookies":
+ csrf_token = cookies.get("ct0", domain=cookiedomain)
+ else:
+ csrf_token = None
+ if not csrf_token:
+ csrf_token = util.generate_token()
+ cookies.set("ct0", csrf_token, domain=cookiedomain)
+
+ auth_token = cookies.get("auth_token", domain=cookiedomain)
+
self.headers = {
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
"COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
"4FA33AGWWjCpTnA",
"x-guest-token": None,
- "x-twitter-auth-type": None,
+ "x-twitter-auth-type": "OAuth2Session" if auth_token else None,
"x-twitter-client-language": "en",
"x-twitter-active-user": "yes",
- "x-csrf-token": None,
+ "x-csrf-token": csrf_token,
+ "Origin": "https://twitter.com",
"Referer": "https://twitter.com/",
}
self.params = {
@@ -967,24 +983,36 @@ class TwitterAPI():
"include_can_dm": "1",
"include_can_media_tag": "1",
"include_ext_has_nft_avatar": "1",
+ "include_ext_is_blue_verified": "1",
+ "include_ext_verified_type": "1",
"skip_status": "1",
"cards_platform": "Web-12",
"include_cards": "1",
"include_ext_alt_text": "true",
+ "include_ext_limited_action_results": "false",
"include_quote_count": "true",
"include_reply_count": "1",
"tweet_mode": "extended",
+ "include_ext_collab_control": "true",
+ "include_ext_views": "true",
"include_entities": "true",
"include_user_entities": "true",
"include_ext_media_color": "true",
"include_ext_media_availability": "true",
"include_ext_sensitive_media_warning": "true",
+ "include_ext_trusted_friends_metadata": "true",
"send_error_codes": "true",
"simple_quoted_tweet": "true",
+ "q": None,
"count": "100",
+ "query_source": None,
"cursor": None,
- "ext": "mediaStats,highlightedLabel,hasNftAvatar,"
- "voiceInfo,superFollowMetadata",
+ "pc": None,
+ "spelling_corrections": None,
+ "include_ext_edit_control": "true",
+ "ext": "mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,"
+ "enrichments,superFollowMetadata,unmentionInfo,editControl,"
+ "collab_control,vibe",
}
self.variables = {
"includePromotedContent": False,
@@ -1006,28 +1034,6 @@ class TwitterAPI():
self._syndication = self.extractor.syndication
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
- cookies = extractor.session.cookies
- cookiedomain = extractor.cookiedomain
-
- csrf = extractor.config("csrf")
- if csrf is None or csrf == "cookies":
- csrf_token = cookies.get("ct0", domain=cookiedomain)
- else:
- csrf_token = None
- if not csrf_token:
- csrf_token = util.generate_token()
- cookies.set("ct0", csrf_token, domain=cookiedomain)
- self.headers["x-csrf-token"] = csrf_token
-
- if cookies.get("auth_token", domain=cookiedomain):
- # logged in
- self.headers["x-twitter-auth-type"] = "OAuth2Session"
- else:
- # guest
- guest_token = self._guest_token()
- cookies.set("gt", guest_token, domain=cookiedomain)
- self.headers["x-guest-token"] = guest_token
-
def tweet_detail(self, tweet_id):
endpoint = "/graphql/ItejhtHVxU7ksltgMmyaLA/TweetDetail"
variables = {
@@ -1183,17 +1189,26 @@ class TwitterAPI():
@cache(maxage=3600)
def _guest_token(self):
- root = "https://api.twitter.com"
endpoint = "/1.1/guest/activate.json"
- return str(self._call(endpoint, None, root, "POST")["guest_token"])
+ self.extractor.log.info("Requesting guest token")
+ return str(self._call(endpoint, None, "POST", False)["guest_token"])
+
+ def _authenticate_guest(self):
+ guest_token = self._guest_token()
+ if guest_token != self.headers["x-guest-token"]:
+ self.headers["x-guest-token"] = guest_token
+ self.extractor.session.cookies.set(
+ "gt", guest_token, domain=self.extractor.cookiedomain)
- def _call(self, endpoint, params, root=None, method="GET"):
- if root is None:
- root = self.root
+ def _call(self, endpoint, params, method="GET", auth=True):
+ url = self.root + endpoint
while True:
+ if not self.headers["x-twitter-auth-type"] and auth:
+ self._authenticate_guest()
+
response = self.extractor.request(
- root + endpoint, method=method, params=params,
+ url, method=method, params=params,
headers=self.headers, fatal=None)
# update 'x-csrf-token' header (#1170)
@@ -1226,21 +1241,33 @@ class TwitterAPI():
def _pagination_legacy(self, endpoint, params):
original_retweets = (self.extractor.retweets == "original")
+ bottom = ("cursor-bottom-", "sq-cursor-bottom")
while True:
data = self._call(endpoint, params)
- instr = data["timeline"]["instructions"]
- if not instr:
+ instructions = data["timeline"]["instructions"]
+ if not instructions:
return
tweets = data["globalObjects"]["tweets"]
users = data["globalObjects"]["users"]
tweet_id = cursor = None
tweet_ids = []
+ entries = ()
+
+ # process instructions
+ for instr in instructions:
+ if "addEntries" in instr:
+ entries = instr["addEntries"]["entries"]
+ elif "replaceEntry" in instr:
+ entry = instr["replaceEntry"]["entry"]
+ if entry["entryId"].startswith(bottom):
+ cursor = (entry["content"]["operation"]
+ ["cursor"]["value"])
# collect tweet IDs and cursor value
- for entry in instr[0]["addEntries"]["entries"]:
+ for entry in entries:
entry_startswith = entry["entryId"].startswith
if entry_startswith(("tweet-", "sq-I-t-")):
@@ -1252,7 +1279,7 @@ class TwitterAPI():
entry["content"]["timelineModule"]["metadata"]
["conversationMetadata"]["allTweetIds"][::-1])
- elif entry_startswith(("cursor-bottom-", "sq-cursor-bottom")):
+ elif entry_startswith(bottom):
cursor = entry["content"]["operation"]["cursor"]
if not cursor.get("stopOnEmptyResponse", True):
# keep going even if there are no tweets
@@ -1300,11 +1327,7 @@ class TwitterAPI():
quoted["quoted_by_id_str"] = tweet["id_str"]
yield quoted
- # update cursor value
- if "replaceEntry" in instr[-1] :
- cursor = (instr[-1]["replaceEntry"]["entry"]
- ["content"]["operation"]["cursor"]["value"])
-
+ # stop on empty response
if not cursor or (not tweets and not tweet_id):
return
params["cursor"] = cursor
@@ -1346,12 +1369,8 @@ class TwitterAPI():
if user.get("blocked_by"):
if self.headers["x-twitter-auth-type"] and \
extr.config("logout"):
- guest_token = self._guest_token()
- extr.session.cookies.set(
- "gt", guest_token, domain=extr.cookiedomain)
extr._cookiefile = None
del extr.session.cookies["auth_token"]
- self.headers["x-guest-token"] = guest_token
self.headers["x-twitter-auth-type"] = None
extr.log.info("Retrying API request as guest")
continue
@@ -1578,8 +1597,6 @@ def _login_impl(extr, username, password):
"Login with email is no longer possible. "
"You need to provide your username or phone number instead.")
- extr.log.info("Logging in as %s", username)
-
def process(response):
try:
data = response.json()
@@ -1598,8 +1615,10 @@ def _login_impl(extr, username, password):
extr.session.cookies.clear()
api = TwitterAPI(extr)
+ api._authenticate_guest()
headers = api.headers
- headers["Referer"] = "https://twitter.com/i/flow/login"
+
+ extr.log.info("Logging in as %s", username)
# init
data = {
@@ -1653,7 +1672,7 @@ def _login_impl(extr, username, password):
"web_modal": 1,
},
}
- url = "https://twitter.com/i/api/1.1/onboarding/task.json?flow_name=login"
+ url = "https://api.twitter.com/1.1/onboarding/task.json?flow_name=login"
response = extr.request(url, method="POST", headers=headers, json=data)
data = {
@@ -1668,7 +1687,7 @@ def _login_impl(extr, username, password):
},
],
}
- url = "https://twitter.com/i/api/1.1/onboarding/task.json"
+ url = "https://api.twitter.com/1.1/onboarding/task.json"
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=None)
@@ -1692,7 +1711,7 @@ def _login_impl(extr, username, password):
},
],
}
- # url = "https://twitter.com/i/api/1.1/onboarding/task.json"
+ # url = "https://api.twitter.com/1.1/onboarding/task.json"
extr.sleep(random.uniform(2.0, 4.0), "login (username)")
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=None)
@@ -1710,7 +1729,7 @@ def _login_impl(extr, username, password):
},
],
}
- # url = "https://twitter.com/i/api/1.1/onboarding/task.json"
+ # url = "https://api.twitter.com/1.1/onboarding/task.json"
extr.sleep(random.uniform(2.0, 4.0), "login (password)")
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=None)
@@ -1727,7 +1746,7 @@ def _login_impl(extr, username, password):
},
],
}
- # url = "https://twitter.com/i/api/1.1/onboarding/task.json"
+ # url = "https://api.twitter.com/1.1/onboarding/task.json"
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=None)
process(response)