summary refs log tree commit diff stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py 167
1 files changed, 93 insertions, 74 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index a77ea06..b769912 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -239,30 +239,29 @@ class TwitterExtractor(Extractor):
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
- url = "https://mobile.twitter.com/i/nojs_router"
- params = {"path": "/login"}
- headers = {"Referer": self.root + "/", "Origin": self.root}
- page = self.request(
- url, method="POST", params=params, headers=headers, data={}).text
+ token = util.generate_csrf_token()
+ self.session.cookies.clear()
+ self.request(self.root + "/login")
- pos = page.index('name="authenticity_token"')
- token = text.extract(page, 'value="', '"', pos)[0]
-
- url = "https://mobile.twitter.com/sessions"
+ url = self.root + "/sessions"
+ cookies = {
+ "_mb_tk": token,
+ }
data = {
+ "redirect_after_login" : "/",
+ "remember_me" : "1",
"authenticity_token" : token,
+ "wfa" : "1",
+ "ui_metrics" : "{}",
"session[username_or_email]": username,
"session[password]" : password,
- "remember_me" : "1",
- "wfa" : "1",
- "commit" : "+Log+in+",
- "ui_metrics" : "",
}
- response = self.request(url, method="POST", data=data)
+ response = self.request(
+ url, method="POST", cookies=cookies, data=data)
+
cookies = {
cookie.name: cookie.value
for cookie in self.session.cookies
- if cookie.domain == self.cookiedomain
}
if "/error" in response.url or "auth_token" not in cookies:
@@ -464,15 +463,17 @@ class TwitterAPI():
def __init__(self, extractor):
self.extractor = extractor
+
+ self.root = "https://twitter.com/i/api"
self.headers = {
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
"COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
"4FA33AGWWjCpTnA",
"x-guest-token": None,
+ "x-twitter-auth-type": None,
"x-twitter-client-language": "en",
"x-twitter-active-user": "yes",
"x-csrf-token": None,
- "Origin": "https://twitter.com",
"Referer": "https://twitter.com/",
}
self.params = {
@@ -487,8 +488,8 @@ class TwitterAPI():
"skip_status": "1",
"cards_platform": "Web-12",
"include_cards": "1",
- "include_composer_source": "true",
"include_ext_alt_text": "true",
+ "include_quote_count": "true",
"include_reply_count": "1",
"tweet_mode": "extended",
"include_entities": "true",
@@ -497,11 +498,9 @@ class TwitterAPI():
"include_ext_media_availability": "true",
"send_error_codes": "true",
"simple_quoted_tweet": "true",
- # "count": "20",
"count": "100",
"cursor": None,
- "ext": "mediaStats,highlightedLabel,cameraMoment",
- "include_quote_count": "true",
+ "ext": "mediaStats,highlightedLabel",
}
cookies = self.extractor.session.cookies
@@ -516,17 +515,15 @@ class TwitterAPI():
if cookies.get("auth_token", domain=cookiedomain):
# logged in
- self.root = "https://twitter.com/i/api/"
self.headers["x-twitter-auth-type"] = "OAuth2Session"
else:
# guest
- self.root = "https://api.twitter.com/"
guest_token = self._guest_token()
cookies.set("gt", guest_token, domain=cookiedomain)
self.headers["x-guest-token"] = guest_token
def tweet(self, tweet_id):
- endpoint = "2/timeline/conversation/{}.json".format(tweet_id)
+ endpoint = "/2/timeline/conversation/{}.json".format(tweet_id)
tweets = []
for tweet in self._pagination(endpoint):
if tweet["id_str"] == tweet_id or \
@@ -540,43 +537,46 @@ class TwitterAPI():
def timeline_profile(self, screen_name):
user_id = self._user_id_by_screen_name(screen_name)
- endpoint = "2/timeline/profile/{}.json".format(user_id)
- return self._pagination(endpoint)
+ endpoint = "/2/timeline/profile/{}.json".format(user_id)
+ params = self.params.copy()
+ params["include_tweet_replies"] = "false"
+ return self._pagination(endpoint, params)
def timeline_media(self, screen_name):
user_id = self._user_id_by_screen_name(screen_name)
- endpoint = "2/timeline/media/{}.json".format(user_id)
+ endpoint = "/2/timeline/media/{}.json".format(user_id)
return self._pagination(endpoint)
def timeline_favorites(self, screen_name):
user_id = self._user_id_by_screen_name(screen_name)
- endpoint = "2/timeline/favorites/{}.json".format(user_id)
+ endpoint = "/2/timeline/favorites/{}.json".format(user_id)
+ params = self.params.copy()
+ params["sorted_by_time"] = "true"
return self._pagination(endpoint)
def timeline_bookmark(self):
- endpoint = "2/timeline/bookmark.json"
+ endpoint = "/2/timeline/bookmark.json"
return self._pagination(endpoint)
def timeline_list(self, list_id):
- endpoint = "2/timeline/list.json"
+ endpoint = "/2/timeline/list.json"
params = self.params.copy()
params["list_id"] = list_id
params["ranking_mode"] = "reverse_chronological"
return self._pagination(endpoint, params)
def search(self, query):
- endpoint = "2/search/adaptive.json"
+ endpoint = "/2/search/adaptive.json"
params = self.params.copy()
params["q"] = query
params["tweet_search_mode"] = "live"
params["query_source"] = "typed_query"
params["pc"] = "1"
params["spelling_corrections"] = "1"
- return self._pagination(
- endpoint, params, "sq-I-t-", "sq-cursor-bottom")
+ return self._pagination(endpoint, params)
def list_members(self, list_id):
- endpoint = "graphql/M74V2EwlxxVYGB4DbyAphQ/ListMembers"
+ endpoint = "/graphql/3pV4YlpljXUTFAa1jVNWQw/ListMembers"
variables = {
"listId": list_id,
"count" : 20,
@@ -586,7 +586,7 @@ class TwitterAPI():
return self._pagination_members(endpoint, variables)
def list_by_rest_id(self, list_id):
- endpoint = "graphql/LXXTUytSX1QY-2p8Xp9BFA/ListByRestId"
+ endpoint = "/graphql/EhaI2uiCBJI97e28GN8WjQ/ListByRestId"
params = {"variables": '{"listId":"' + list_id + '"'
',"withUserResult":false}'}
try:
@@ -595,7 +595,7 @@ class TwitterAPI():
raise exception.NotFoundError("list")
def user_by_screen_name(self, screen_name):
- endpoint = "graphql/jMaTS-_Ea8vh9rpKggJbCQ/UserByScreenName"
+ endpoint = "/graphql/ZRnOhhXPwue_JGILb9TNug/UserByScreenName"
params = {"variables": '{"screen_name":"' + screen_name + '"'
',"withHighlightedLabel":true}'}
try:
@@ -610,14 +610,16 @@ class TwitterAPI():
@cache(maxage=3600)
def _guest_token(self):
- endpoint = "1.1/guest/activate.json"
- return self._call(endpoint, None, "POST")["guest_token"]
+ root = "https://api.twitter.com"
+ endpoint = "/1.1/guest/activate.json"
+ return self._call(endpoint, None, root, "POST")["guest_token"]
- def _call(self, endpoint, params, method="GET"):
- url = self.root + endpoint
+ def _call(self, endpoint, params, root=None, method="GET"):
+ if root is None:
+ root = self.root
response = self.extractor.request(
- url, method=method, params=params, headers=self.headers,
- fatal=None)
+ root + endpoint, method=method, params=params,
+ headers=self.headers, fatal=None)
# update 'x-csrf-token' header (#1170)
csrf_token = response.cookies.get("ct0")
@@ -641,11 +643,11 @@ class TwitterAPI():
raise exception.StopExtraction(
"%s %s (%s)", response.status_code, response.reason, msg)
- def _pagination(self, endpoint, params=None,
- entry_tweet="tweet-", entry_cursor="cursor-bottom-"):
+ def _pagination(self, endpoint, params=None):
if params is None:
params = self.params.copy()
original_retweets = (self.extractor.retweets == "original")
+ pinned_tweet = True
while True:
cursor = tweet = None
@@ -654,48 +656,65 @@ class TwitterAPI():
instr = data["timeline"]["instructions"]
if not instr:
return
+ tweet_ids = []
tweets = data["globalObjects"]["tweets"]
users = data["globalObjects"]["users"]
+ if pinned_tweet:
+ if "pinEntry" in instr[-1]:
+ tweet_ids.append(instr[-1]["pinEntry"]["entry"]["content"]
+ ["item"]["content"]["tweet"]["id"])
+ pinned_tweet = False
+
+ # collect tweet IDs and cursor value
for entry in instr[0]["addEntries"]["entries"]:
+ entry_startswith = entry["entryId"].startswith
+
+ if entry_startswith(("tweet-", "sq-I-t-")):
+ tweet_ids.append(
+ entry["content"]["item"]["content"]["tweet"]["id"])
- if entry["entryId"].startswith(entry_tweet):
- try:
- tweet = tweets[
- entry["content"]["item"]["content"]["tweet"]["id"]]
- except KeyError:
- self.extractor.log.debug(
- "Skipping %s (deleted)",
- entry["entryId"][len(entry_tweet):])
- continue
-
- if "retweeted_status_id_str" in tweet:
- retweet = tweets.get(tweet["retweeted_status_id_str"])
- if original_retweets:
- if not retweet:
- continue
- retweet["_retweet_id_str"] = tweet["id_str"]
- tweet = retweet
- elif retweet:
- tweet["author"] = users[retweet["user_id_str"]]
- tweet["user"] = users[tweet["user_id_str"]]
- yield tweet
-
- if "quoted_status_id_str" in tweet:
- quoted = tweets.get(tweet["quoted_status_id_str"])
- if quoted:
- quoted["author"] = users[quoted["user_id_str"]]
- quoted["user"] = tweet["user"]
- quoted["quoted"] = True
- yield quoted
-
- elif entry["entryId"].startswith(entry_cursor):
+ elif entry_startswith("homeConversation-"):
+ tweet_ids.extend(
+ entry["content"]["timelineModule"]["metadata"]
+ ["conversationMetadata"]["allTweetIds"][::-1])
+
+ elif entry_startswith(("cursor-bottom-", "sq-cursor-bottom")):
cursor = entry["content"]["operation"]["cursor"]
if not cursor.get("stopOnEmptyResponse"):
# keep going even if there are no tweets
tweet = True
cursor = cursor["value"]
+ # process tweets
+ for tweet_id in tweet_ids:
+ try:
+ tweet = tweets[tweet_id]
+ except KeyError:
+ self.extractor.log.debug("Skipping %s (deleted)", tweet_id)
+ continue
+
+ if "retweeted_status_id_str" in tweet:
+ retweet = tweets.get(tweet["retweeted_status_id_str"])
+ if original_retweets:
+ if not retweet:
+ continue
+ retweet["_retweet_id_str"] = tweet["id_str"]
+ tweet = retweet
+ elif retweet:
+ tweet["author"] = users[retweet["user_id_str"]]
+ tweet["user"] = users[tweet["user_id_str"]]
+ yield tweet
+
+ if "quoted_status_id_str" in tweet:
+ quoted = tweets.get(tweet["quoted_status_id_str"])
+ if quoted:
+ quoted["author"] = users[quoted["user_id_str"]]
+ quoted["user"] = tweet["user"]
+ quoted["quoted"] = True
+ yield quoted
+
+ # update cursor value
if "replaceEntry" in instr[-1] :
cursor = (instr[-1]["replaceEntry"]["entry"]
["content"]["operation"]["cursor"]["value"])