summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py | 124
1 file changed, 88 insertions, 36 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 89d96d7..2ccc7e5 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -105,6 +105,10 @@ class TwitterExtractor(Extractor):
continue
seen_tweets.add(data["id_str"])
+ if "withheld_scope" in data:
+ txt = data.get("full_text") or data.get("text") or ""
+ self.log.warning("'%s' (%s)", txt, data["id_str"])
+
files = []
if "extended_entities" in data:
self._extract_media(
@@ -256,19 +260,26 @@ class TwitterExtractor(Extractor):
if "legacy" in tweet:
tweet = tweet["legacy"]
+ tweet_id = int(tweet["id_str"])
+ if tweet_id >= 300000000000000:
+ date = text.parse_timestamp(
+ ((tweet_id >> 22) + 1288834974657) // 1000)
+ else:
+ date = text.parse_datetime(
+ tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
+
tget = tweet.get
tdata = {
- "tweet_id" : text.parse_int(tweet["id_str"]),
+ "tweet_id" : tweet_id,
"retweet_id" : text.parse_int(
tget("retweeted_status_id_str")),
"quote_id" : text.parse_int(
tget("quoted_by_id_str")),
"reply_id" : text.parse_int(
tget("in_reply_to_status_id_str")),
- "date" : text.parse_datetime(
- tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
- "user" : self._user or author,
+ "date" : date,
"author" : author,
+ "user" : self._user or author,
"lang" : tweet["lang"],
"favorite_count": tget("favorite_count"),
"quote_count" : tget("quote_count"),
@@ -321,8 +332,10 @@ class TwitterExtractor(Extractor):
user = self.api.user_by_screen_name(user["screen_name"])["legacy"]
uget = user.get
- entities = user["entities"]
+ if uget("withheld_scope"):
+ self.log.warning("'%s'", uget("description"))
+ entities = user["entities"]
self._user_cache[uid] = udata = {
"id" : text.parse_int(uid),
"name" : user["screen_name"],
@@ -398,10 +411,8 @@ class TwitterExtractor(Extractor):
except Exception:
yield tweet
- def _make_tweet(self, user, id_str, url, timestamp):
+ def _make_tweet(self, user, url, id_str):
return {
- "created_at": text.parse_timestamp(timestamp).strftime(
- "%a %b %d %H:%M:%S +0000 %Y"),
"id_str": id_str,
"lang": None,
"user": user,
@@ -564,6 +575,12 @@ class TwitterLikesExtractor(TwitterExtractor):
def tweets(self):
return self.api.user_likes(self.user)
+ def _transform_tweet(self, tweet):
+ tdata = TwitterExtractor._transform_tweet(self, tweet)
+ tdata["date_liked"] = text.parse_timestamp(
+ (int(tweet["sortIndex"]) >> 20) // 1000)
+ return tdata
+
class TwitterBookmarkExtractor(TwitterExtractor):
"""Extractor for bookmarked tweets"""
@@ -574,6 +591,12 @@ class TwitterBookmarkExtractor(TwitterExtractor):
def tweets(self):
return self.api.user_bookmarks()
+ def _transform_tweet(self, tweet):
+ tdata = TwitterExtractor._transform_tweet(self, tweet)
+ tdata["date_bookmarked"] = text.parse_timestamp(
+ (int(tweet["sortIndex"]) >> 20) // 1000)
+ return tdata
+
class TwitterListExtractor(TwitterExtractor):
"""Extractor for Twitter lists"""
@@ -593,7 +616,11 @@ class TwitterListMembersExtractor(TwitterExtractor):
"""Extractor for members of a Twitter list"""
subcategory = "list-members"
pattern = BASE_PATTERN + r"/i/lists/(\d+)/members"
- test = ("https://twitter.com/i/lists/784214683683127296/members",)
+ test = ("https://twitter.com/i/lists/784214683683127296/members", {
+ "pattern": TwitterTimelineExtractor.pattern,
+ "range": "1-40",
+ "count": 40,
+ })
def items(self):
self.login()
@@ -780,6 +807,16 @@ class TwitterTweetExtractor(TwitterExtractor):
("cards-blacklist", ("twitch.tv",))),
"count": 0,
}),
+ # retweet
+ ("https://twitter.com/jessica_3978/status/1296304589591810048", {
+ "options": (("retweets", True),),
+ "count": 2,
+ "keyword": {
+ "tweet_id" : 1296304589591810048,
+ "retweet_id": 1296296016002547713,
+ "date" : "dt:2020-08-20 04:34:32",
+ },
+ }),
# original retweets (#1026)
("https://twitter.com/jessica_3978/status/1296304589591810048", {
"options": (("retweets", "original"),),
@@ -915,9 +952,8 @@ class TwitterAvatarExtractor(TwitterExtractor):
url = url.replace("_normal.", ".")
id_str = url.rsplit("/", 2)[1]
- timestamp = ((int(id_str) >> 22) + 1288834974657) // 1000
- return (self._make_tweet(user, id_str, url, timestamp),)
+ return (self._make_tweet(user, url, id_str),)
class TwitterBackgroundExtractor(TwitterExtractor):
@@ -932,7 +968,7 @@ class TwitterBackgroundExtractor(TwitterExtractor):
"keyword": {
"date": "dt:2015-01-12 10:29:43",
"filename": "1421058583",
- "tweet_id": 0,
+ "tweet_id": 554586009367478272,
},
}),
("https://twitter.com/User16/header_photo", {
@@ -950,7 +986,8 @@ class TwitterBackgroundExtractor(TwitterExtractor):
except (KeyError, ValueError):
return ()
- return (self._make_tweet(user, None, url, timestamp),)
+ id_str = str((int(timestamp) * 1000 - 1288834974657) << 22)
+ return (self._make_tweet(user, url, id_str),)
class TwitterImageExtractor(Extractor):
@@ -1008,9 +1045,6 @@ class TwitterAPI():
auth_token = cookies.get("auth_token", domain=cookiedomain)
- if not auth_token:
- self.user_media = self.user_media_legacy
-
self.headers = {
"Accept": "*/*",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
@@ -1071,6 +1105,7 @@ class TwitterAPI():
"withReactionsPerspective": False,
}
self.features = {
+ "blue_business_profile_image_shape_enabled": False,
"responsive_web_twitter_blue_verified_badge_is_enabled": True,
"responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False,
@@ -1079,6 +1114,7 @@ class TwitterAPI():
"responsive_web_graphql_timeline_navigation_enabled": True,
}
self.features_pagination = {
+ "blue_business_profile_image_shape_enabled": False,
"responsive_web_twitter_blue_verified_badge_is_enabled": True,
"responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False,
@@ -1103,7 +1139,7 @@ class TwitterAPI():
}
def tweet_detail(self, tweet_id):
- endpoint = "/graphql/zXaXQgfyR4GxE21uwYQSyA/TweetDetail"
+ endpoint = "/graphql/AV_lPTkN6Fc6LgerQpK8Zg/TweetDetail"
variables = {
"focalTweetId": tweet_id,
"referrer": "profile",
@@ -1121,7 +1157,7 @@ class TwitterAPI():
endpoint, variables, ("threaded_conversation_with_injections_v2",))
def user_tweets(self, screen_name):
- endpoint = "/graphql/9rys0A7w1EyqVd2ME0QCJg/UserTweets"
+ endpoint = "/graphql/BeHK76TOCY3P8nO-FWocjA/UserTweets"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1133,7 +1169,7 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_tweets_and_replies(self, screen_name):
- endpoint = "/graphql/ehMCHF3Mkgjsfz_aImqOsg/UserTweetsAndReplies"
+ endpoint = "/graphql/eZVlZu_1gwb6hMUDXBnZoQ/UserTweetsAndReplies"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1145,7 +1181,7 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_media(self, screen_name):
- endpoint = "/graphql/MA_EP2a21zpzNWKRkaPBMg/UserMedia"
+ endpoint = "/graphql/d_ONZLUHGCsErBCriRsLXg/UserMedia"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1178,7 +1214,7 @@ class TwitterAPI():
features=False)
def user_likes(self, screen_name):
- endpoint = "/graphql/XbHBYpgURwtklXj8NNxTDw/Likes"
+ endpoint = "/graphql/fN4-E0MjFJ9Cn7IYConL7g/Likes"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1191,15 +1227,18 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_bookmarks(self):
- endpoint = "/graphql/Xq0wQSWHlcfnXARLJGqTxg/Bookmarks"
+ endpoint = "/graphql/RV1g3b8n_SGOHwkqKYSCFw/Bookmarks"
variables = {
"count": 100,
}
+ features = self.features_pagination.copy()
+ features["graphql_timeline_v2_bookmark_timeline"] = True
return self._pagination_tweets(
- endpoint, variables, ("bookmark_timeline", "timeline"), False)
+ endpoint, variables, ("bookmark_timeline_v2", "timeline"), False,
+ features=features)
def list_latest_tweets_timeline(self, list_id):
- endpoint = "/graphql/FDI9EiIp54KxEOWGiv3B4A/ListLatestTweetsTimeline"
+ endpoint = "/graphql/5DAiJG3bD77SiWEs4xViBw/ListLatestTweetsTimeline"
variables = {
"listId": list_id,
"count": 100,
@@ -1234,7 +1273,7 @@ class TwitterAPI():
["twitter_objects"]["live_events"][event_id])
def list_by_rest_id(self, list_id):
- endpoint = "/graphql/KlGpwq5CAt9tCfHkV2mwYQ/ListByRestId"
+ endpoint = "/graphql/D0EoyrDcct2MEqC-LnPzFg/ListByRestId"
params = {
"variables": self._json_dumps({
"listId": list_id,
@@ -1248,7 +1287,7 @@ class TwitterAPI():
raise exception.NotFoundError("list")
def list_members(self, list_id):
- endpoint = "/graphql/XsAJX17RLgLYU8GALIWg2g/ListMembers"
+ endpoint = "/graphql/tzsIIbGUH9RyFCVmtO2W2w/ListMembers"
variables = {
"listId": list_id,
"count": 100,
@@ -1258,7 +1297,7 @@ class TwitterAPI():
endpoint, variables, ("list", "members_timeline", "timeline"))
def user_following(self, screen_name):
- endpoint = "/graphql/vTZwBbd_gz6aI8v6Wze21A/Following"
+ endpoint = "/graphql/FaBzCqZXuQCb4PhB0RHqHw/Following"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1267,7 +1306,7 @@ class TwitterAPI():
return self._pagination_users(endpoint, variables)
def user_by_rest_id(self, rest_id):
- endpoint = "/graphql/QPSxc9lxrmrwnBzYkJI8eA/UserByRestId"
+ endpoint = "/graphql/S2BkcAyFMG--jef2N6Dgzw/UserByRestId"
params = {
"variables": self._json_dumps({
"userId": rest_id,
@@ -1278,7 +1317,7 @@ class TwitterAPI():
return self._call(endpoint, params)["data"]["user"]["result"]
def user_by_screen_name(self, screen_name):
- endpoint = "/graphql/nZjSkpOpSL5rWyIVdsKeLA/UserByScreenName"
+ endpoint = "/graphql/k26ASEiniqy4eXMdknTSoQ/UserByScreenName"
params = {
"variables": self._json_dumps({
"screen_name": screen_name,
@@ -1451,15 +1490,17 @@ class TwitterAPI():
params["cursor"] = cursor
def _pagination_tweets(self, endpoint, variables,
- path=None, stop_tweets=True, features=True):
+ path=None, stop_tweets=True, features=None):
extr = self.extractor
variables.update(self.variables)
original_retweets = (extr.retweets == "original")
pinned_tweet = extr.pinned
params = {"variables": None}
+ if features is None:
+ features = self.features_pagination
if features:
- params["features"] = self._json_dumps(self.features_pagination)
+ params["features"] = self._json_dumps(features)
while True:
params["variables"] = self._json_dumps(variables)
@@ -1550,6 +1591,7 @@ class TwitterAPI():
if "tweet" in tweet:
tweet = tweet["tweet"]
legacy = tweet["legacy"]
+ tweet["sortIndex"] = entry.get("sortIndex")
except KeyError:
extr.log.debug(
"Skipping %s (deleted)",
@@ -1574,10 +1616,17 @@ class TwitterAPI():
retweet["rest_id"]
tweet["author"] = \
retweet["core"]["user_results"]["result"]
- if "extended_entities" in retweet["legacy"] and \
+
+ rtlegacy = retweet["legacy"]
+ if "extended_entities" in rtlegacy and \
"extended_entities" not in legacy:
legacy["extended_entities"] = \
- retweet["legacy"]["extended_entities"]
+ rtlegacy["extended_entities"]
+ if "withheld_scope" in rtlegacy and \
+ "withheld_scope" not in legacy:
+ legacy["withheld_scope"] = \
+ rtlegacy["withheld_scope"]
+ legacy["full_text"] = rtlegacy["full_text"]
except KeyError:
pass
@@ -1590,6 +1639,8 @@ class TwitterAPI():
tweet["core"]["user_results"]["result"]
["legacy"]["screen_name"])
quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"]
+ quoted["sortIndex"] = entry.get("sortIndex")
+
yield quoted
except KeyError:
extr.log.debug(
@@ -1679,9 +1730,10 @@ class TwitterAPI():
"in_reply_to_status_id_str" not in tweet:
tweet["conversation_id_str"] = tweet["id_str"]
- tweet["created_at"] = text.parse_datetime(
- tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime(
- "%a %b %d %H:%M:%S +0000 %Y")
+ if int(tweet_id) < 300000000000000:
+ tweet["created_at"] = text.parse_datetime(
+ tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime(
+ "%a %b %d %H:%M:%S +0000 %Y")
if "video" in tweet:
video = tweet["video"]