summary refs log tree commit diff stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py 214
1 files changed, 167 insertions, 47 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 29b4ac3..89d96d7 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -248,11 +248,15 @@ class TwitterExtractor(Extractor):
author = tweet["user"]
author = self._transform_user(author)
+ if "note_tweet" in tweet:
+ note = tweet["note_tweet"]["note_tweet_results"]["result"]
+ else:
+ note = None
+
if "legacy" in tweet:
tweet = tweet["legacy"]
tget = tweet.get
- entities = tweet["entities"]
tdata = {
"tweet_id" : text.parse_int(tweet["id_str"]),
"retweet_id" : text.parse_int(
@@ -272,6 +276,8 @@ class TwitterExtractor(Extractor):
"retweet_count" : tget("retweet_count"),
}
+ entities = note["entity_set"] if note else tweet["entities"]
+
hashtags = entities.get("hashtags")
if hashtags:
tdata["hashtags"] = [t["text"] for t in hashtags]
@@ -284,7 +290,8 @@ class TwitterExtractor(Extractor):
"nick": u["name"],
} for u in mentions]
- content = text.unescape(tget("full_text") or tget("text") or "")
+ content = text.unescape(
+ note["text"] if note else tget("full_text") or tget("text") or "")
urls = entities.get("urls")
if urls:
for url in urls:
@@ -642,6 +649,21 @@ class TwitterSearchExtractor(TwitterExtractor):
return self.api.search_adaptive(query)
+class TwitterHashtagExtractor(TwitterExtractor):
+ """Extractor for Twitter hashtags"""
+ subcategory = "hashtag"
+ pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)"
+ test = ("https://twitter.com/hashtag/nature", {
+ "pattern": TwitterSearchExtractor.pattern,
+ "url": "3571c3a53b7647ea35517041fdc17f77ec5b2cb9",
+ })
+
+ def items(self):
+ url = "{}/search?q=%23{}".format(self.root, self.user)
+ data = {"_extractor": TwitterSearchExtractor}
+ yield Message.Queue, url, data
+
+
class TwitterEventExtractor(TwitterExtractor):
"""Extractor for Tweets from a Twitter Event"""
subcategory = "event"
@@ -803,6 +825,23 @@ class TwitterTweetExtractor(TwitterExtractor):
r"\?format=(jpg|png)&name=orig$",
"range": "1-2",
}),
+ # note tweet with long 'content'
+ ("https://twitter.com/i/web/status/1629193457112686592", {
+ "keyword": {
+ "content": """\
+BREAKING - DEADLY LIES: Independent researchers at Texas A&M University have \
+just contradicted federal government regulators, saying that toxic air \
+pollutants in East Palestine, Ohio, could pose long-term risks. \n\nThe \
+Washington Post writes, "Three weeks after the toxic train derailment in \
+Ohio, an analysis of Environmental Protection Agency data has found nine air \
+pollutants at levels that could raise long-term health concerns in and around \
+East Palestine, according to an independent analysis. \n\n\"The analysis by \
+Texas A&M University seems to contradict statements by state and federal \
+regulators that air near the crash site is completely safe, despite residents \
+complaining about rashes, breathing problems and other health effects." \
+Your reaction.""",
+ },
+ }),
)
def __init__(self, match):
@@ -951,6 +990,10 @@ class TwitterAPI():
self.extractor = extractor
self.root = "https://api.twitter.com"
+ self._nsfw_warning = True
+ self._syndication = self.extractor.syndication
+ self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
+
cookies = extractor.session.cookies
cookiedomain = extractor.cookiedomain
@@ -965,7 +1008,11 @@ class TwitterAPI():
auth_token = cookies.get("auth_token", domain=cookiedomain)
+ if not auth_token:
+ self.user_media = self.user_media_legacy
+
self.headers = {
+ "Accept": "*/*",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
"COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
"4FA33AGWWjCpTnA",
@@ -1019,73 +1066,132 @@ class TwitterAPI():
"collab_control,vibe",
}
self.variables = {
- "includePromotedContent": False,
- "withSuperFollowsUserFields": True,
- "withBirdwatchPivots": False,
"withDownvotePerspective": False,
"withReactionsMetadata": False,
"withReactionsPerspective": False,
- "withSuperFollowsTweetFields": True,
- "withClientEventToken": False,
- "withBirdwatchNotes": False,
- "withVoice": True,
- "withV2Timeline": False,
- "__fs_interactive_text": False,
- "__fs_dont_mention_me_view_api_enabled": False,
}
-
- self._nsfw_warning = True
- self._syndication = self.extractor.syndication
- self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
+ self.features = {
+ "responsive_web_twitter_blue_verified_badge_is_enabled": True,
+ "responsive_web_graphql_exclude_directive_enabled": True,
+ "verified_phone_label_enabled": False,
+ "responsive_web_graphql_skip_user_profile_"
+ "image_extensions_enabled": False,
+ "responsive_web_graphql_timeline_navigation_enabled": True,
+ }
+ self.features_pagination = {
+ "responsive_web_twitter_blue_verified_badge_is_enabled": True,
+ "responsive_web_graphql_exclude_directive_enabled": True,
+ "verified_phone_label_enabled": False,
+ "responsive_web_graphql_timeline_navigation_enabled": True,
+ "responsive_web_graphql_skip_user_profile_"
+ "image_extensions_enabled": False,
+ "tweetypie_unmention_optimization_enabled": True,
+ "vibe_api_enabled": True,
+ "responsive_web_edit_tweet_api_enabled": True,
+ "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
+ "view_counts_everywhere_api_enabled": True,
+ "longform_notetweets_consumption_enabled": True,
+ "tweet_awards_web_tipping_enabled": False,
+ "freedom_of_speech_not_reach_fetch_enabled": False,
+ "standardized_nudges_misinfo": True,
+ "tweet_with_visibility_results_prefer_gql_"
+ "limited_actions_policy_enabled": False,
+ "interactive_text_enabled": True,
+ "responsive_web_text_conversations_enabled": False,
+ "longform_notetweets_richtext_consumption_enabled": False,
+ "responsive_web_enhance_cards_enabled": False,
+ }
def tweet_detail(self, tweet_id):
- endpoint = "/graphql/ItejhtHVxU7ksltgMmyaLA/TweetDetail"
+ endpoint = "/graphql/zXaXQgfyR4GxE21uwYQSyA/TweetDetail"
variables = {
"focalTweetId": tweet_id,
+ "referrer": "profile",
"with_rux_injections": False,
+ "includePromotedContent": True,
"withCommunity": True,
"withQuickPromoteEligibilityTweetFields": True,
"withBirdwatchNotes": False,
+ "withSuperFollowsUserFields": True,
+ "withSuperFollowsTweetFields": True,
+ "withVoice": True,
+ "withV2Timeline": True,
}
return self._pagination_tweets(
- endpoint, variables, ("threaded_conversation_with_injections",))
+ endpoint, variables, ("threaded_conversation_with_injections_v2",))
def user_tweets(self, screen_name):
- endpoint = "/graphql/WZT7sCTrLvSOaWOXLDsWbQ/UserTweets"
+ endpoint = "/graphql/9rys0A7w1EyqVd2ME0QCJg/UserTweets"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
+ "includePromotedContent": True,
"withQuickPromoteEligibilityTweetFields": True,
+ "withVoice": True,
+ "withV2Timeline": True,
}
return self._pagination_tweets(endpoint, variables)
def user_tweets_and_replies(self, screen_name):
- endpoint = "/graphql/t4wEKVulW4Mbv1P0kgxTEw/UserTweetsAndReplies"
+ endpoint = "/graphql/ehMCHF3Mkgjsfz_aImqOsg/UserTweetsAndReplies"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
+ "includePromotedContent": True,
"withCommunity": True,
+ "withVoice": True,
+ "withV2Timeline": True,
}
return self._pagination_tweets(endpoint, variables)
def user_media(self, screen_name):
- endpoint = "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia"
+ endpoint = "/graphql/MA_EP2a21zpzNWKRkaPBMg/UserMedia"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
+ "includePromotedContent": False,
+ "withClientEventToken": False,
+ "withBirdwatchNotes": False,
+ "withVoice": True,
+ "withV2Timeline": True,
}
return self._pagination_tweets(endpoint, variables)
+ def user_media_legacy(self, screen_name):
+ endpoint = "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia"
+ variables = {
+ "userId": self._user_id_by_screen_name(screen_name),
+ "count": 100,
+ "includePromotedContent": False,
+ "withSuperFollowsUserFields": True,
+ "withBirdwatchPivots": False,
+ "withSuperFollowsTweetFields": True,
+ "withClientEventToken": False,
+ "withBirdwatchNotes": False,
+ "withVoice": True,
+ "withV2Timeline": False,
+ "__fs_interactive_text": False,
+ "__fs_dont_mention_me_view_api_enabled": False,
+ }
+ return self._pagination_tweets(
+ endpoint, variables, ("user", "result", "timeline", "timeline"),
+ features=False)
+
def user_likes(self, screen_name):
- endpoint = "/graphql/9MSTt44HoGjVFSg_u3rHDw/Likes"
+ endpoint = "/graphql/XbHBYpgURwtklXj8NNxTDw/Likes"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
+ "includePromotedContent": False,
+ "withClientEventToken": False,
+ "withBirdwatchNotes": False,
+ "withVoice": True,
+ "withV2Timeline": True,
}
return self._pagination_tweets(endpoint, variables)
def user_bookmarks(self):
- endpoint = "/graphql/uKP9v_I31k0_VSBmlpq2Xg/Bookmarks"
+ endpoint = "/graphql/Xq0wQSWHlcfnXARLJGqTxg/Bookmarks"
variables = {
"count": 100,
}
@@ -1093,7 +1199,7 @@ class TwitterAPI():
endpoint, variables, ("bookmark_timeline", "timeline"), False)
def list_latest_tweets_timeline(self, list_id):
- endpoint = "/graphql/z3l-EHlx-fyg8OvGO4JN8A/ListLatestTweetsTimeline"
+ endpoint = "/graphql/FDI9EiIp54KxEOWGiv3B4A/ListLatestTweetsTimeline"
variables = {
"listId": list_id,
"count": 100,
@@ -1128,18 +1234,21 @@ class TwitterAPI():
["twitter_objects"]["live_events"][event_id])
def list_by_rest_id(self, list_id):
- endpoint = "/graphql/BWEhzAk7k8TwbU4lKH2dpw/ListByRestId"
- params = {"variables": self._json_dumps({
- "listId": list_id,
- "withSuperFollowsUserFields": True,
- })}
+ endpoint = "/graphql/KlGpwq5CAt9tCfHkV2mwYQ/ListByRestId"
+ params = {
+ "variables": self._json_dumps({
+ "listId": list_id,
+ "withSuperFollowsUserFields": True,
+ }),
+ "features": self._json_dumps(self.features),
+ }
try:
return self._call(endpoint, params)["data"]["list"]
except KeyError:
raise exception.NotFoundError("list")
def list_members(self, list_id):
- endpoint = "/graphql/snESM0DPs3c7M1SBm4rvVw/ListMembers"
+ endpoint = "/graphql/XsAJX17RLgLYU8GALIWg2g/ListMembers"
variables = {
"listId": list_id,
"count": 100,
@@ -1149,29 +1258,34 @@ class TwitterAPI():
endpoint, variables, ("list", "members_timeline", "timeline"))
def user_following(self, screen_name):
- endpoint = "/graphql/mIwX8GogcobVlRwlgpHNYA/Following"
+ endpoint = "/graphql/vTZwBbd_gz6aI8v6Wze21A/Following"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
+ "includePromotedContent": False,
}
return self._pagination_users(endpoint, variables)
def user_by_rest_id(self, rest_id):
- endpoint = "/graphql/I5nvpI91ljifos1Y3Lltyg/UserByRestId"
- params = {"variables": self._json_dumps({
- "userId": rest_id,
- "withSafetyModeUserFields": True,
- "withSuperFollowsUserFields": True,
- })}
+ endpoint = "/graphql/QPSxc9lxrmrwnBzYkJI8eA/UserByRestId"
+ params = {
+ "variables": self._json_dumps({
+ "userId": rest_id,
+ "withSafetyModeUserFields": True,
+ }),
+ "features": self._json_dumps(self.features),
+ }
return self._call(endpoint, params)["data"]["user"]["result"]
def user_by_screen_name(self, screen_name):
- endpoint = "/graphql/7mjxD3-C6BxitPMVQ6w0-Q/UserByScreenName"
- params = {"variables": self._json_dumps({
- "screen_name": screen_name,
- "withSafetyModeUserFields": True,
- "withSuperFollowsUserFields": True,
- })}
+ endpoint = "/graphql/nZjSkpOpSL5rWyIVdsKeLA/UserByScreenName"
+ params = {
+ "variables": self._json_dumps({
+ "screen_name": screen_name,
+ "withSafetyModeUserFields": True,
+ }),
+ "features": self._json_dumps(self.features),
+ }
return self._call(endpoint, params)["data"]["user"]["result"]
def _user_id_by_screen_name(self, screen_name):
@@ -1337,19 +1451,23 @@ class TwitterAPI():
params["cursor"] = cursor
def _pagination_tweets(self, endpoint, variables,
- path=None, stop_tweets=True):
+ path=None, stop_tweets=True, features=True):
extr = self.extractor
variables.update(self.variables)
original_retweets = (extr.retweets == "original")
pinned_tweet = extr.pinned
+ params = {"variables": None}
+ if features:
+ params["features"] = self._json_dumps(self.features_pagination)
+
while True:
- params = {"variables": self._json_dumps(variables)}
+ params["variables"] = self._json_dumps(variables)
data = self._call(endpoint, params)["data"]
try:
if path is None:
- instructions = (data["user"]["result"]["timeline"]
+ instructions = (data["user"]["result"]["timeline_v2"]
["timeline"]["instructions"])
else:
instructions = data
@@ -1487,10 +1605,12 @@ class TwitterAPI():
def _pagination_users(self, endpoint, variables, path=None):
variables.update(self.variables)
+ params = {"variables": None,
+ "features" : self._json_dumps(self.features_pagination)}
while True:
cursor = entry = stop = None
- params = {"variables": self._json_dumps(variables)}
+ params["variables"] = self._json_dumps(variables)
data = self._call(endpoint, params)["data"]
try: