1 file changed, 167 insertions, 47 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 29b4ac3..89d96d7 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -248,11 +248,15 @@ class TwitterExtractor(Extractor):
             author = tweet["user"]
         author = self._transform_user(author)
 
+        if "note_tweet" in tweet:
+            note = tweet["note_tweet"]["note_tweet_results"]["result"]
+        else:
+            note = None
+
         if "legacy" in tweet:
             tweet = tweet["legacy"]
 
         tget = tweet.get
-        entities = tweet["entities"]
         tdata = {
             "tweet_id"      : text.parse_int(tweet["id_str"]),
             "retweet_id"    : text.parse_int(
@@ -272,6 +276,8 @@ class TwitterExtractor(Extractor):
             "retweet_count" : tget("retweet_count"),
         }
 
+        entities = note["entity_set"] if note else tweet["entities"]
+
         hashtags = entities.get("hashtags")
         if hashtags:
             tdata["hashtags"] = [t["text"] for t in hashtags]
@@ -284,7 +290,8 @@ class TwitterExtractor(Extractor):
                 "nick": u["name"],
             } for u in mentions]
 
-        content = text.unescape(tget("full_text") or tget("text") or "")
+        content = text.unescape(
+            note["text"] if note else tget("full_text") or tget("text") or "")
         urls = entities.get("urls")
         if urls:
             for url in urls:
@@ -642,6 +649,21 @@ class TwitterSearchExtractor(TwitterExtractor):
         return self.api.search_adaptive(query)
 
 
+class TwitterHashtagExtractor(TwitterExtractor):
+    """Extractor for Twitter hashtags"""  # forwards to a search URL
+    subcategory = "hashtag"
+    pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)"  # captures the tag name
+    test = ("https://twitter.com/hashtag/nature", {
+        "pattern": TwitterSearchExtractor.pattern,
+        "url": "3571c3a53b7647ea35517041fdc17f77ec5b2cb9",
+    })
+
+    def items(self):  # NOTE(review): assumes self.user is the tag; confirm
+        url = "{}/search?q=%23{}".format(self.root, self.user)  # %23 = '#'
+        data = {"_extractor": TwitterSearchExtractor}
+        yield Message.Queue, url, data  # the only message yielded
+
 class TwitterEventExtractor(TwitterExtractor):
     """Extractor for Tweets from a Twitter Event"""
     subcategory = "event"
@@ -803,6 +825,23 @@ class TwitterTweetExtractor(TwitterExtractor):
                        r"\?format=(jpg|png)&name=orig$",
             "range": "1-2",
         }),
+        # note tweet with long 'content'
+        ("https://twitter.com/i/web/status/1629193457112686592", {
+            "keyword": {
+                "content": """\
+BREAKING - DEADLY LIES: Independent researchers at Texas A&M University have \
+just contradicted federal government regulators, saying that toxic air \
+pollutants in East Palestine, Ohio, could pose long-term risks. \n\nThe \
+Washington Post writes, "Three weeks after the toxic train derailment in \
+Ohio, an analysis of Environmental Protection Agency data has found nine air \
+pollutants at levels that could raise long-term health concerns in and around \
+East Palestine, according to an independent analysis. \n\n\"The analysis by \
+Texas A&M University seems to contradict statements by state and federal \
+regulators that air near the crash site is completely safe, despite residents \
+complaining about rashes, breathing problems and other health effects." \
+Your reaction.""",
+            },
+        }),
     )
 
     def __init__(self, match):
@@ -951,6 +990,10 @@ class TwitterAPI():
         self.extractor = extractor
 
         self.root = "https://api.twitter.com"
+        self._nsfw_warning = True
+        self._syndication = self.extractor.syndication
+        self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
+
         cookies = extractor.session.cookies
         cookiedomain = extractor.cookiedomain
 
@@ -965,7 +1008,11 @@ class TwitterAPI():
 
         auth_token = cookies.get("auth_token", domain=cookiedomain)
 
+        if not auth_token:
+            self.user_media = self.user_media_legacy
+
         self.headers = {
+            "Accept": "*/*",
             "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
                              "COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
                              "4FA33AGWWjCpTnA",
@@ -1019,73 +1066,132 @@ class TwitterAPI():
                    "collab_control,vibe",
         }
         self.variables = {
-            "includePromotedContent": False,
-            "withSuperFollowsUserFields": True,
-            "withBirdwatchPivots": False,
             "withDownvotePerspective": False,
             "withReactionsMetadata": False,
             "withReactionsPerspective": False,
-            "withSuperFollowsTweetFields": True,
-            "withClientEventToken": False,
-            "withBirdwatchNotes": False,
-            "withVoice": True,
-            "withV2Timeline": False,
-            "__fs_interactive_text": False,
-            "__fs_dont_mention_me_view_api_enabled": False,
         }
-
-        self._nsfw_warning = True
-        self._syndication = self.extractor.syndication
-        self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
+        self.features = {
+            "responsive_web_twitter_blue_verified_badge_is_enabled": True,
+            "responsive_web_graphql_exclude_directive_enabled": True,
+            "verified_phone_label_enabled": False,
+            "responsive_web_graphql_skip_user_profile_"
+            "image_extensions_enabled": False,
+            "responsive_web_graphql_timeline_navigation_enabled": True,
+        }
+        self.features_pagination = {
+            "responsive_web_twitter_blue_verified_badge_is_enabled": True,
+            "responsive_web_graphql_exclude_directive_enabled": True,
+            "verified_phone_label_enabled": False,
+            "responsive_web_graphql_timeline_navigation_enabled": True,
+            "responsive_web_graphql_skip_user_profile_"
+            "image_extensions_enabled": False,
+            "tweetypie_unmention_optimization_enabled": True,
+            "vibe_api_enabled": True,
+            "responsive_web_edit_tweet_api_enabled": True,
+            "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
+            "view_counts_everywhere_api_enabled": True,
+            "longform_notetweets_consumption_enabled": True,
+            "tweet_awards_web_tipping_enabled": False,
+            "freedom_of_speech_not_reach_fetch_enabled": False,
+            "standardized_nudges_misinfo": True,
+            "tweet_with_visibility_results_prefer_gql_"
+            "limited_actions_policy_enabled": False,
+            "interactive_text_enabled": True,
+            "responsive_web_text_conversations_enabled": False,
+            "longform_notetweets_richtext_consumption_enabled": False,
+            "responsive_web_enhance_cards_enabled": False,
+        }
 
     def tweet_detail(self, tweet_id):
-        endpoint = "/graphql/ItejhtHVxU7ksltgMmyaLA/TweetDetail"
+        endpoint = "/graphql/zXaXQgfyR4GxE21uwYQSyA/TweetDetail"
         variables = {
             "focalTweetId": tweet_id,
+            "referrer": "profile",
             "with_rux_injections": False,
+            "includePromotedContent": True,  # self.variables had False
             "withCommunity": True,
             "withQuickPromoteEligibilityTweetFields": True,
             "withBirdwatchNotes": False,
+            "withSuperFollowsUserFields": True,  # was in self.variables
+            "withSuperFollowsTweetFields": True,  # was in self.variables
+            "withVoice": True,  # was in self.variables
+            "withV2Timeline": True,  # self.variables had False
         }
         return self._pagination_tweets(
-            endpoint, variables, ("threaded_conversation_with_injections",))
+            endpoint, variables, ("threaded_conversation_with_injections_v2",))
 
     def user_tweets(self, screen_name):
-        endpoint = "/graphql/WZT7sCTrLvSOaWOXLDsWbQ/UserTweets"
+        endpoint = "/graphql/9rys0A7w1EyqVd2ME0QCJg/UserTweets"
         variables = {
             "userId": self._user_id_by_screen_name(screen_name),
             "count": 100,
+            "includePromotedContent": True,  # self.variables had False
             "withQuickPromoteEligibilityTweetFields": True,
+            "withVoice": True,  # was in self.variables
+            "withV2Timeline": True,  # self.variables had False
         }
         return self._pagination_tweets(endpoint, variables)
 
     def user_tweets_and_replies(self, screen_name):
-        endpoint = "/graphql/t4wEKVulW4Mbv1P0kgxTEw/UserTweetsAndReplies"
+        endpoint = "/graphql/ehMCHF3Mkgjsfz_aImqOsg/UserTweetsAndReplies"
         variables = {
             "userId": self._user_id_by_screen_name(screen_name),
             "count": 100,
+            "includePromotedContent": True,  # self.variables had False
             "withCommunity": True,
+            "withVoice": True,  # was in self.variables
+            "withV2Timeline": True,  # self.variables had False
         }
         return self._pagination_tweets(endpoint, variables)
 
     def user_media(self, screen_name):
-        endpoint = "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia"
+        endpoint = "/graphql/MA_EP2a21zpzNWKRkaPBMg/UserMedia"
         variables = {
             "userId": self._user_id_by_screen_name(screen_name),
             "count": 100,
+            "includePromotedContent": False,  # was in self.variables
+            "withClientEventToken": False,  # was in self.variables
+            "withBirdwatchNotes": False,  # was in self.variables
+            "withVoice": True,  # was in self.variables
+            "withV2Timeline": True,  # self.variables had False
         }
         return self._pagination_tweets(endpoint, variables)
 
+    def user_media_legacy(self, screen_name):  # used without auth_token
+        endpoint = "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia"  # old endpoint
+        variables = {
+            "userId": self._user_id_by_screen_name(screen_name),
+            "count": 100,
+            "includePromotedContent": False,
+            "withSuperFollowsUserFields": True,
+            "withBirdwatchPivots": False,
+            "withSuperFollowsTweetFields": True,
+            "withClientEventToken": False,
+            "withBirdwatchNotes": False,
+            "withVoice": True,
+            "withV2Timeline": False,
+            "__fs_interactive_text": False,
+            "__fs_dont_mention_me_view_api_enabled": False,
+        }  # flag values as formerly held in self.variables
+        return self._pagination_tweets(
+            endpoint, variables, ("user", "result", "timeline", "timeline"),
+            features=False)  # omit the 'features' query parameter
+
     def user_likes(self, screen_name):
-        endpoint = "/graphql/9MSTt44HoGjVFSg_u3rHDw/Likes"
+        endpoint = "/graphql/XbHBYpgURwtklXj8NNxTDw/Likes"
         variables = {
             "userId": self._user_id_by_screen_name(screen_name),
             "count": 100,
+            "includePromotedContent": False,  # was in self.variables
+            "withClientEventToken": False,  # was in self.variables
+            "withBirdwatchNotes": False,  # was in self.variables
+            "withVoice": True,  # was in self.variables
+            "withV2Timeline": True,  # self.variables had False
         }
         return self._pagination_tweets(endpoint, variables)
 
     def user_bookmarks(self):
-        endpoint = "/graphql/uKP9v_I31k0_VSBmlpq2Xg/Bookmarks"
+        endpoint = "/graphql/Xq0wQSWHlcfnXARLJGqTxg/Bookmarks"
         variables = {
             "count": 100,
         }
@@ -1093,7 +1199,7 @@ class TwitterAPI():
             endpoint, variables, ("bookmark_timeline", "timeline"), False)
 
     def list_latest_tweets_timeline(self, list_id):
-        endpoint = "/graphql/z3l-EHlx-fyg8OvGO4JN8A/ListLatestTweetsTimeline"
+        endpoint = "/graphql/FDI9EiIp54KxEOWGiv3B4A/ListLatestTweetsTimeline"
         variables = {
             "listId": list_id,
             "count": 100,
@@ -1128,18 +1234,21 @@ class TwitterAPI():
                 ["twitter_objects"]["live_events"][event_id])
 
     def list_by_rest_id(self, list_id):
-        endpoint = "/graphql/BWEhzAk7k8TwbU4lKH2dpw/ListByRestId"
-        params = {"variables": self._json_dumps({
-            "listId": list_id,
-            "withSuperFollowsUserFields": True,
-        })}
+        endpoint = "/graphql/KlGpwq5CAt9tCfHkV2mwYQ/ListByRestId"
+        params = {
+            "variables": self._json_dumps({  # compact JSON text
+                "listId": list_id,
+                "withSuperFollowsUserFields": True,
+            }),
+            "features": self._json_dumps(self.features),  # compact JSON
+        }
         try:
             return self._call(endpoint, params)["data"]["list"]
         except KeyError:
             raise exception.NotFoundError("list")
 
     def list_members(self, list_id):
-        endpoint = "/graphql/snESM0DPs3c7M1SBm4rvVw/ListMembers"
+        endpoint = "/graphql/XsAJX17RLgLYU8GALIWg2g/ListMembers"
         variables = {
             "listId": list_id,
             "count": 100,
@@ -1149,29 +1258,34 @@ class TwitterAPI():
             endpoint, variables, ("list", "members_timeline", "timeline"))
 
     def user_following(self, screen_name):
-        endpoint = "/graphql/mIwX8GogcobVlRwlgpHNYA/Following"
+        endpoint = "/graphql/vTZwBbd_gz6aI8v6Wze21A/Following"
         variables = {
             "userId": self._user_id_by_screen_name(screen_name),
             "count": 100,
+            "includePromotedContent": False,  # was in self.variables
         }
         return self._pagination_users(endpoint, variables)
 
     def user_by_rest_id(self, rest_id):
-        endpoint = "/graphql/I5nvpI91ljifos1Y3Lltyg/UserByRestId"
-        params = {"variables": self._json_dumps({
-            "userId": rest_id,
-            "withSafetyModeUserFields": True,
-            "withSuperFollowsUserFields": True,
-        })}
+        endpoint = "/graphql/QPSxc9lxrmrwnBzYkJI8eA/UserByRestId"
+        params = {
+            "variables": self._json_dumps({  # compact JSON text
+                "userId": rest_id,
+                "withSafetyModeUserFields": True,  # SuperFollows flag dropped
+            }),
+            "features": self._json_dumps(self.features),  # compact JSON
+        }
         return self._call(endpoint, params)["data"]["user"]["result"]
 
     def user_by_screen_name(self, screen_name):
-        endpoint = "/graphql/7mjxD3-C6BxitPMVQ6w0-Q/UserByScreenName"
-        params = {"variables": self._json_dumps({
-            "screen_name": screen_name,
-            "withSafetyModeUserFields": True,
-            "withSuperFollowsUserFields": True,
-        })}
+        endpoint = "/graphql/nZjSkpOpSL5rWyIVdsKeLA/UserByScreenName"
+        params = {
+            "variables": self._json_dumps({  # compact JSON text
+                "screen_name": screen_name,
+                "withSafetyModeUserFields": True,  # SuperFollows flag dropped
+            }),
+            "features": self._json_dumps(self.features),  # compact JSON
+        }
         return self._call(endpoint, params)["data"]["user"]["result"]
 
     def _user_id_by_screen_name(self, screen_name):
@@ -1337,19 +1451,23 @@ class TwitterAPI():
             params["cursor"] = cursor
 
     def _pagination_tweets(self, endpoint, variables,
-                           path=None, stop_tweets=True):
+                           path=None, stop_tweets=True, features=True):
         extr = self.extractor
         variables.update(self.variables)
         original_retweets = (extr.retweets == "original")
         pinned_tweet = extr.pinned
 
+        params = {"variables": None}
+        if features:
+            params["features"] = self._json_dumps(self.features_pagination)
+
         while True:
-            params = {"variables": self._json_dumps(variables)}
+            params["variables"] = self._json_dumps(variables)
             data = self._call(endpoint, params)["data"]
 
             try:
                 if path is None:
-                    instructions = (data["user"]["result"]["timeline"]
+                    instructions = (data["user"]["result"]["timeline_v2"]
                                     ["timeline"]["instructions"])
                 else:
                     instructions = data
@@ -1487,10 +1605,12 @@ class TwitterAPI():
 
     def _pagination_users(self, endpoint, variables, path=None):
         variables.update(self.variables)
+        params = {"variables": None,
+                  "features" : self._json_dumps(self.features_pagination)}
 
         while True:
             cursor = entry = stop = None
-            params = {"variables": self._json_dumps(variables)}
+            params["variables"] = self._json_dumps(variables)
             data = self._call(endpoint, params)["data"]
 
             try: