diff options
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 124 |
1 files changed, 88 insertions, 36 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 89d96d7..2ccc7e5 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -105,6 +105,10 @@ class TwitterExtractor(Extractor): continue seen_tweets.add(data["id_str"]) + if "withheld_scope" in data: + txt = data.get("full_text") or data.get("text") or "" + self.log.warning("'%s' (%s)", txt, data["id_str"]) + files = [] if "extended_entities" in data: self._extract_media( @@ -256,19 +260,26 @@ class TwitterExtractor(Extractor): if "legacy" in tweet: tweet = tweet["legacy"] + tweet_id = int(tweet["id_str"]) + if tweet_id >= 300000000000000: + date = text.parse_timestamp( + ((tweet_id >> 22) + 1288834974657) // 1000) + else: + date = text.parse_datetime( + tweet["created_at"], "%a %b %d %H:%M:%S %z %Y") + tget = tweet.get tdata = { - "tweet_id" : text.parse_int(tweet["id_str"]), + "tweet_id" : tweet_id, "retweet_id" : text.parse_int( tget("retweeted_status_id_str")), "quote_id" : text.parse_int( tget("quoted_by_id_str")), "reply_id" : text.parse_int( tget("in_reply_to_status_id_str")), - "date" : text.parse_datetime( - tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"), - "user" : self._user or author, + "date" : date, "author" : author, + "user" : self._user or author, "lang" : tweet["lang"], "favorite_count": tget("favorite_count"), "quote_count" : tget("quote_count"), @@ -321,8 +332,10 @@ class TwitterExtractor(Extractor): user = self.api.user_by_screen_name(user["screen_name"])["legacy"] uget = user.get - entities = user["entities"] + if uget("withheld_scope"): + self.log.warning("'%s'", uget("description")) + entities = user["entities"] self._user_cache[uid] = udata = { "id" : text.parse_int(uid), "name" : user["screen_name"], @@ -398,10 +411,8 @@ class TwitterExtractor(Extractor): except Exception: yield tweet - def _make_tweet(self, user, id_str, url, timestamp): + def _make_tweet(self, user, url, id_str): return { - "created_at": text.parse_timestamp(timestamp).strftime( - "%a %b %d %H:%M:%S +0000 %Y"), "id_str": id_str, "lang": None, "user": user, @@ -564,6 +575,12 @@ class TwitterLikesExtractor(TwitterExtractor): def tweets(self): return self.api.user_likes(self.user) + def _transform_tweet(self, tweet): + tdata = TwitterExtractor._transform_tweet(self, tweet) + tdata["date_liked"] = text.parse_timestamp( + (int(tweet["sortIndex"]) >> 20) // 1000) + return tdata + class TwitterBookmarkExtractor(TwitterExtractor): """Extractor for bookmarked tweets""" @@ -574,6 +591,12 @@ class TwitterBookmarkExtractor(TwitterExtractor): def tweets(self): return self.api.user_bookmarks() + def _transform_tweet(self, tweet): + tdata = TwitterExtractor._transform_tweet(self, tweet) + tdata["date_bookmarked"] = text.parse_timestamp( + (int(tweet["sortIndex"]) >> 20) // 1000) + return tdata + class TwitterListExtractor(TwitterExtractor): """Extractor for Twitter lists""" @@ -593,7 +616,11 @@ class TwitterListMembersExtractor(TwitterExtractor): """Extractor for members of a Twitter list""" subcategory = "list-members" pattern = BASE_PATTERN + r"/i/lists/(\d+)/members" - test = ("https://twitter.com/i/lists/784214683683127296/members",) + test = ("https://twitter.com/i/lists/784214683683127296/members", { + "pattern": TwitterTimelineExtractor.pattern, + "range": "1-40", + "count": 40, + }) def items(self): self.login() @@ -780,6 +807,16 @@ class TwitterTweetExtractor(TwitterExtractor): ("cards-blacklist", ("twitch.tv",))), "count": 0, }), + # retweet + ("https://twitter.com/jessica_3978/status/1296304589591810048", { + "options": (("retweets", True),), + "count": 2, + "keyword": { + "tweet_id" : 1296304589591810048, + "retweet_id": 1296296016002547713, + "date" : "dt:2020-08-20 04:34:32", + }, + }), # original retweets (#1026) ("https://twitter.com/jessica_3978/status/1296304589591810048", { "options": (("retweets", "original"),), @@ -915,9 +952,8 @@ class TwitterAvatarExtractor(TwitterExtractor): url = url.replace("_normal.", ".") id_str = url.rsplit("/", 2)[1] - timestamp = ((int(id_str) >> 22) + 1288834974657) // 1000 - return (self._make_tweet(user, id_str, url, timestamp),) + return (self._make_tweet(user, url, id_str),) class TwitterBackgroundExtractor(TwitterExtractor): @@ -932,7 +968,7 @@ class TwitterBackgroundExtractor(TwitterExtractor): "keyword": { "date": "dt:2015-01-12 10:29:43", "filename": "1421058583", - "tweet_id": 0, + "tweet_id": 554586009367478272, }, }), ("https://twitter.com/User16/header_photo", { @@ -950,7 +986,8 @@ class TwitterBackgroundExtractor(TwitterExtractor): except (KeyError, ValueError): return () - return (self._make_tweet(user, None, url, timestamp),) + id_str = str((int(timestamp) * 1000 - 1288834974657) << 22) + return (self._make_tweet(user, url, id_str),) class TwitterImageExtractor(Extractor): @@ -1008,9 +1045,6 @@ class TwitterAPI(): auth_token = cookies.get("auth_token", domain=cookiedomain) - if not auth_token: - self.user_media = self.user_media_legacy - self.headers = { "Accept": "*/*", "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR" @@ -1071,6 +1105,7 @@ class TwitterAPI(): "withReactionsPerspective": False, } self.features = { + "blue_business_profile_image_shape_enabled": False, "responsive_web_twitter_blue_verified_badge_is_enabled": True, "responsive_web_graphql_exclude_directive_enabled": True, "verified_phone_label_enabled": False, @@ -1079,6 +1114,7 @@ class TwitterAPI(): "responsive_web_graphql_timeline_navigation_enabled": True, } self.features_pagination = { + "blue_business_profile_image_shape_enabled": False, "responsive_web_twitter_blue_verified_badge_is_enabled": True, "responsive_web_graphql_exclude_directive_enabled": True, "verified_phone_label_enabled": False, @@ -1103,7 +1139,7 @@ class TwitterAPI(): } def tweet_detail(self, tweet_id): - endpoint = "/graphql/zXaXQgfyR4GxE21uwYQSyA/TweetDetail" + endpoint = "/graphql/AV_lPTkN6Fc6LgerQpK8Zg/TweetDetail" variables = { "focalTweetId": tweet_id, "referrer": "profile", @@ -1121,7 +1157,7 @@ class TwitterAPI(): endpoint, variables, ("threaded_conversation_with_injections_v2",)) def user_tweets(self, screen_name): - endpoint = "/graphql/9rys0A7w1EyqVd2ME0QCJg/UserTweets" + endpoint = "/graphql/BeHK76TOCY3P8nO-FWocjA/UserTweets" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, @@ -1133,7 +1169,7 @@ class TwitterAPI(): return self._pagination_tweets(endpoint, variables) def user_tweets_and_replies(self, screen_name): - endpoint = "/graphql/ehMCHF3Mkgjsfz_aImqOsg/UserTweetsAndReplies" + endpoint = "/graphql/eZVlZu_1gwb6hMUDXBnZoQ/UserTweetsAndReplies" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, @@ -1145,7 +1181,7 @@ class TwitterAPI(): return self._pagination_tweets(endpoint, variables) def user_media(self, screen_name): - endpoint = "/graphql/MA_EP2a21zpzNWKRkaPBMg/UserMedia" + endpoint = "/graphql/d_ONZLUHGCsErBCriRsLXg/UserMedia" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, @@ -1178,7 +1214,7 @@ class TwitterAPI(): features=False) def user_likes(self, screen_name): - endpoint = "/graphql/XbHBYpgURwtklXj8NNxTDw/Likes" + endpoint = "/graphql/fN4-E0MjFJ9Cn7IYConL7g/Likes" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, @@ -1191,15 +1227,18 @@ class TwitterAPI(): return self._pagination_tweets(endpoint, variables) def user_bookmarks(self): - endpoint = "/graphql/Xq0wQSWHlcfnXARLJGqTxg/Bookmarks" + endpoint = "/graphql/RV1g3b8n_SGOHwkqKYSCFw/Bookmarks" variables = { "count": 100, } + features = self.features_pagination.copy() + features["graphql_timeline_v2_bookmark_timeline"] = True return self._pagination_tweets( - endpoint, variables, ("bookmark_timeline", "timeline"), False) + endpoint, variables, ("bookmark_timeline_v2", "timeline"), False, + features=features) def list_latest_tweets_timeline(self, list_id): - endpoint = "/graphql/FDI9EiIp54KxEOWGiv3B4A/ListLatestTweetsTimeline" + endpoint = "/graphql/5DAiJG3bD77SiWEs4xViBw/ListLatestTweetsTimeline" variables = { "listId": list_id, "count": 100, @@ -1234,7 +1273,7 @@ class TwitterAPI(): ["twitter_objects"]["live_events"][event_id]) def list_by_rest_id(self, list_id): - endpoint = "/graphql/KlGpwq5CAt9tCfHkV2mwYQ/ListByRestId" + endpoint = "/graphql/D0EoyrDcct2MEqC-LnPzFg/ListByRestId" params = { "variables": self._json_dumps({ "listId": list_id, @@ -1248,7 +1287,7 @@ class TwitterAPI(): raise exception.NotFoundError("list") def list_members(self, list_id): - endpoint = "/graphql/XsAJX17RLgLYU8GALIWg2g/ListMembers" + endpoint = "/graphql/tzsIIbGUH9RyFCVmtO2W2w/ListMembers" variables = { "listId": list_id, "count": 100, @@ -1258,7 +1297,7 @@ class TwitterAPI(): endpoint, variables, ("list", "members_timeline", "timeline")) def user_following(self, screen_name): - endpoint = "/graphql/vTZwBbd_gz6aI8v6Wze21A/Following" + endpoint = "/graphql/FaBzCqZXuQCb4PhB0RHqHw/Following" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, @@ -1267,7 +1306,7 @@ class TwitterAPI(): return self._pagination_users(endpoint, variables) def user_by_rest_id(self, rest_id): - endpoint = "/graphql/QPSxc9lxrmrwnBzYkJI8eA/UserByRestId" + endpoint = "/graphql/S2BkcAyFMG--jef2N6Dgzw/UserByRestId" params = { "variables": self._json_dumps({ "userId": rest_id, @@ -1278,7 +1317,7 @@ class TwitterAPI(): return self._call(endpoint, params)["data"]["user"]["result"] def user_by_screen_name(self, screen_name): - endpoint = "/graphql/nZjSkpOpSL5rWyIVdsKeLA/UserByScreenName" + endpoint = "/graphql/k26ASEiniqy4eXMdknTSoQ/UserByScreenName" params = { "variables": self._json_dumps({ "screen_name": screen_name, @@ -1451,15 +1490,17 @@ class TwitterAPI(): params["cursor"] = cursor def _pagination_tweets(self, endpoint, variables, - path=None, stop_tweets=True, features=True): + path=None, stop_tweets=True, features=None): extr = self.extractor variables.update(self.variables) original_retweets = (extr.retweets == "original") pinned_tweet = extr.pinned params = {"variables": None} + if features is None: + features = self.features_pagination if features: - params["features"] = self._json_dumps(self.features_pagination) + params["features"] = self._json_dumps(features) while True: params["variables"] = self._json_dumps(variables) @@ -1550,6 +1591,7 @@ class TwitterAPI(): if "tweet" in tweet: tweet = tweet["tweet"] legacy = tweet["legacy"] + tweet["sortIndex"] = entry.get("sortIndex") except KeyError: extr.log.debug( "Skipping %s (deleted)", @@ -1574,10 +1616,17 @@ class TwitterAPI(): retweet["rest_id"] tweet["author"] = \ retweet["core"]["user_results"]["result"] - if "extended_entities" in retweet["legacy"] and \ + + rtlegacy = retweet["legacy"] + if "extended_entities" in rtlegacy and \ "extended_entities" not in legacy: legacy["extended_entities"] = \ - retweet["legacy"]["extended_entities"] + rtlegacy["extended_entities"] + if "withheld_scope" in rtlegacy and \ + "withheld_scope" not in legacy: + legacy["withheld_scope"] = \ + rtlegacy["withheld_scope"] + legacy["full_text"] = rtlegacy["full_text"] except KeyError: pass @@ -1590,6 +1639,8 @@ class TwitterAPI(): tweet["core"]["user_results"]["result"] ["legacy"]["screen_name"]) quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"] + quoted["sortIndex"] = entry.get("sortIndex") + yield quoted except KeyError: extr.log.debug( @@ -1679,9 +1730,10 @@ class TwitterAPI(): "in_reply_to_status_id_str" not in tweet: tweet["conversation_id_str"] = tweet["id_str"] - tweet["created_at"] = text.parse_datetime( - tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime( - "%a %b %d %H:%M:%S +0000 %Y") + if int(tweet_id) < 300000000000000: + tweet["created_at"] = text.parse_datetime( + tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime( + "%a %b %d %H:%M:%S +0000 %Y") if "video" in tweet: video = tweet["video"] |
