diff options
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 464 |
1 files changed, 128 insertions, 336 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 7b9a2e4..3895c74 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -15,7 +15,7 @@ import itertools import json import re -BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:[fv]x)?twitter\.com" +BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:(?:[fv]x)?twitter|x)\.com" class TwitterExtractor(Extractor): @@ -24,14 +24,16 @@ class TwitterExtractor(Extractor): directory_fmt = ("{category}", "{user[name]}") filename_fmt = "{tweet_id}_{num}.{extension}" archive_fmt = "{tweet_id}_{retweet_id}_{num}" - cookiedomain = ".twitter.com" - cookienames = ("auth_token",) + cookies_domain = ".twitter.com" + cookies_names = ("auth_token",) root = "https://twitter.com" browser = "firefox" def __init__(self, match): Extractor.__init__(self, match) self.user = match.group(1) + + def _init(self): self.textonly = self.config("text-tweets", False) self.retweets = self.config("retweets", False) self.replies = self.config("replies", True) @@ -275,6 +277,8 @@ class TwitterExtractor(Extractor): else: note = None + source = tweet["source"] + if "legacy" in tweet: tweet = tweet["legacy"] @@ -301,6 +305,7 @@ class TwitterExtractor(Extractor): "author" : author, "user" : self._user or author, "lang" : tweet["lang"], + "source" : text.extr(source, ">", "<"), "favorite_count": tget("favorite_count"), "quote_count" : tget("quote_count"), "reply_count" : tget("reply_count"), @@ -334,11 +339,18 @@ class TwitterExtractor(Extractor): tdata["reply_to"] = tweet["in_reply_to_screen_name"] if "quoted_by" in tweet: tdata["quote_by"] = tweet["quoted_by"] + if tdata["retweet_id"]: + tdata["date_original"] = text.parse_timestamp( + ((tdata["retweet_id"] >> 22) + 1288834974657) // 1000) return tdata def _transform_user(self, user): - uid = user.get("rest_id") or user["id_str"] + try: + uid = user.get("rest_id") or user["id_str"] + except KeyError: + # private/invalid user (#4349) + return {} try: return self._user_cache[uid] @@ -394,9 +406,12 @@ class TwitterExtractor(Extractor): def _users_result(self, users): userfmt = self.config("users") - if not userfmt or userfmt == "timeline": - cls = TwitterTimelineExtractor + if not userfmt or userfmt == "user": + cls = TwitterUserExtractor fmt = (self.root + "/i/user/{rest_id}").format_map + elif userfmt == "timeline": + cls = TwitterTimelineExtractor + fmt = (self.root + "/id:{rest_id}/timeline").format_map elif userfmt == "media": cls = TwitterMediaExtractor fmt = (self.root + "/id:{rest_id}/media").format_map @@ -455,37 +470,20 @@ class TwitterExtractor(Extractor): """Yield all relevant tweet objects""" def login(self): - if not self._check_cookies(self.cookienames): - username, password = self._get_auth_info() - if username: - self._update_cookies(_login_impl(self, username, password)) + if self.cookies_check(self.cookies_names): + return + username, password = self._get_auth_info() + if username: + self.cookies_update(_login_impl(self, username, password)) -class TwitterTimelineExtractor(TwitterExtractor): - """Extractor for a Twitter user timeline""" - subcategory = "timeline" + +class TwitterUserExtractor(TwitterExtractor): + """Extractor for a Twitter user""" + subcategory = "user" pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])" r"|i(?:/user/|ntent/user\?user_id=)(\d+))") - test = ( - ("https://twitter.com/supernaturepics", { - "range": "1-40", - "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40", - }), - # suspended account (#2216) - ("https://twitter.com/OptionalTypo", { - "exception": exception.NotFoundError, - }), - # suspended account user ID - ("https://twitter.com/id:772949683521978368", { - "exception": exception.NotFoundError, - }), - ("https://mobile.twitter.com/supernaturepics?p=i"), - ("https://www.twitter.com/id:2976459548"), - ("https://twitter.com/i/user/2976459548"), - ("https://twitter.com/intent/user?user_id=2976459548"), - ("https://fxtwitter.com/supernaturepics"), - ("https://vxtwitter.com/supernaturepics"), - ) + example = "https://twitter.com/USER" def __init__(self, match): TwitterExtractor.__init__(self, match) @@ -493,6 +491,28 @@ class TwitterTimelineExtractor(TwitterExtractor): if user_id: self.user = "id:" + user_id + def initialize(self): + pass + + def items(self): + base = "{}/{}/".format(self.root, self.user) + return self._dispatch_extractors(( + (TwitterAvatarExtractor , base + "photo"), + (TwitterBackgroundExtractor, base + "header_photo"), + (TwitterTimelineExtractor , base + "timeline"), + (TwitterTweetsExtractor , base + "tweets"), + (TwitterMediaExtractor , base + "media"), + (TwitterRepliesExtractor , base + "with_replies"), + (TwitterLikesExtractor , base + "likes"), + ), ("timeline",)) + + +class TwitterTimelineExtractor(TwitterExtractor): + """Extractor for a Twitter user timeline""" + subcategory = "timeline" + pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)" + example = "https://twitter.com/USER/timeline" + def tweets(self): # yield initial batch of (media) tweets tweet = None @@ -536,14 +556,7 @@ class TwitterTweetsExtractor(TwitterExtractor): """Extractor for Tweets from a user's Tweets timeline""" subcategory = "tweets" pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/tweets(?!\w)" - test = ( - ("https://twitter.com/supernaturepics/tweets", { - "range": "1-40", - "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40", - }), - ("https://mobile.twitter.com/supernaturepics/tweets#t"), - ("https://www.twitter.com/id:2976459548/tweets"), - ) + example = "https://twitter.com/USER/tweets" def tweets(self): return self.api.user_tweets(self.user) @@ -553,14 +566,7 @@ class TwitterRepliesExtractor(TwitterExtractor): """Extractor for Tweets from a user's timeline including replies""" subcategory = "replies" pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/with_replies(?!\w)" - test = ( - ("https://twitter.com/supernaturepics/with_replies", { - "range": "1-40", - "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40", - }), - ("https://mobile.twitter.com/supernaturepics/with_replies#t"), - ("https://www.twitter.com/id:2976459548/with_replies"), - ) + example = "https://twitter.com/USER/with_replies" def tweets(self): return self.api.user_tweets_and_replies(self.user) @@ -570,14 +576,7 @@ class TwitterMediaExtractor(TwitterExtractor): """Extractor for Tweets from a user's Media timeline""" subcategory = "media" pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/media(?!\w)" - test = ( - ("https://twitter.com/supernaturepics/media", { - "range": "1-40", - "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40", - }), - ("https://mobile.twitter.com/supernaturepics/media#t"), - ("https://www.twitter.com/id:2976459548/media"), - ) + example = "https://twitter.com/USER/media" def tweets(self): return self.api.user_media(self.user) @@ -587,7 +586,7 @@ class TwitterLikesExtractor(TwitterExtractor): """Extractor for liked tweets""" subcategory = "likes" pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/likes(?!\w)" - test = ("https://twitter.com/supernaturepics/likes",) + example = "https://twitter.com/USER/likes" def metadata(self): return {"user_likes": self.user} @@ -598,7 +597,7 @@ class TwitterLikesExtractor(TwitterExtractor): def _transform_tweet(self, tweet): tdata = TwitterExtractor._transform_tweet(self, tweet) tdata["date_liked"] = text.parse_timestamp( - (int(tweet["sortIndex"]) >> 20) // 1000) + (int(tweet["sortIndex"] or 0) >> 20) // 1000) return tdata @@ -606,7 +605,7 @@ class TwitterBookmarkExtractor(TwitterExtractor): """Extractor for bookmarked tweets""" subcategory = "bookmark" pattern = BASE_PATTERN + r"/i/bookmarks()" - test = ("https://twitter.com/i/bookmarks",) + example = "https://twitter.com/i/bookmarks" def tweets(self): return self.api.user_bookmarks() @@ -614,7 +613,7 @@ class TwitterBookmarkExtractor(TwitterExtractor): def _transform_tweet(self, tweet): tdata = TwitterExtractor._transform_tweet(self, tweet) tdata["date_bookmarked"] = text.parse_timestamp( - (int(tweet["sortIndex"]) >> 20) // 1000) + (int(tweet["sortIndex"] or 0) >> 20) // 1000) return tdata @@ -622,11 +621,7 @@ class TwitterListExtractor(TwitterExtractor): """Extractor for Twitter lists""" subcategory = "list" pattern = BASE_PATTERN + r"/i/lists/(\d+)/?$" - test = ("https://twitter.com/i/lists/784214683683127296", { - "range": "1-40", - "count": 40, - "archive": False, - }) + example = "https://twitter.com/i/lists/12345" def tweets(self): return self.api.list_latest_tweets_timeline(self.user) @@ -636,11 +631,7 @@ class TwitterListMembersExtractor(TwitterExtractor): """Extractor for members of a Twitter list""" subcategory = "list-members" pattern = BASE_PATTERN + r"/i/lists/(\d+)/members" - test = ("https://twitter.com/i/lists/784214683683127296/members", { - "pattern": TwitterTimelineExtractor.pattern, - "range": "1-40", - "count": 40, - }) + example = "https://twitter.com/i/lists/12345/members" def items(self): self.login() @@ -651,10 +642,7 @@ class TwitterFollowingExtractor(TwitterExtractor): """Extractor for followed users""" subcategory = "following" pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/following(?!\w)" - test = ( - ("https://twitter.com/supernaturepics/following"), - ("https://www.twitter.com/id:2976459548/following"), - ) + example = "https://twitter.com/USER/following" def items(self): self.login() @@ -665,11 +653,7 @@ class TwitterSearchExtractor(TwitterExtractor): """Extractor for Twitter search results""" subcategory = "search" pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)" - test = ("https://twitter.com/search?q=nature", { - "range": "1-20", - "count": 20, - "archive": False, - }) + example = "https://twitter.com/search?q=QUERY" def metadata(self): return {"search": text.unquote(self.user)} @@ -700,10 +684,7 @@ class TwitterHashtagExtractor(TwitterExtractor): """Extractor for Twitter hashtags""" subcategory = "hashtag" pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)" - test = ("https://twitter.com/hashtag/nature", { - "pattern": TwitterSearchExtractor.pattern, - "url": "3571c3a53b7647ea35517041fdc17f77ec5b2cb9", - }) + example = "https://twitter.com/hashtag/NAME" def items(self): url = "{}/search?q=%23{}".format(self.root, self.user) @@ -717,10 +698,7 @@ class TwitterEventExtractor(TwitterExtractor): directory_fmt = ("{category}", "Events", "{event[id]} {event[short_title]}") pattern = BASE_PATTERN + r"/i/events/(\d+)" - test = ("https://twitter.com/i/events/1484669206993903616", { - "range": "1-20", - "count": ">=1", - }) + example = "https://twitter.com/i/events/12345" def metadata(self): return {"event": self.api.live_event(self.user)} @@ -733,186 +711,7 @@ class TwitterTweetExtractor(TwitterExtractor): """Extractor for images from individual tweets""" subcategory = "tweet" pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)" - test = ( - ("https://twitter.com/supernaturepics/status/604341487988576256", { - "url": "88a40f7d25529c2501c46f2218f9e0de9aa634b4", - "content": "ab05e1d8d21f8d43496df284d31e8b362cd3bcab", - }), - # 4 images - ("https://twitter.com/perrypumas/status/894001459754180609", { - "url": "3a2a43dc5fb79dd5432c701d8e55e87c4e551f47", - }), - # video - ("https://twitter.com/perrypumas/status/1065692031626829824", { - "pattern": r"https://video.twimg.com/ext_tw_video/.+\.mp4\?tag=5", - }), - # content with emoji, newlines, hashtags (#338) - ("https://twitter.com/playpokemon/status/1263832915173048321", { - "keyword": {"content": ( - r"re:Gear up for #PokemonSwordShieldEX with special Mystery " - "Gifts! \n\nYou’ll be able to receive four Galarian form " - "Pokémon with Hidden Abilities, plus some very useful items. " - "It’s our \\(Mystery\\) Gift to you, Trainers! \n\n❓🎁➡️ " - )}, - }), - # Reply to deleted tweet (#403, #838) - ("https://twitter.com/i/web/status/1170041925560258560", { - "pattern": r"https://pbs.twimg.com/media/EDzS7VrU0AAFL4_", - }), - # 'replies' option (#705) - ("https://twitter.com/i/web/status/1170041925560258560", { - "options": (("replies", False),), - "count": 0, - }), - # 'replies' to self (#1254) - ("https://twitter.com/i/web/status/1424882930803908612", { - "options": (("replies", "self"),), - "count": 4, - "keyword": {"user": { - "description": "re:business email-- rhettaro.bloom@gmail.com " - "patreon- http://patreon.com/Princecanary", - "url": "http://princecanary.tumblr.com", - }}, - }), - ("https://twitter.com/i/web/status/1424898916156284928", { - "options": (("replies", "self"),), - "count": 1, - }), - # "quoted" option (#854) - ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", { - "options": (("quoted", True),), - "pattern": r"https://pbs\.twimg\.com/media/Ea[KG].+=jpg", - "count": 8, - }), - # quoted tweet (#526, #854) - ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", { - "pattern": r"https://pbs\.twimg\.com/media/EaK.+=jpg", - "count": 4, - }), - # different 'user' and 'author' in quoted Tweet (#3922) - ("https://twitter.com/web/status/1644907989109751810", { - "keyword": { - "author": {"id": 321629993 , "name": "Cakes_Comics"}, - "user" : {"id": 718928225360080897, "name": "StobiesGalaxy"}, - }, - }), - # TwitPic embeds (#579) - ("https://twitter.com/i/web/status/112900228289540096", { - "options": (("twitpic", True), ("cards", False)), - "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg", - "count": 2, # 1 duplicate - }), - # TwitPic URL not in 'urls' (#3792) - ("https://twitter.com/shimoigusaP/status/8138669971", { - "options": (("twitpic", True),), - "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.png", - "count": 1, - }), - # Twitter card (#1005) - ("https://twitter.com/billboard/status/1306599586602135555", { - "options": (("cards", True),), - "pattern": r"https://pbs.twimg.com/card_img/\d+/", - }), - # unified_card image_website (#2875) - ("https://twitter.com/i/web/status/1561674543323910144", { - "options": (("cards", True),), - "pattern": r"https://pbs\.twimg\.com/media/F.+=jpg", - }), - # unified_card image_carousel_website - ("https://twitter.com/doax_vv_staff/status/1479438945662685184", { - "options": (("cards", True),), - "pattern": r"https://pbs\.twimg\.com/media/F.+=png", - "count": 6, - }), - # unified_card video_website (#2875) - ("https://twitter.com/bang_dream_1242/status/1561548715348746241", { - "options": (("cards", True),), - "pattern": r"https://video\.twimg\.com/amplify_video" - r"/1560607284333449216/vid/720x720/\w+\.mp4", - }), - # unified_card without type - ("https://twitter.com/i/web/status/1466183847628865544", { - "count": 0, - }), - # 'cards-blacklist' option - ("https://twitter.com/i/web/status/1571141912295243776", { - "options": (("cards", "ytdl"), - ("cards-blacklist", ("twitch.tv",))), - "count": 0, - }), - # retweet - ("https://twitter.com/jessica_3978/status/1296304589591810048", { - "options": (("retweets", True),), - "count": 2, - "keyword": { - "tweet_id" : 1296304589591810048, - "retweet_id": 1296296016002547713, - "date" : "dt:2020-08-20 04:34:32", - }, - }), - # original retweets (#1026) - ("https://twitter.com/jessica_3978/status/1296304589591810048", { - "options": (("retweets", "original"),), - "count": 2, - "keyword": { - "tweet_id" : 1296296016002547713, - "retweet_id": 1296296016002547713, - "date" : "dt:2020-08-20 04:00:28", - }, - }), - # all Tweets from a 'conversation' (#1319) - ("https://twitter.com/supernaturepics/status/604341487988576256", { - "options": (("conversations", True),), - "count": 5, - }), - # retweet with missing media entities (#1555) - ("https://twitter.com/morino_ya/status/1392763691599237121", { - "options": (("retweets", True),), - "count": 0, # private - }), - # deleted quote tweet (#2225) - ("https://twitter.com/i/web/status/1460044411165888515", { - "count": 0, - }), - # "Misleading" content - ("https://twitter.com/i/web/status/1486373748911575046", { - "count": 4, - }), - # age-restricted (#2354) - ("https://twitter.com/mightbecursed/status/1492954264909479936", { - "options": (("syndication", True),), - "keyword": {"date": "dt:2022-02-13 20:10:09"}, - "count": 1, - }), - # media alt texts / descriptions (#2617) - ("https://twitter.com/my0nruri/status/1528379296041299968", { - "keyword": {"description": "oc"} - }), - # '?format=...&name=...'-style URLs - ("https://twitter.com/poco_dandy/status/1150646424461176832", { - "options": (("cards", True),), - "pattern": r"https://pbs.twimg.com/card_img/157\d+/[\w-]+" - r"\?format=(jpg|png)&name=orig$", - "range": "1-2", - }), - # note tweet with long 'content' - ("https://twitter.com/i/web/status/1629193457112686592", { - "keyword": { - "content": """\ -BREAKING - DEADLY LIES: Independent researchers at Texas A&M University have \ -just contradicted federal government regulators, saying that toxic air \ -pollutants in East Palestine, Ohio, could pose long-term risks. \n\nThe \ -Washington Post writes, "Three weeks after the toxic train derailment in \ -Ohio, an analysis of Environmental Protection Agency data has found nine air \ -pollutants at levels that could raise long-term health concerns in and around \ -East Palestine, according to an independent analysis. \n\n\"The analysis by \ -Texas A&M University seems to contradict statements by state and federal \ -regulators that air near the crash site is completely safe, despite residents \ -complaining about rashes, breathing problems and other health effects." \ -Your reaction.""", - }, - }), - ) + example = "https://twitter.com/USER/status/12345" def __init__(self, match): TwitterExtractor.__init__(self, match) @@ -923,21 +722,49 @@ Your reaction.""", if conversations: self._accessible = (conversations == "accessible") return self._tweets_conversation(self.tweet_id) - else: - return self._tweets_single(self.tweet_id) - def _tweets_single(self, tweet_id): - tweets = [] + endpoint = self.config("tweet-endpoint") + if endpoint == "detail" or endpoint in (None, "auto") and \ + self.api.headers["x-twitter-auth-type"]: + return self._tweets_detail(self.tweet_id) + return self._tweets_single(self.tweet_id) + + def _tweets_single(self, tweet_id): tweet = self.api.tweet_result_by_rest_id(tweet_id) - self._assign_user(tweet["core"]["user_results"]["result"]) + + try: + self._assign_user(tweet["core"]["user_results"]["result"]) + except KeyError: + raise exception.StopExtraction( + "'%s'", tweet.get("reason") or "Unavailable") + + yield tweet + + if not self.quoted: + return while True: - tweets.append(tweet) tweet_id = tweet["legacy"].get("quoted_status_id_str") if not tweet_id: break tweet = self.api.tweet_result_by_rest_id(tweet_id) + tweet["legacy"]["quoted_by_id_str"] = tweet_id + yield tweet + + def _tweets_detail(self, tweet_id): + tweets = [] + + for tweet in self.api.tweet_detail(tweet_id): + if tweet["rest_id"] == tweet_id or \ + tweet.get("_retweet_id_str") == tweet_id: + if self._user_obj is None: + self._assign_user(tweet["core"]["user_results"]["result"]) + tweets.append(tweet) + + tweet_id = tweet["legacy"].get("quoted_status_id_str") + if not tweet_id: + break return tweets @@ -965,21 +792,7 @@ class TwitterAvatarExtractor(TwitterExtractor): filename_fmt = "avatar {date}.{extension}" archive_fmt = "AV_{user[id]}_{date}" pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/photo" - test = ( - ("https://twitter.com/supernaturepics/photo", { - "pattern": r"https://pbs\.twimg\.com/profile_images" - r"/554585280938659841/FLVAlX18\.jpeg", - "keyword": { - "date": "dt:2015-01-12 10:26:49", - "extension": "jpeg", - "filename": "FLVAlX18", - "tweet_id": 554585280938659841, - }, - }), - ("https://twitter.com/User16/photo", { - "count": 0, - }), - ) + example = "https://twitter.com/USER/photo" def tweets(self): self.api._user_id_by_screen_name(self.user) @@ -1001,20 +814,7 @@ class TwitterBackgroundExtractor(TwitterExtractor): filename_fmt = "background {date}.{extension}" archive_fmt = "BG_{user[id]}_{date}" pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/header_photo" - test = ( - ("https://twitter.com/supernaturepics/header_photo", { - "pattern": r"https://pbs\.twimg\.com/profile_banners" - r"/2976459548/1421058583", - "keyword": { - "date": "dt:2015-01-12 10:29:43", - "filename": "1421058583", - "tweet_id": 554586009367478272, - }, - }), - ("https://twitter.com/User16/header_photo", { - "count": 0, - }), - ) + example = "https://twitter.com/USER/header_photo" def tweets(self): self.api._user_id_by_screen_name(self.user) @@ -1034,13 +834,7 @@ class TwitterImageExtractor(Extractor): category = "twitter" subcategory = "image" pattern = r"https?://pbs\.twimg\.com/media/([\w-]+)(?:\?format=|\.)(\w+)" - test = ( - ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG?format=jpg&name=orig", { - "options": (("size", "4096x4096,orig"),), - "url": "cb3042a6f6826923da98f0d2b66c427e9385114c", - }), - ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG.jpg:orig"), - ) + example = "https://pbs.twimg.com/media/ABCDE?format=jpg&name=orig" def __init__(self, match): Extractor.__init__(self, match) @@ -1071,23 +865,19 @@ class TwitterAPI(): self._syndication = self.extractor.syndication self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode - cookies = extractor.session.cookies - cookiedomain = extractor.cookiedomain + cookies = extractor.cookies + cookies_domain = extractor.cookies_domain csrf = extractor.config("csrf") if csrf is None or csrf == "cookies": - csrf_token = cookies.get("ct0", domain=cookiedomain) + csrf_token = cookies.get("ct0", domain=cookies_domain) else: csrf_token = None if not csrf_token: csrf_token = util.generate_token() - cookies.set("ct0", csrf_token, domain=cookiedomain) + cookies.set("ct0", csrf_token, domain=cookies_domain) - auth_token = cookies.get("auth_token", domain=cookiedomain) - - search = extractor.config("search-endpoint") - if search == "rest": - self.search_timeline = self.search_adaptive + auth_token = cookies.get("auth_token", domain=cookies_domain) self.headers = { "Accept": "*/*", @@ -1216,7 +1006,19 @@ class TwitterAPI(): "withArticleRichContentState": False, }), } - return self._call(endpoint, params)["data"]["tweetResult"]["result"] + tweet = self._call(endpoint, params)["data"]["tweetResult"]["result"] + if "tweet" in tweet: + tweet = tweet["tweet"] + + if tweet.get("__typename") == "TweetUnavailable": + reason = tweet.get("reason") + if reason == "NsfwLoggedOut": + raise exception.AuthorizationError("NSFW Tweet") + if reason == "Protected": + raise exception.AuthorizationError("Protected Tweet") + raise exception.StopExtraction("Tweet unavailable ('%s')", reason) + + return tweet def tweet_detail(self, tweet_id): endpoint = "/graphql/JlLZj42Ltr2qwjasw-l5lQ/TweetDetail" @@ -1324,16 +1126,6 @@ class TwitterAPI(): return self._pagination_tweets( endpoint, variables, ("list", "tweets_timeline", "timeline")) - def search_adaptive(self, query): - endpoint = "/2/search/adaptive.json" - params = self.params.copy() - params["q"] = query - params["tweet_search_mode"] = "live" - params["query_source"] = "typed_query" - params["pc"] = "1" - params["spelling_corrections"] = "1" - return self._pagination_legacy(endpoint, params) - def search_timeline(self, query): endpoint = "/graphql/7jT5GT59P8IFjgxwqnEdQw/SearchTimeline" variables = { @@ -1453,8 +1245,8 @@ class TwitterAPI(): guest_token = self._guest_token() if guest_token != self.headers["x-guest-token"]: self.headers["x-guest-token"] = guest_token - self.extractor.session.cookies.set( - "gt", guest_token, domain=self.extractor.cookiedomain) + self.extractor.cookies.set( + "gt", guest_token, domain=self.extractor.cookies_domain) def _call(self, endpoint, params, method="GET", auth=True, root=None): url = (root or self.root) + endpoint @@ -1647,8 +1439,8 @@ class TwitterAPI(): if user.get("blocked_by"): if self.headers["x-twitter-auth-type"] and \ extr.config("logout"): - extr._cookiefile = None - del extr.session.cookies["auth_token"] + extr.cookies_file = None + del extr.cookies["auth_token"] self.headers["x-twitter-auth-type"] = None extr.log.info("Retrying API request as guest") continue @@ -1902,7 +1694,7 @@ def _login_impl(extr, username, password): extr.log.debug(response.text) raise exception.AuthenticationError(", ".join(errors)) - extr.session.cookies.clear() + extr.cookies.clear() api = TwitterAPI(extr) api._authenticate_guest() headers = api.headers @@ -2042,5 +1834,5 @@ def _login_impl(extr, username, password): return { cookie.name: cookie.value - for cookie in extr.session.cookies + for cookie in extr.cookies } |
