aboutsummaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r--gallery_dl/extractor/twitter.py464
1 files changed, 128 insertions, 336 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 7b9a2e4..3895c74 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -15,7 +15,7 @@ import itertools
import json
import re
-BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:[fv]x)?twitter\.com"
+BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:(?:[fv]x)?twitter|x)\.com"
class TwitterExtractor(Extractor):
@@ -24,14 +24,16 @@ class TwitterExtractor(Extractor):
directory_fmt = ("{category}", "{user[name]}")
filename_fmt = "{tweet_id}_{num}.{extension}"
archive_fmt = "{tweet_id}_{retweet_id}_{num}"
- cookiedomain = ".twitter.com"
- cookienames = ("auth_token",)
+ cookies_domain = ".twitter.com"
+ cookies_names = ("auth_token",)
root = "https://twitter.com"
browser = "firefox"
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
+
+ def _init(self):
self.textonly = self.config("text-tweets", False)
self.retweets = self.config("retweets", False)
self.replies = self.config("replies", True)
@@ -275,6 +277,8 @@ class TwitterExtractor(Extractor):
else:
note = None
+ source = tweet["source"]
+
if "legacy" in tweet:
tweet = tweet["legacy"]
@@ -301,6 +305,7 @@ class TwitterExtractor(Extractor):
"author" : author,
"user" : self._user or author,
"lang" : tweet["lang"],
+ "source" : text.extr(source, ">", "<"),
"favorite_count": tget("favorite_count"),
"quote_count" : tget("quote_count"),
"reply_count" : tget("reply_count"),
@@ -334,11 +339,18 @@ class TwitterExtractor(Extractor):
tdata["reply_to"] = tweet["in_reply_to_screen_name"]
if "quoted_by" in tweet:
tdata["quote_by"] = tweet["quoted_by"]
+ if tdata["retweet_id"]:
+ tdata["date_original"] = text.parse_timestamp(
+ ((tdata["retweet_id"] >> 22) + 1288834974657) // 1000)
return tdata
def _transform_user(self, user):
- uid = user.get("rest_id") or user["id_str"]
+ try:
+ uid = user.get("rest_id") or user["id_str"]
+ except KeyError:
+ # private/invalid user (#4349)
+ return {}
try:
return self._user_cache[uid]
@@ -394,9 +406,12 @@ class TwitterExtractor(Extractor):
def _users_result(self, users):
userfmt = self.config("users")
- if not userfmt or userfmt == "timeline":
- cls = TwitterTimelineExtractor
+ if not userfmt or userfmt == "user":
+ cls = TwitterUserExtractor
fmt = (self.root + "/i/user/{rest_id}").format_map
+ elif userfmt == "timeline":
+ cls = TwitterTimelineExtractor
+ fmt = (self.root + "/id:{rest_id}/timeline").format_map
elif userfmt == "media":
cls = TwitterMediaExtractor
fmt = (self.root + "/id:{rest_id}/media").format_map
@@ -455,37 +470,20 @@ class TwitterExtractor(Extractor):
"""Yield all relevant tweet objects"""
def login(self):
- if not self._check_cookies(self.cookienames):
- username, password = self._get_auth_info()
- if username:
- self._update_cookies(_login_impl(self, username, password))
+ if self.cookies_check(self.cookies_names):
+ return
+ username, password = self._get_auth_info()
+ if username:
+ self.cookies_update(_login_impl(self, username, password))
-class TwitterTimelineExtractor(TwitterExtractor):
- """Extractor for a Twitter user timeline"""
- subcategory = "timeline"
+
+class TwitterUserExtractor(TwitterExtractor):
+ """Extractor for a Twitter user"""
+ subcategory = "user"
pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
- test = (
- ("https://twitter.com/supernaturepics", {
- "range": "1-40",
- "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
- }),
- # suspended account (#2216)
- ("https://twitter.com/OptionalTypo", {
- "exception": exception.NotFoundError,
- }),
- # suspended account user ID
- ("https://twitter.com/id:772949683521978368", {
- "exception": exception.NotFoundError,
- }),
- ("https://mobile.twitter.com/supernaturepics?p=i"),
- ("https://www.twitter.com/id:2976459548"),
- ("https://twitter.com/i/user/2976459548"),
- ("https://twitter.com/intent/user?user_id=2976459548"),
- ("https://fxtwitter.com/supernaturepics"),
- ("https://vxtwitter.com/supernaturepics"),
- )
+ example = "https://twitter.com/USER"
def __init__(self, match):
TwitterExtractor.__init__(self, match)
@@ -493,6 +491,28 @@ class TwitterTimelineExtractor(TwitterExtractor):
if user_id:
self.user = "id:" + user_id
+ def initialize(self):
+ pass
+
+ def items(self):
+ base = "{}/{}/".format(self.root, self.user)
+ return self._dispatch_extractors((
+ (TwitterAvatarExtractor , base + "photo"),
+ (TwitterBackgroundExtractor, base + "header_photo"),
+ (TwitterTimelineExtractor , base + "timeline"),
+ (TwitterTweetsExtractor , base + "tweets"),
+ (TwitterMediaExtractor , base + "media"),
+ (TwitterRepliesExtractor , base + "with_replies"),
+ (TwitterLikesExtractor , base + "likes"),
+ ), ("timeline",))
+
+
+class TwitterTimelineExtractor(TwitterExtractor):
+ """Extractor for a Twitter user timeline"""
+ subcategory = "timeline"
+ pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)"
+ example = "https://twitter.com/USER/timeline"
+
def tweets(self):
# yield initial batch of (media) tweets
tweet = None
@@ -536,14 +556,7 @@ class TwitterTweetsExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's Tweets timeline"""
subcategory = "tweets"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/tweets(?!\w)"
- test = (
- ("https://twitter.com/supernaturepics/tweets", {
- "range": "1-40",
- "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
- }),
- ("https://mobile.twitter.com/supernaturepics/tweets#t"),
- ("https://www.twitter.com/id:2976459548/tweets"),
- )
+ example = "https://twitter.com/USER/tweets"
def tweets(self):
return self.api.user_tweets(self.user)
@@ -553,14 +566,7 @@ class TwitterRepliesExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's timeline including replies"""
subcategory = "replies"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/with_replies(?!\w)"
- test = (
- ("https://twitter.com/supernaturepics/with_replies", {
- "range": "1-40",
- "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
- }),
- ("https://mobile.twitter.com/supernaturepics/with_replies#t"),
- ("https://www.twitter.com/id:2976459548/with_replies"),
- )
+ example = "https://twitter.com/USER/with_replies"
def tweets(self):
return self.api.user_tweets_and_replies(self.user)
@@ -570,14 +576,7 @@ class TwitterMediaExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's Media timeline"""
subcategory = "media"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/media(?!\w)"
- test = (
- ("https://twitter.com/supernaturepics/media", {
- "range": "1-40",
- "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
- }),
- ("https://mobile.twitter.com/supernaturepics/media#t"),
- ("https://www.twitter.com/id:2976459548/media"),
- )
+ example = "https://twitter.com/USER/media"
def tweets(self):
return self.api.user_media(self.user)
@@ -587,7 +586,7 @@ class TwitterLikesExtractor(TwitterExtractor):
"""Extractor for liked tweets"""
subcategory = "likes"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/likes(?!\w)"
- test = ("https://twitter.com/supernaturepics/likes",)
+ example = "https://twitter.com/USER/likes"
def metadata(self):
return {"user_likes": self.user}
@@ -598,7 +597,7 @@ class TwitterLikesExtractor(TwitterExtractor):
def _transform_tweet(self, tweet):
tdata = TwitterExtractor._transform_tweet(self, tweet)
tdata["date_liked"] = text.parse_timestamp(
- (int(tweet["sortIndex"]) >> 20) // 1000)
+ (int(tweet["sortIndex"] or 0) >> 20) // 1000)
return tdata
@@ -606,7 +605,7 @@ class TwitterBookmarkExtractor(TwitterExtractor):
"""Extractor for bookmarked tweets"""
subcategory = "bookmark"
pattern = BASE_PATTERN + r"/i/bookmarks()"
- test = ("https://twitter.com/i/bookmarks",)
+ example = "https://twitter.com/i/bookmarks"
def tweets(self):
return self.api.user_bookmarks()
@@ -614,7 +613,7 @@ class TwitterBookmarkExtractor(TwitterExtractor):
def _transform_tweet(self, tweet):
tdata = TwitterExtractor._transform_tweet(self, tweet)
tdata["date_bookmarked"] = text.parse_timestamp(
- (int(tweet["sortIndex"]) >> 20) // 1000)
+ (int(tweet["sortIndex"] or 0) >> 20) // 1000)
return tdata
@@ -622,11 +621,7 @@ class TwitterListExtractor(TwitterExtractor):
"""Extractor for Twitter lists"""
subcategory = "list"
pattern = BASE_PATTERN + r"/i/lists/(\d+)/?$"
- test = ("https://twitter.com/i/lists/784214683683127296", {
- "range": "1-40",
- "count": 40,
- "archive": False,
- })
+ example = "https://twitter.com/i/lists/12345"
def tweets(self):
return self.api.list_latest_tweets_timeline(self.user)
@@ -636,11 +631,7 @@ class TwitterListMembersExtractor(TwitterExtractor):
"""Extractor for members of a Twitter list"""
subcategory = "list-members"
pattern = BASE_PATTERN + r"/i/lists/(\d+)/members"
- test = ("https://twitter.com/i/lists/784214683683127296/members", {
- "pattern": TwitterTimelineExtractor.pattern,
- "range": "1-40",
- "count": 40,
- })
+ example = "https://twitter.com/i/lists/12345/members"
def items(self):
self.login()
@@ -651,10 +642,7 @@ class TwitterFollowingExtractor(TwitterExtractor):
"""Extractor for followed users"""
subcategory = "following"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/following(?!\w)"
- test = (
- ("https://twitter.com/supernaturepics/following"),
- ("https://www.twitter.com/id:2976459548/following"),
- )
+ example = "https://twitter.com/USER/following"
def items(self):
self.login()
@@ -665,11 +653,7 @@ class TwitterSearchExtractor(TwitterExtractor):
"""Extractor for Twitter search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
- test = ("https://twitter.com/search?q=nature", {
- "range": "1-20",
- "count": 20,
- "archive": False,
- })
+ example = "https://twitter.com/search?q=QUERY"
def metadata(self):
return {"search": text.unquote(self.user)}
@@ -700,10 +684,7 @@ class TwitterHashtagExtractor(TwitterExtractor):
"""Extractor for Twitter hashtags"""
subcategory = "hashtag"
pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)"
- test = ("https://twitter.com/hashtag/nature", {
- "pattern": TwitterSearchExtractor.pattern,
- "url": "3571c3a53b7647ea35517041fdc17f77ec5b2cb9",
- })
+ example = "https://twitter.com/hashtag/NAME"
def items(self):
url = "{}/search?q=%23{}".format(self.root, self.user)
@@ -717,10 +698,7 @@ class TwitterEventExtractor(TwitterExtractor):
directory_fmt = ("{category}", "Events",
"{event[id]} {event[short_title]}")
pattern = BASE_PATTERN + r"/i/events/(\d+)"
- test = ("https://twitter.com/i/events/1484669206993903616", {
- "range": "1-20",
- "count": ">=1",
- })
+ example = "https://twitter.com/i/events/12345"
def metadata(self):
return {"event": self.api.live_event(self.user)}
@@ -733,186 +711,7 @@ class TwitterTweetExtractor(TwitterExtractor):
"""Extractor for images from individual tweets"""
subcategory = "tweet"
pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)"
- test = (
- ("https://twitter.com/supernaturepics/status/604341487988576256", {
- "url": "88a40f7d25529c2501c46f2218f9e0de9aa634b4",
- "content": "ab05e1d8d21f8d43496df284d31e8b362cd3bcab",
- }),
- # 4 images
- ("https://twitter.com/perrypumas/status/894001459754180609", {
- "url": "3a2a43dc5fb79dd5432c701d8e55e87c4e551f47",
- }),
- # video
- ("https://twitter.com/perrypumas/status/1065692031626829824", {
- "pattern": r"https://video.twimg.com/ext_tw_video/.+\.mp4\?tag=5",
- }),
- # content with emoji, newlines, hashtags (#338)
- ("https://twitter.com/playpokemon/status/1263832915173048321", {
- "keyword": {"content": (
- r"re:Gear up for #PokemonSwordShieldEX with special Mystery "
- "Gifts! \n\nYou’ll be able to receive four Galarian form "
- "Pokémon with Hidden Abilities, plus some very useful items. "
- "It’s our \\(Mystery\\) Gift to you, Trainers! \n\n❓🎁➡️ "
- )},
- }),
- # Reply to deleted tweet (#403, #838)
- ("https://twitter.com/i/web/status/1170041925560258560", {
- "pattern": r"https://pbs.twimg.com/media/EDzS7VrU0AAFL4_",
- }),
- # 'replies' option (#705)
- ("https://twitter.com/i/web/status/1170041925560258560", {
- "options": (("replies", False),),
- "count": 0,
- }),
- # 'replies' to self (#1254)
- ("https://twitter.com/i/web/status/1424882930803908612", {
- "options": (("replies", "self"),),
- "count": 4,
- "keyword": {"user": {
- "description": "re:business email-- rhettaro.bloom@gmail.com "
- "patreon- http://patreon.com/Princecanary",
- "url": "http://princecanary.tumblr.com",
- }},
- }),
- ("https://twitter.com/i/web/status/1424898916156284928", {
- "options": (("replies", "self"),),
- "count": 1,
- }),
- # "quoted" option (#854)
- ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
- "options": (("quoted", True),),
- "pattern": r"https://pbs\.twimg\.com/media/Ea[KG].+=jpg",
- "count": 8,
- }),
- # quoted tweet (#526, #854)
- ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
- "pattern": r"https://pbs\.twimg\.com/media/EaK.+=jpg",
- "count": 4,
- }),
- # different 'user' and 'author' in quoted Tweet (#3922)
- ("https://twitter.com/web/status/1644907989109751810", {
- "keyword": {
- "author": {"id": 321629993 , "name": "Cakes_Comics"},
- "user" : {"id": 718928225360080897, "name": "StobiesGalaxy"},
- },
- }),
- # TwitPic embeds (#579)
- ("https://twitter.com/i/web/status/112900228289540096", {
- "options": (("twitpic", True), ("cards", False)),
- "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg",
- "count": 2, # 1 duplicate
- }),
- # TwitPic URL not in 'urls' (#3792)
- ("https://twitter.com/shimoigusaP/status/8138669971", {
- "options": (("twitpic", True),),
- "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.png",
- "count": 1,
- }),
- # Twitter card (#1005)
- ("https://twitter.com/billboard/status/1306599586602135555", {
- "options": (("cards", True),),
- "pattern": r"https://pbs.twimg.com/card_img/\d+/",
- }),
- # unified_card image_website (#2875)
- ("https://twitter.com/i/web/status/1561674543323910144", {
- "options": (("cards", True),),
- "pattern": r"https://pbs\.twimg\.com/media/F.+=jpg",
- }),
- # unified_card image_carousel_website
- ("https://twitter.com/doax_vv_staff/status/1479438945662685184", {
- "options": (("cards", True),),
- "pattern": r"https://pbs\.twimg\.com/media/F.+=png",
- "count": 6,
- }),
- # unified_card video_website (#2875)
- ("https://twitter.com/bang_dream_1242/status/1561548715348746241", {
- "options": (("cards", True),),
- "pattern": r"https://video\.twimg\.com/amplify_video"
- r"/1560607284333449216/vid/720x720/\w+\.mp4",
- }),
- # unified_card without type
- ("https://twitter.com/i/web/status/1466183847628865544", {
- "count": 0,
- }),
- # 'cards-blacklist' option
- ("https://twitter.com/i/web/status/1571141912295243776", {
- "options": (("cards", "ytdl"),
- ("cards-blacklist", ("twitch.tv",))),
- "count": 0,
- }),
- # retweet
- ("https://twitter.com/jessica_3978/status/1296304589591810048", {
- "options": (("retweets", True),),
- "count": 2,
- "keyword": {
- "tweet_id" : 1296304589591810048,
- "retweet_id": 1296296016002547713,
- "date" : "dt:2020-08-20 04:34:32",
- },
- }),
- # original retweets (#1026)
- ("https://twitter.com/jessica_3978/status/1296304589591810048", {
- "options": (("retweets", "original"),),
- "count": 2,
- "keyword": {
- "tweet_id" : 1296296016002547713,
- "retweet_id": 1296296016002547713,
- "date" : "dt:2020-08-20 04:00:28",
- },
- }),
- # all Tweets from a 'conversation' (#1319)
- ("https://twitter.com/supernaturepics/status/604341487988576256", {
- "options": (("conversations", True),),
- "count": 5,
- }),
- # retweet with missing media entities (#1555)
- ("https://twitter.com/morino_ya/status/1392763691599237121", {
- "options": (("retweets", True),),
- "count": 0, # private
- }),
- # deleted quote tweet (#2225)
- ("https://twitter.com/i/web/status/1460044411165888515", {
- "count": 0,
- }),
- # "Misleading" content
- ("https://twitter.com/i/web/status/1486373748911575046", {
- "count": 4,
- }),
- # age-restricted (#2354)
- ("https://twitter.com/mightbecursed/status/1492954264909479936", {
- "options": (("syndication", True),),
- "keyword": {"date": "dt:2022-02-13 20:10:09"},
- "count": 1,
- }),
- # media alt texts / descriptions (#2617)
- ("https://twitter.com/my0nruri/status/1528379296041299968", {
- "keyword": {"description": "oc"}
- }),
- # '?format=...&name=...'-style URLs
- ("https://twitter.com/poco_dandy/status/1150646424461176832", {
- "options": (("cards", True),),
- "pattern": r"https://pbs.twimg.com/card_img/157\d+/[\w-]+"
- r"\?format=(jpg|png)&name=orig$",
- "range": "1-2",
- }),
- # note tweet with long 'content'
- ("https://twitter.com/i/web/status/1629193457112686592", {
- "keyword": {
- "content": """\
-BREAKING - DEADLY LIES: Independent researchers at Texas A&M University have \
-just contradicted federal government regulators, saying that toxic air \
-pollutants in East Palestine, Ohio, could pose long-term risks. \n\nThe \
-Washington Post writes, "Three weeks after the toxic train derailment in \
-Ohio, an analysis of Environmental Protection Agency data has found nine air \
-pollutants at levels that could raise long-term health concerns in and around \
-East Palestine, according to an independent analysis. \n\n\"The analysis by \
-Texas A&M University seems to contradict statements by state and federal \
-regulators that air near the crash site is completely safe, despite residents \
-complaining about rashes, breathing problems and other health effects." \
-Your reaction.""",
- },
- }),
- )
+ example = "https://twitter.com/USER/status/12345"
def __init__(self, match):
TwitterExtractor.__init__(self, match)
@@ -923,21 +722,49 @@ Your reaction.""",
if conversations:
self._accessible = (conversations == "accessible")
return self._tweets_conversation(self.tweet_id)
- else:
- return self._tweets_single(self.tweet_id)
- def _tweets_single(self, tweet_id):
- tweets = []
+ endpoint = self.config("tweet-endpoint")
+ if endpoint == "detail" or endpoint in (None, "auto") and \
+ self.api.headers["x-twitter-auth-type"]:
+ return self._tweets_detail(self.tweet_id)
+ return self._tweets_single(self.tweet_id)
+
+ def _tweets_single(self, tweet_id):
tweet = self.api.tweet_result_by_rest_id(tweet_id)
- self._assign_user(tweet["core"]["user_results"]["result"])
+
+ try:
+ self._assign_user(tweet["core"]["user_results"]["result"])
+ except KeyError:
+ raise exception.StopExtraction(
+ "'%s'", tweet.get("reason") or "Unavailable")
+
+ yield tweet
+
+ if not self.quoted:
+ return
while True:
- tweets.append(tweet)
tweet_id = tweet["legacy"].get("quoted_status_id_str")
if not tweet_id:
break
tweet = self.api.tweet_result_by_rest_id(tweet_id)
+ tweet["legacy"]["quoted_by_id_str"] = tweet_id
+ yield tweet
+
+ def _tweets_detail(self, tweet_id):
+ tweets = []
+
+ for tweet in self.api.tweet_detail(tweet_id):
+ if tweet["rest_id"] == tweet_id or \
+ tweet.get("_retweet_id_str") == tweet_id:
+ if self._user_obj is None:
+ self._assign_user(tweet["core"]["user_results"]["result"])
+ tweets.append(tweet)
+
+ tweet_id = tweet["legacy"].get("quoted_status_id_str")
+ if not tweet_id:
+ break
return tweets
@@ -965,21 +792,7 @@ class TwitterAvatarExtractor(TwitterExtractor):
filename_fmt = "avatar {date}.{extension}"
archive_fmt = "AV_{user[id]}_{date}"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/photo"
- test = (
- ("https://twitter.com/supernaturepics/photo", {
- "pattern": r"https://pbs\.twimg\.com/profile_images"
- r"/554585280938659841/FLVAlX18\.jpeg",
- "keyword": {
- "date": "dt:2015-01-12 10:26:49",
- "extension": "jpeg",
- "filename": "FLVAlX18",
- "tweet_id": 554585280938659841,
- },
- }),
- ("https://twitter.com/User16/photo", {
- "count": 0,
- }),
- )
+ example = "https://twitter.com/USER/photo"
def tweets(self):
self.api._user_id_by_screen_name(self.user)
@@ -1001,20 +814,7 @@ class TwitterBackgroundExtractor(TwitterExtractor):
filename_fmt = "background {date}.{extension}"
archive_fmt = "BG_{user[id]}_{date}"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/header_photo"
- test = (
- ("https://twitter.com/supernaturepics/header_photo", {
- "pattern": r"https://pbs\.twimg\.com/profile_banners"
- r"/2976459548/1421058583",
- "keyword": {
- "date": "dt:2015-01-12 10:29:43",
- "filename": "1421058583",
- "tweet_id": 554586009367478272,
- },
- }),
- ("https://twitter.com/User16/header_photo", {
- "count": 0,
- }),
- )
+ example = "https://twitter.com/USER/header_photo"
def tweets(self):
self.api._user_id_by_screen_name(self.user)
@@ -1034,13 +834,7 @@ class TwitterImageExtractor(Extractor):
category = "twitter"
subcategory = "image"
pattern = r"https?://pbs\.twimg\.com/media/([\w-]+)(?:\?format=|\.)(\w+)"
- test = (
- ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG?format=jpg&name=orig", {
- "options": (("size", "4096x4096,orig"),),
- "url": "cb3042a6f6826923da98f0d2b66c427e9385114c",
- }),
- ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG.jpg:orig"),
- )
+ example = "https://pbs.twimg.com/media/ABCDE?format=jpg&name=orig"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -1071,23 +865,19 @@ class TwitterAPI():
self._syndication = self.extractor.syndication
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
- cookies = extractor.session.cookies
- cookiedomain = extractor.cookiedomain
+ cookies = extractor.cookies
+ cookies_domain = extractor.cookies_domain
csrf = extractor.config("csrf")
if csrf is None or csrf == "cookies":
- csrf_token = cookies.get("ct0", domain=cookiedomain)
+ csrf_token = cookies.get("ct0", domain=cookies_domain)
else:
csrf_token = None
if not csrf_token:
csrf_token = util.generate_token()
- cookies.set("ct0", csrf_token, domain=cookiedomain)
+ cookies.set("ct0", csrf_token, domain=cookies_domain)
- auth_token = cookies.get("auth_token", domain=cookiedomain)
-
- search = extractor.config("search-endpoint")
- if search == "rest":
- self.search_timeline = self.search_adaptive
+ auth_token = cookies.get("auth_token", domain=cookies_domain)
self.headers = {
"Accept": "*/*",
@@ -1216,7 +1006,19 @@ class TwitterAPI():
"withArticleRichContentState": False,
}),
}
- return self._call(endpoint, params)["data"]["tweetResult"]["result"]
+ tweet = self._call(endpoint, params)["data"]["tweetResult"]["result"]
+ if "tweet" in tweet:
+ tweet = tweet["tweet"]
+
+ if tweet.get("__typename") == "TweetUnavailable":
+ reason = tweet.get("reason")
+ if reason == "NsfwLoggedOut":
+ raise exception.AuthorizationError("NSFW Tweet")
+ if reason == "Protected":
+ raise exception.AuthorizationError("Protected Tweet")
+ raise exception.StopExtraction("Tweet unavailable ('%s')", reason)
+
+ return tweet
def tweet_detail(self, tweet_id):
endpoint = "/graphql/JlLZj42Ltr2qwjasw-l5lQ/TweetDetail"
@@ -1324,16 +1126,6 @@ class TwitterAPI():
return self._pagination_tweets(
endpoint, variables, ("list", "tweets_timeline", "timeline"))
- def search_adaptive(self, query):
- endpoint = "/2/search/adaptive.json"
- params = self.params.copy()
- params["q"] = query
- params["tweet_search_mode"] = "live"
- params["query_source"] = "typed_query"
- params["pc"] = "1"
- params["spelling_corrections"] = "1"
- return self._pagination_legacy(endpoint, params)
-
def search_timeline(self, query):
endpoint = "/graphql/7jT5GT59P8IFjgxwqnEdQw/SearchTimeline"
variables = {
@@ -1453,8 +1245,8 @@ class TwitterAPI():
guest_token = self._guest_token()
if guest_token != self.headers["x-guest-token"]:
self.headers["x-guest-token"] = guest_token
- self.extractor.session.cookies.set(
- "gt", guest_token, domain=self.extractor.cookiedomain)
+ self.extractor.cookies.set(
+ "gt", guest_token, domain=self.extractor.cookies_domain)
def _call(self, endpoint, params, method="GET", auth=True, root=None):
url = (root or self.root) + endpoint
@@ -1647,8 +1439,8 @@ class TwitterAPI():
if user.get("blocked_by"):
if self.headers["x-twitter-auth-type"] and \
extr.config("logout"):
- extr._cookiefile = None
- del extr.session.cookies["auth_token"]
+ extr.cookies_file = None
+ del extr.cookies["auth_token"]
self.headers["x-twitter-auth-type"] = None
extr.log.info("Retrying API request as guest")
continue
@@ -1902,7 +1694,7 @@ def _login_impl(extr, username, password):
extr.log.debug(response.text)
raise exception.AuthenticationError(", ".join(errors))
- extr.session.cookies.clear()
+ extr.cookies.clear()
api = TwitterAPI(extr)
api._authenticate_guest()
headers = api.headers
@@ -2042,5 +1834,5 @@ def _login_impl(extr, username, password):
return {
cookie.name: cookie.value
- for cookie in extr.session.cookies
+ for cookie in extr.cookies
}