aboutsummaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r--gallery_dl/extractor/twitter.py149
1 files changed, 88 insertions, 61 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 7cabb8c..1e985e3 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -31,6 +31,7 @@ class TwitterExtractor(Extractor):
self.retweets = self.config("retweets", True)
self.replies = self.config("replies", True)
self.twitpic = self.config("twitpic", False)
+ self.quoted = self.config("quoted", True)
self.videos = self.config("videos", True)
self._user_cache = {}
@@ -41,8 +42,9 @@ class TwitterExtractor(Extractor):
for tweet in self.tweets():
- if not self.retweets and "retweeted_status_id_str" in tweet or \
- not self.replies and "in_reply_to_user_id_str" in tweet:
+ if (not self.retweets and "retweeted_status_id_str" in tweet or
+ not self.replies and "in_reply_to_user_id_str" in tweet or
+ not self.quoted and "quoted" in tweet):
continue
if self.twitpic:
@@ -60,7 +62,7 @@ class TwitterExtractor(Extractor):
tdata["width"] = media["original_info"].get("width", 0)
tdata["height"] = media["original_info"].get("height", 0)
- if "video_info" in media and self.videos:
+ if "video_info" in media:
if self.videos == "ytdl":
url = "ytdl:{}/i/web/status/{}".format(
@@ -68,7 +70,7 @@ class TwitterExtractor(Extractor):
tdata["extension"] = None
yield Message.Url, url, tdata
- else:
+ elif self.videos:
video_info = media["video_info"]
variant = max(
video_info["variants"],
@@ -149,11 +151,10 @@ class TwitterExtractor(Extractor):
if "in_reply_to_screen_name" in tweet:
tdata["reply_to"] = tweet["in_reply_to_screen_name"]
- if "full_text_quoted" in tweet:
- tdata["content_quoted"] = tweet["full_text_quoted"]
-
if "author" in tweet:
tdata["author"] = self._transform_user(tweet["author"])
+ else:
+ tdata["author"] = tdata["user"]
return tdata
@@ -264,6 +265,27 @@ class TwitterMediaExtractor(TwitterExtractor):
return TwitterAPI(self).timeline_media(self.user)
+class TwitterLikesExtractor(TwitterExtractor):
+ """Extractor for liked tweets"""
+ subcategory = "likes"
+ pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
+ r"/(?!search)([^/?&#]+)/likes(?!\w)")
+ test = ("https://twitter.com/supernaturepics/likes",)
+
+ def tweets(self):
+ return TwitterAPI(self).timeline_favorites(self.user)
+
+
+class TwitterBookmarkExtractor(TwitterExtractor):
+ """Extractor for bookmarked tweets"""
+ subcategory = "bookmark"
+ pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()"
+ test = ("https://twitter.com/i/bookmarks",)
+
+ def tweets(self):
+ return TwitterAPI(self).timeline_bookmark()
+
+
class TwitterSearchExtractor(TwitterExtractor):
"""Extractor for all images from a search timeline"""
subcategory = "search"
@@ -279,7 +301,7 @@ class TwitterSearchExtractor(TwitterExtractor):
return {"search": text.unquote(self.user)}
def tweets(self):
- return TwitterAPI(self).search(self.user)
+ return TwitterAPI(self).search(text.unquote(self.user))
class TwitterTweetExtractor(TwitterExtractor):
@@ -298,7 +320,6 @@ class TwitterTweetExtractor(TwitterExtractor):
}),
# video
("https://twitter.com/perrypumas/status/1065692031626829824", {
- "options": (("videos", True),),
"pattern": r"https://video.twimg.com/ext_tw_video/.+\.mp4\?tag=5",
}),
# content with emoji, newlines, hashtags (#338)
@@ -310,23 +331,25 @@ class TwitterTweetExtractor(TwitterExtractor):
"It’s our \\(Mystery\\) Gift to you, Trainers! \n\n❓🎁➡️ "
)},
}),
- # Reply to another tweet (#403)
- ("https://twitter.com/tyson_hesse/status/1103767554424598528", {
- "options": (("videos", "ytdl"),),
- "pattern": r"ytdl:https://twitter.com/i/web.+/1103767554424598528",
+ # Reply to deleted tweet (#403, #838)
+ ("https://twitter.com/i/web/status/1170041925560258560", {
+ "pattern": r"https://pbs.twimg.com/media/EDzS7VrU0AAFL4_.jpg:orig",
}),
# 'replies' option (#705)
- ("https://twitter.com/tyson_hesse/status/1103767554424598528", {
+ ("https://twitter.com/i/web/status/1170041925560258560", {
"options": (("replies", False),),
"count": 0,
}),
- # /i/web/ URL
- ("https://twitter.com/i/web/status/1155074198240292865", {
- "pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig",
+ # quoted tweet (#526, #854)
+ ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
+ "pattern": r"https://pbs\.twimg\.com/media/Ea[KG].+\.jpg",
+ "count": 8,
}),
- # quoted tweet (#526)
- ("https://twitter.com/Pistachio/status/1222690391817932803", {
- "pattern": r"https://pbs\.twimg\.com/media/EPfMfDUU8AAnByO\.jpg",
+ # "quoted" option (#854)
+ ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
+ "options": (("quoted", False),),
+ "pattern": r"https://pbs\.twimg\.com/media/EaK.+\.jpg",
+ "count": 4,
}),
# TwitPic embeds (#579)
("https://twitter.com/i/web/status/112900228289540096", {
@@ -344,16 +367,6 @@ class TwitterTweetExtractor(TwitterExtractor):
return TwitterAPI(self).tweet(self.tweet_id)
-class TwitterBookmarkExtractor(TwitterExtractor):
- """Extractor for bookmarked tweets"""
- subcategory = "bookmark"
- pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()"
- test = ("https://twitter.com/i/bookmarks",)
-
- def tweets(self):
- return TwitterAPI(self).bookmarks()
-
-
class TwitterAPI():
def __init__(self, extractor):
@@ -409,16 +422,21 @@ class TwitterAPI():
self.headers["x-twitter-auth-type"] = "OAuth2Session"
else:
# guest token
- guest_token = _guest_token(self.extractor, self.headers)
+ guest_token = self._guest_token()
self.headers["x-guest-token"] = guest_token
cookies.set("gt", guest_token, domain=".twitter.com")
def tweet(self, tweet_id):
endpoint = "2/timeline/conversation/{}.json".format(tweet_id)
+ tweets = []
for tweet in self._pagination(endpoint):
if tweet["id_str"] == tweet_id:
- return (tweet,)
- return ()
+ tweets.append(tweet)
+ if "quoted_status_id_str" in tweet:
+ tweet_id = tweet["quoted_status_id_str"]
+ else:
+ break
+ return tweets
def timeline_profile(self, screen_name):
user = self.user_by_screen_name(screen_name)
@@ -430,17 +448,26 @@ class TwitterAPI():
endpoint = "2/timeline/media/{}.json".format(user["rest_id"])
return self._pagination(endpoint)
+ def timeline_favorites(self, screen_name):
+ user = self.user_by_screen_name(screen_name)
+ endpoint = "2/timeline/favorites/{}.json".format(user["rest_id"])
+ return self._pagination(endpoint)
+
+ def timeline_bookmark(self):
+ endpoint = "2/timeline/bookmark.json"
+ return self._pagination(endpoint)
+
def search(self, query):
endpoint = "2/search/adaptive.json"
params = self.params.copy()
- params["q"] = text.unquote(query)
+ params["q"] = query
+ params["tweet_search_mode"] = "live"
+ params["query_source"] = "typed_query"
+ params["pc"] = "1"
+ params["spelling_corrections"] = "1"
return self._pagination(
endpoint, params, "sq-I-t-", "sq-cursor-bottom")
- def bookmarks(self):
- endpoint = "2/timeline/bookmark.json"
- return self._pagination(endpoint)
-
def user_by_screen_name(self, screen_name):
endpoint = "graphql/-xfUfZsnR_zqjFd-IfrN5A/UserByScreenName"
params = {
@@ -449,10 +476,16 @@ class TwitterAPI():
}
return self._call(endpoint, params)["data"]["user"]
- def _call(self, endpoint, params):
+ @cache(maxage=3600)
+ def _guest_token(self):
+ endpoint = "1.1/guest/activate.json"
+ return self._call(endpoint, None, "POST")["guest_token"]
+
+ def _call(self, endpoint, params, method="GET"):
url = "https://api.twitter.com/" + endpoint
response = self.extractor.request(
- url, params=params, headers=self.headers, fatal=None)
+ url, method=method, params=params, headers=self.headers,
+ fatal=None)
if response.status_code < 400:
return response.json()
if response.status_code == 429:
@@ -479,28 +512,30 @@ class TwitterAPI():
for entry in instr[0]["addEntries"]["entries"]:
if entry["entryId"].startswith(entry_tweet):
- tid = entry["content"]["item"]["content"]["tweet"]["id"]
- if tid not in tweets:
+ try:
+ tweet = tweets[
+ entry["content"]["item"]["content"]["tweet"]["id"]]
+ except KeyError:
self.extractor.log.debug(
- "Skipping unavailable Tweet %s", tid)
+ "Skipping unavailable Tweet %s",
+ entry["entryId"][6:])
continue
- tweet = tweets[tid]
tweet["user"] = users[tweet["user_id_str"]]
- if "quoted_status_id_str" in tweet:
- quoted = tweets.get(tweet["quoted_status_id_str"])
- if quoted:
- tweet["full_text_quoted"] = quoted["full_text"]
- if "extended_entities" in quoted:
- tweet["extended_entities"] = \
- quoted["extended_entities"]
- elif "retweeted_status_id_str" in tweet:
+ if "retweeted_status_id_str" in tweet:
retweet = tweets.get(tweet["retweeted_status_id_str"])
if retweet:
tweet["author"] = users[retweet["user_id_str"]]
-
yield tweet
+ if "quoted_status_id_str" in tweet:
+ quoted = tweets.get(tweet["quoted_status_id_str"])
+ if quoted:
+ quoted["author"] = users[quoted["user_id_str"]]
+ quoted["user"] = tweet["user"]
+ quoted["quoted"] = True
+ yield quoted
+
elif entry["entryId"].startswith(entry_cursor):
cursor = entry["content"]["operation"]["cursor"]
if not cursor.get("stopOnEmptyResponse"):
@@ -515,11 +550,3 @@ class TwitterAPI():
if not cursor or not tweet:
return
params["cursor"] = cursor
-
-
-@cache(maxage=3600)
-def _guest_token(extr, headers):
- return extr.request(
- "https://api.twitter.com/1.1/guest/activate.json",
- method="POST", headers=headers,
- ).json().get("guest_token")