summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
author: Libravatar Unit 193 <unit193@unit193.net> 2022-08-29 02:17:16 -0400
committer: Libravatar Unit 193 <unit193@unit193.net> 2022-08-29 02:17:16 -0400
commit a768930761f7f20587ae40a8cacca0e55c85290a (patch)
tree 5a4163db912b93fc45f717e5e43fd5be3e66f16c /gallery_dl/extractor/twitter.py
parent ae2a0f5622beaa6f402526f8a7b939419283a090 (diff)
New upstream version 1.23.0. [upstream/1.23.0]
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py 155
1 files changed, 91 insertions, 64 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 36b4806..0df4ea2 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
+import itertools
import json
BASE_PATTERN = (
@@ -40,7 +41,7 @@ class TwitterExtractor(Extractor):
self.quoted = self.config("quoted", False)
self.videos = self.config("videos", True)
self.cards = self.config("cards", False)
- self._user_id = None
+ self._user = self._user_obj = None
self._user_cache = {}
self._init_sizes()
@@ -90,8 +91,9 @@ class TwitterExtractor(Extractor):
if "in_reply_to_user_id_str" in data and (
not self.replies or (
self.replies == "self" and
- (self._user_id or data["in_reply_to_user_id_str"]) !=
- data["user_id_str"]
+ data["user_id_str"] !=
+ (self._user_obj["rest_id"] if self._user else
+ data["in_reply_to_user_id_str"])
)
):
self.log.debug("Skipping %s (reply)", data["id_str"])
@@ -229,11 +231,13 @@ class TwitterExtractor(Extractor):
files.append({"url": url})
def _transform_tweet(self, tweet):
- if "core" in tweet:
- user = self._transform_user(
- tweet["core"]["user_results"]["result"])
+ if "author" in tweet:
+ author = tweet["author"]
+ elif "core" in tweet:
+ author = tweet["core"]["user_results"]["result"]
else:
- user = self._transform_user(tweet["user"])
+ author = tweet["user"]
+ author = self._transform_user(author)
if "legacy" in tweet:
tweet = tweet["legacy"]
@@ -245,12 +249,13 @@ class TwitterExtractor(Extractor):
"retweet_id" : text.parse_int(
tget("retweeted_status_id_str")),
"quote_id" : text.parse_int(
- tget("quoted_status_id_str")),
+ tget("quoted_by_id_str")),
"reply_id" : text.parse_int(
tget("in_reply_to_status_id_str")),
"date" : text.parse_datetime(
tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
- "user" : user,
+ "user" : self._user or author,
+ "author" : author,
"lang" : tweet["lang"],
"favorite_count": tget("favorite_count"),
"quote_count" : tget("quote_count"),
@@ -280,13 +285,8 @@ class TwitterExtractor(Extractor):
if "in_reply_to_screen_name" in tweet:
tdata["reply_to"] = tweet["in_reply_to_screen_name"]
- if "quoted_by_id_str" in tweet:
- tdata["quote_by"] = text.parse_int(tweet["quoted_by_id_str"])
-
- if "author" in tweet:
- tdata["author"] = self._transform_user(tweet["author"])
- else:
- tdata["author"] = tdata["user"]
+ if "quoted_by" in tweet:
+ tdata["quote_by"] = tweet["quoted_by"]
return tdata
@@ -336,6 +336,10 @@ class TwitterExtractor(Extractor):
return udata
+ def _assign_user(self, user):
+ self._user_obj = user
+ self._user = self._transform_user(user)
+
def _users_result(self, users):
userfmt = self.config("users")
if not userfmt or userfmt == "timeline":
@@ -455,33 +459,24 @@ class TwitterTimelineExtractor(TwitterExtractor):
tweet = None
for tweet in self._select_tweet_source()(self.user):
yield tweet
-
if tweet is None:
return
- # get username
- if not self.user.startswith("id:"):
- username = self.user
- elif "core" in tweet:
- username = (tweet["core"]["user_results"]["result"]
- ["legacy"]["screen_name"])
- else:
- username = tweet["user"]["screen_name"]
-
- # get tweet data
- if "legacy" in tweet:
- tweet = tweet["legacy"]
-
# build search query
- query = "from:{} max_id:{}".format(username, tweet["id_str"])
+ query = "from:{} max_id:{}".format(
+ self._user["name"], tweet["rest_id"])
if self.retweets:
query += " include:retweets include:nativeretweets"
+
if not self.textonly:
- query += (" (filter:images OR"
- " filter:native_video OR"
- " card_name:animated_gif)")
+ # try to search for media-only tweets
+ tweet = None
+ for tweet in self.api.search_adaptive(query + " filter:links"):
+ yield tweet
+ if tweet is not None:
+ return
- # yield search results starting from last tweet id
+ # yield unfiltered search results
yield from self.api.search_adaptive(query)
def _select_tweet_source(self):
@@ -625,7 +620,25 @@ class TwitterSearchExtractor(TwitterExtractor):
return {"search": text.unquote(self.user)}
def tweets(self):
- return self.api.search_adaptive(text.unquote(self.user))
+ query = text.unquote(self.user.replace("+", " "))
+
+ user = None
+ for item in query.split():
+ item = item.strip("()")
+ if item.startswith("from:"):
+ if user:
+ user = None
+ break
+ else:
+ user = item[5:]
+
+ if user is not None:
+ try:
+ self._assign_user(self.api.user_by_screen_name(user))
+ except KeyError:
+ pass
+
+ return self.api.search_adaptive(query)
class TwitterEventExtractor(TwitterExtractor):
@@ -693,7 +706,7 @@ class TwitterTweetExtractor(TwitterExtractor):
}),
("https://twitter.com/i/web/status/1424898916156284928", {
"options": (("replies", "self"),),
- "count": 0,
+ "count": 1,
}),
# "quoted" option (#854)
("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
@@ -777,20 +790,38 @@ class TwitterTweetExtractor(TwitterExtractor):
def tweets(self):
if self.config("conversations", False):
- return self.api.tweet_detail(self.tweet_id)
+ return self._tweets_conversation(self.tweet_id)
+ else:
+ return self._tweets_single(self.tweet_id)
+ def _tweets_single(self, tweet_id):
tweets = []
- tweet_id = self.tweet_id
+
for tweet in self.api.tweet_detail(tweet_id):
if tweet["rest_id"] == tweet_id or \
tweet.get("_retweet_id_str") == tweet_id:
+ self._assign_user(tweet["core"]["user_results"]["result"])
tweets.append(tweet)
tweet_id = tweet["legacy"].get("quoted_status_id_str")
if not tweet_id:
break
+
return tweets
+ def _tweets_conversation(self, tweet_id):
+ tweets = self.api.tweet_detail(tweet_id)
+ buffer = []
+
+ for tweet in tweets:
+ buffer.append(tweet)
+ if tweet["rest_id"] == tweet_id or \
+ tweet.get("_retweet_id_str") == tweet_id:
+ self._assign_user(tweet["core"]["user_results"]["result"])
+ break
+
+ return itertools.chain(buffer, tweets)
+
class TwitterImageExtractor(Extractor):
category = "twitter"
@@ -888,7 +919,6 @@ class TwitterAPI():
self._nsfw_warning = True
self._syndication = extractor.config("syndication")
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
- self._user = None
cookies = extractor.session.cookies
cookiedomain = extractor.cookiedomain
@@ -1050,13 +1080,13 @@ class TwitterAPI():
def _user_id_by_screen_name(self, screen_name):
if screen_name.startswith("id:"):
- self._user = util.SENTINEL
user_id = screen_name[3:]
+ user = self.user_by_rest_id(user_id)
else:
user = ()
try:
- user = self._user = self.user_by_screen_name(screen_name)
+ user = self.user_by_screen_name(screen_name)
user_id = user["rest_id"]
except KeyError:
if "unavailable_message" in user:
@@ -1066,7 +1096,7 @@ class TwitterAPI():
else:
raise exception.NotFoundError("user")
- self.extractor._user_id = user_id
+ self.extractor._assign_user(user)
return user_id
@cache(maxage=3600)
@@ -1183,7 +1213,7 @@ class TwitterAPI():
if quoted:
quoted = quoted.copy()
quoted["author"] = users[quoted["user_id_str"]]
- quoted["user"] = tweet["user"]
+ quoted["quoted_by"] = tweet["user"]["screen_name"]
quoted["quoted_by_id_str"] = tweet["id_str"]
yield quoted
@@ -1226,17 +1256,10 @@ class TwitterAPI():
except LookupError:
extr.log.debug(data)
- if self._user:
- user = self._user
- if user is util.SENTINEL:
- try:
- user = self.user_by_rest_id(variables["userId"])
- except KeyError:
- raise exception.NotFoundError("user")
- user = user.get("legacy")
- if not user:
- pass
- elif user.get("blocked_by"):
+ user = extr._user_obj
+ if user:
+ user = user["legacy"]
+ if user.get("blocked_by"):
if self.headers["x-twitter-auth-type"] and \
extr.config("logout"):
guest_token = self._guest_token()
@@ -1322,7 +1345,7 @@ class TwitterAPI():
try:
legacy["retweeted_status_id_str"] = \
retweet["rest_id"]
- legacy["author"] = \
+ tweet["author"] = \
retweet["core"]["user_results"]["result"]
if "extended_entities" in retweet["legacy"] and \
"extended_entities" not in legacy:
@@ -1336,9 +1359,9 @@ class TwitterAPI():
if "quoted_status_result" in tweet:
try:
quoted = tweet["quoted_status_result"]["result"]
- quoted["legacy"]["author"] = \
- quoted["core"]["user_results"]["result"]
- quoted["core"] = tweet["core"]
+ quoted["legacy"]["quoted_by"] = (
+ tweet["core"]["user_results"]["result"]
+ ["legacy"]["screen_name"])
quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"]
yield quoted
except KeyError:
@@ -1374,10 +1397,14 @@ class TwitterAPI():
if instr["type"] == "TimelineAddEntries":
for entry in instr["entries"]:
if entry["entryId"].startswith("user-"):
- user = (entry["content"]["itemContent"]
- ["user_results"]["result"])
- if "rest_id" in user:
- yield user
+ try:
+ user = (entry["content"]["itemContent"]
+ ["user_results"]["result"])
+ except KeyError:
+ pass
+ else:
+ if "rest_id" in user:
+ yield user
elif entry["entryId"].startswith("cursor-bottom-"):
cursor = entry["content"]["value"]
elif instr["type"] == "TimelineTerminateTimeline":
@@ -1439,6 +1466,6 @@ class TwitterAPI():
return {
"rest_id": tweet["id_str"],
"legacy" : tweet,
- "user" : tweet["user"],
+ "core" : {"user_results": {"result": tweet["user"]}},
"_retweet_id_str": retweet_id,
}