diff options
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 155 |
1 files changed, 91 insertions, 64 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 36b4806..0df4ea2 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -11,6 +11,7 @@ from .common import Extractor, Message from .. import text, util, exception from ..cache import cache +import itertools import json BASE_PATTERN = ( @@ -40,7 +41,7 @@ class TwitterExtractor(Extractor): self.quoted = self.config("quoted", False) self.videos = self.config("videos", True) self.cards = self.config("cards", False) - self._user_id = None + self._user = self._user_obj = None self._user_cache = {} self._init_sizes() @@ -90,8 +91,9 @@ class TwitterExtractor(Extractor): if "in_reply_to_user_id_str" in data and ( not self.replies or ( self.replies == "self" and - (self._user_id or data["in_reply_to_user_id_str"]) != - data["user_id_str"] + data["user_id_str"] != + (self._user_obj["rest_id"] if self._user else + data["in_reply_to_user_id_str"]) ) ): self.log.debug("Skipping %s (reply)", data["id_str"]) @@ -229,11 +231,13 @@ class TwitterExtractor(Extractor): files.append({"url": url}) def _transform_tweet(self, tweet): - if "core" in tweet: - user = self._transform_user( - tweet["core"]["user_results"]["result"]) + if "author" in tweet: + author = tweet["author"] + elif "core" in tweet: + author = tweet["core"]["user_results"]["result"] else: - user = self._transform_user(tweet["user"]) + author = tweet["user"] + author = self._transform_user(author) if "legacy" in tweet: tweet = tweet["legacy"] @@ -245,12 +249,13 @@ class TwitterExtractor(Extractor): "retweet_id" : text.parse_int( tget("retweeted_status_id_str")), "quote_id" : text.parse_int( - tget("quoted_status_id_str")), + tget("quoted_by_id_str")), "reply_id" : text.parse_int( tget("in_reply_to_status_id_str")), "date" : text.parse_datetime( tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"), - "user" : user, + "user" : self._user or author, + "author" : author, "lang" : tweet["lang"], "favorite_count": tget("favorite_count"), "quote_count" : tget("quote_count"), @@ -280,13 +285,8 @@ class TwitterExtractor(Extractor): if "in_reply_to_screen_name" in tweet: tdata["reply_to"] = tweet["in_reply_to_screen_name"] - if "quoted_by_id_str" in tweet: - tdata["quote_by"] = text.parse_int(tweet["quoted_by_id_str"]) - - if "author" in tweet: - tdata["author"] = self._transform_user(tweet["author"]) - else: - tdata["author"] = tdata["user"] + if "quoted_by" in tweet: + tdata["quote_by"] = tweet["quoted_by"] return tdata @@ -336,6 +336,10 @@ class TwitterExtractor(Extractor): return udata + def _assign_user(self, user): + self._user_obj = user + self._user = self._transform_user(user) + def _users_result(self, users): userfmt = self.config("users") if not userfmt or userfmt == "timeline": @@ -455,33 +459,24 @@ class TwitterTimelineExtractor(TwitterExtractor): tweet = None for tweet in self._select_tweet_source()(self.user): yield tweet - if tweet is None: return - # get username - if not self.user.startswith("id:"): - username = self.user - elif "core" in tweet: - username = (tweet["core"]["user_results"]["result"] - ["legacy"]["screen_name"]) - else: - username = tweet["user"]["screen_name"] - - # get tweet data - if "legacy" in tweet: - tweet = tweet["legacy"] - # build search query - query = "from:{} max_id:{}".format(username, tweet["id_str"]) + query = "from:{} max_id:{}".format( + self._user["name"], tweet["rest_id"]) if self.retweets: query += " include:retweets include:nativeretweets" + if not self.textonly: - query += (" (filter:images OR" - " filter:native_video OR" - " card_name:animated_gif)") + # try to search for media-only tweets + tweet = None + for tweet in self.api.search_adaptive(query + " filter:links"): + yield tweet + if tweet is not None: + return - # yield search results starting from last tweet id + # yield unfiltered search results yield from self.api.search_adaptive(query) def _select_tweet_source(self): @@ -625,7 +620,25 @@ class TwitterSearchExtractor(TwitterExtractor): return {"search": text.unquote(self.user)} def tweets(self): - return self.api.search_adaptive(text.unquote(self.user)) + query = text.unquote(self.user.replace("+", " ")) + + user = None + for item in query.split(): + item = item.strip("()") + if item.startswith("from:"): + if user: + user = None + break + else: + user = item[5:] + + if user is not None: + try: + self._assign_user(self.api.user_by_screen_name(user)) + except KeyError: + pass + + return self.api.search_adaptive(query) class TwitterEventExtractor(TwitterExtractor): @@ -693,7 +706,7 @@ class TwitterTweetExtractor(TwitterExtractor): }), ("https://twitter.com/i/web/status/1424898916156284928", { "options": (("replies", "self"),), - "count": 0, + "count": 1, }), # "quoted" option (#854) ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", { @@ -777,20 +790,38 @@ class TwitterTweetExtractor(TwitterExtractor): def tweets(self): if self.config("conversations", False): - return self.api.tweet_detail(self.tweet_id) + return self._tweets_conversation(self.tweet_id) + else: + return self._tweets_single(self.tweet_id) + def _tweets_single(self, tweet_id): tweets = [] - tweet_id = self.tweet_id + for tweet in self.api.tweet_detail(tweet_id): if tweet["rest_id"] == tweet_id or \ tweet.get("_retweet_id_str") == tweet_id: + self._assign_user(tweet["core"]["user_results"]["result"]) tweets.append(tweet) tweet_id = tweet["legacy"].get("quoted_status_id_str") if not tweet_id: break + return tweets + def _tweets_conversation(self, tweet_id): + tweets = self.api.tweet_detail(tweet_id) + buffer = [] + + for tweet in tweets: + buffer.append(tweet) + if tweet["rest_id"] == tweet_id or \ + tweet.get("_retweet_id_str") == tweet_id: + self._assign_user(tweet["core"]["user_results"]["result"]) + break + + return itertools.chain(buffer, tweets) + class TwitterImageExtractor(Extractor): category = "twitter" @@ -888,7 +919,6 @@ class TwitterAPI(): self._nsfw_warning = True self._syndication = extractor.config("syndication") self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode - self._user = None cookies = extractor.session.cookies cookiedomain = extractor.cookiedomain @@ -1050,13 +1080,13 @@ class TwitterAPI(): def _user_id_by_screen_name(self, screen_name): if screen_name.startswith("id:"): - self._user = util.SENTINEL user_id = screen_name[3:] + user = self.user_by_rest_id(user_id) else: user = () try: - user = self._user = self.user_by_screen_name(screen_name) + user = self.user_by_screen_name(screen_name) user_id = user["rest_id"] except KeyError: if "unavailable_message" in user: @@ -1066,7 +1096,7 @@ class TwitterAPI(): else: raise exception.NotFoundError("user") - self.extractor._user_id = user_id + self.extractor._assign_user(user) return user_id @cache(maxage=3600) @@ -1183,7 +1213,7 @@ class TwitterAPI(): if quoted: quoted = quoted.copy() quoted["author"] = users[quoted["user_id_str"]] - quoted["user"] = tweet["user"] + quoted["quoted_by"] = tweet["user"]["screen_name"] quoted["quoted_by_id_str"] = tweet["id_str"] yield quoted @@ -1226,17 +1256,10 @@ class TwitterAPI(): except LookupError: extr.log.debug(data) - if self._user: - user = self._user - if user is util.SENTINEL: - try: - user = self.user_by_rest_id(variables["userId"]) - except KeyError: - raise exception.NotFoundError("user") - user = user.get("legacy") - if not user: - pass - elif user.get("blocked_by"): + user = extr._user_obj + if user: + user = user["legacy"] + if user.get("blocked_by"): if self.headers["x-twitter-auth-type"] and \ extr.config("logout"): guest_token = self._guest_token() @@ -1322,7 +1345,7 @@ class TwitterAPI(): try: legacy["retweeted_status_id_str"] = \ retweet["rest_id"] - legacy["author"] = \ + tweet["author"] = \ retweet["core"]["user_results"]["result"] if "extended_entities" in retweet["legacy"] and \ "extended_entities" not in legacy: @@ -1336,9 +1359,9 @@ class TwitterAPI(): if "quoted_status_result" in tweet: try: quoted = tweet["quoted_status_result"]["result"] - quoted["legacy"]["author"] = \ - quoted["core"]["user_results"]["result"] - quoted["core"] = tweet["core"] + quoted["legacy"]["quoted_by"] = ( + tweet["core"]["user_results"]["result"] + ["legacy"]["screen_name"]) quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"] yield quoted except KeyError: @@ -1374,10 +1397,14 @@ class TwitterAPI(): if instr["type"] == "TimelineAddEntries": for entry in instr["entries"]: if entry["entryId"].startswith("user-"): - user = (entry["content"]["itemContent"] - ["user_results"]["result"]) - if "rest_id" in user: - yield user + try: + user = (entry["content"]["itemContent"] + ["user_results"]["result"]) + except KeyError: + pass + else: + if "rest_id" in user: + yield user elif entry["entryId"].startswith("cursor-bottom-"): cursor = entry["content"]["value"] elif instr["type"] == "TimelineTerminateTimeline": @@ -1439,6 +1466,6 @@ class TwitterAPI(): return { "rest_id": tweet["id_str"], "legacy" : tweet, - "user" : tweet["user"], + "core" : {"user_results": {"result": tweet["user"]}}, "_retweet_id_str": retweet_id, } |
