diff options
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 65 |
1 files changed, 58 insertions, 7 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index c919cb8..ed3cfae 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1447,20 +1447,33 @@ class TwitterAPI():
             "includePromotedContent": False,
         }
         return self._pagination_tweets(
-            endpoint, variables, ("bookmark_timeline_v2", "timeline"), False)
+            endpoint, variables, ("bookmark_timeline_v2", "timeline"),
+            stop_tweets=128)
 
     def search_timeline(self, query, product="Latest"):
         endpoint = "/graphql/4fpceYZ6-YQCx_JSl_Cn_A/SearchTimeline"
         variables = {
             "rawQuery": query,
-            "count": 100,
+            "count": self.extractor.config("search-limit", 20),
             "querySource": "typed_query",
             "product": product,
             "withGrokTranslatedBio": False,
         }
+
+        if self.extractor.config("search-pagination") in (
+                "max_id", "maxid", "id"):
+            update_variables = self._update_variables_search
+        else:
+            update_variables = None
+
+        stop_tweets = self.extractor.config("search-stop")
+        if stop_tweets is None or stop_tweets == "auto":
+            stop_tweets = 3 if update_variables is None else 0
+
         return self._pagination_tweets(
             endpoint, variables,
-            ("search_by_raw_query", "search_timeline", "timeline"))
+            ("search_by_raw_query", "search_timeline", "timeline"),
+            stop_tweets=stop_tweets, update_variables=update_variables)
 
     def community_query(self, community_id):
         endpoint = "/graphql/2W09l7nD7ZbxGQHXvfB22w/CommunityQuery"
@@ -1870,11 +1883,12 @@ class TwitterAPI():
             params["cursor"] = extr._update_cursor(cursor)
 
     def _pagination_tweets(self, endpoint, variables,
-                           path=None, stop_tweets=True,
+                           path=None, stop_tweets=0, update_variables=None,
                            features=None, field_toggles=None):
         extr = self.extractor
         original_retweets = (extr.retweets == "original")
         pinned_tweet = extr.pinned
+        stop_tweets_max = stop_tweets
 
         params = {"variables": None}
         if cursor := extr._init_cursor():
@@ -2067,11 +2081,24 @@ class TwitterAPI():
                             tweet.get("rest_id"))
                         continue
 
-            if stop_tweets and not tweet:
-                return extr._update_cursor(None)
+            if tweet:
+                stop_tweets = stop_tweets_max
+                last_tweet = tweet
+            else:
+                if stop_tweets <= 0:
+                    return extr._update_cursor(None)
+                self.log.debug(
+                    "No Tweet results (%s/%s)",
+                    stop_tweets_max - stop_tweets + 1, stop_tweets_max)
+                stop_tweets -= 1
+
             if not cursor or cursor == variables.get("cursor"):
                 return extr._update_cursor(None)
-            variables["cursor"] = extr._update_cursor(cursor)
+
+            if update_variables is None:
+                variables["cursor"] = extr._update_cursor(cursor)
+            else:
+                variables = update_variables(variables, cursor, last_tweet)
 
     def _pagination_users(self, endpoint, variables, path=None):
         extr = self.extractor
@@ -2140,6 +2167,30 @@ class TwitterAPI():
 
         self.log.debug("Skipping %s ('%s')", tweet_id, text)
 
+    def _update_variables_search(self, variables, cursor, tweet):
+        try:
+            tweet_id = tweet.get("id_str") or tweet["legacy"]["id_str"]
+            max_id = f"max_id:{int(tweet_id)-1}"
+
+            query, n = text.re(r"\bmax_id:\d+").subn(
+                max_id, variables["rawQuery"])
+            if n:
+                variables["rawQuery"] = query
+            else:
+                variables["rawQuery"] = f"{query} {max_id}"
+
+            if prefix := self.extractor._cursor_prefix:
+                self.extractor._cursor_prefix = \
+                    f"{prefix.partition('_')[0]}_{tweet_id}/"
+            variables["cursor"] = None
+        except Exception as exc:
+            self.extractor.log.debug(
+                "Failed to update 'max_id' search query (%s: %s). Falling "
+                "back to 'cursor' pagination", exc.__class__.__name__, exc)
+            variables["cursor"] = self.extractor._update_cursor(cursor)
+
+        return variables
+
 
 @cache(maxage=365*86400, keyarg=1)
 def _login_impl(extr, username, password):
