diff options
| author | 2024-08-03 20:27:44 -0400 | |
|---|---|---|
| committer | 2024-08-03 20:27:44 -0400 | |
| commit | 032e5bed275a253e122ed9ac86dac7b8c4204172 (patch) | |
| tree | b4eda52ebfe00c4d22e9d633b1ab2d158a9f0573 /gallery_dl/extractor/twitter.py | |
| parent | 80e39a8fc7de105510cbbdca8507f2a4b8c9e01d (diff) | |
New upstream version 1.27.2. (ref: upstream/1.27.2)
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 157 |
1 file changed, 128 insertions, 29 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index ec098aa..9fa5b3f 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -51,6 +51,8 @@ class TwitterExtractor(Extractor): if not self.config("transform", True): self._transform_user = util.identity self._transform_tweet = util.identity + + self._cursor = None self._user = None self._user_obj = None self._user_cache = {} @@ -321,8 +323,17 @@ class TwitterExtractor(Extractor): "quote_count" : tget("quote_count"), "reply_count" : tget("reply_count"), "retweet_count" : tget("retweet_count"), + "bookmark_count": tget("bookmark_count"), } + if "views" in tweet: + try: + tdata["view_count"] = int(tweet["views"]["count"]) + except Exception: + tdata["view_count"] = 0 + else: + tdata["view_count"] = 0 + if "note_tweet" in tweet: note = tweet["note_tweet"]["note_tweet_results"]["result"] content = note["text"] @@ -492,6 +503,14 @@ class TwitterExtractor(Extractor): }, } + def _init_cursor(self): + return self.config("cursor") or None + + def _update_cursor(self, cursor): + self.log.debug("Cursor: %s", cursor) + self._cursor = cursor + return cursor + def metadata(self): """Return general metadata""" return {} @@ -499,6 +518,11 @@ class TwitterExtractor(Extractor): def tweets(self): """Yield all relevant tweet objects""" + def finalize(self): + if self._cursor: + self.log.info("Use '-o cursor=%s' to continue downloading " + "from the current position", self._cursor) + def login(self): if self.cookies_check(self.cookies_names): return @@ -530,6 +554,9 @@ class TwitterUserExtractor(TwitterExtractor): def initialize(self): pass + def finalize(self): + pass + def items(self): base = "{}/{}/".format(self.root, self.user) return self._dispatch_extractors(( @@ -549,30 +576,73 @@ class TwitterTimelineExtractor(TwitterExtractor): pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)" example = "https://x.com/USER/timeline" + def _init_cursor(self): + if 
self._cursor: + return self._cursor.partition("/")[2] or None + return None + + def _update_cursor(self, cursor): + if cursor: + self._cursor = self._cursor_prefix + cursor + self.log.debug("Cursor: %s", self._cursor) + else: + self._cursor = None + return cursor + def tweets(self): - # yield initial batch of (media) tweets - tweet = None - for tweet in self._select_tweet_source()(self.user): - yield tweet - if tweet is None: - return + self._cursor = cursor = self.config("cursor") or None + reset = False - # build search query - query = "from:{} max_id:{}".format( - self._user["name"], tweet["rest_id"]) - if self.retweets: - query += " include:retweets include:nativeretweets" + if cursor: + state = cursor.partition("/")[0] + state, _, tweet_id = state.partition("_") + state = text.parse_int(state, 1) + else: + state = 1 + + if state <= 1: + self._cursor_prefix = "1/" - if not self.textonly: - # try to search for media-only tweets + # yield initial batch of (media) tweets tweet = None - for tweet in self.api.search_timeline(query + " filter:links"): + for tweet in self._select_tweet_source()(self.user): yield tweet - if tweet is not None: + if tweet is None and not cursor: return + tweet_id = tweet["rest_id"] + + state = reset = 2 + else: + self.api._user_id_by_screen_name(self.user) + + # build search query + query = "from:{} max_id:{}".format(self._user["name"], tweet_id) + if self.retweets: + query += " include:retweets include:nativeretweets" - # yield unfiltered search results - yield from self.api.search_timeline(query) + if state <= 2: + self._cursor_prefix = "2_{}/".format(tweet_id) + if reset: + self._cursor = self._cursor_prefix + + if not self.textonly: + # try to search for media-only tweets + tweet = None + for tweet in self.api.search_timeline(query + " filter:links"): + yield tweet + if tweet is not None: + return self._update_cursor(None) + + state = reset = 3 + + if state <= 3: + # yield unfiltered search results + self._cursor_prefix = 
"3_{}/".format(tweet_id) + if reset: + self._cursor = self._cursor_prefix + + yield from self.api.search_timeline(query) + return self._update_cursor(None) def _select_tweet_source(self): strategy = self.config("strategy") @@ -854,6 +924,24 @@ class TwitterQuotesExtractor(TwitterExtractor): yield Message.Queue, url, data +class TwitterInfoExtractor(TwitterExtractor): + """Extractor for a user's profile data""" + subcategory = "info" + pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/info" + example = "https://x.com/USER/info" + + def items(self): + api = TwitterAPI(self) + + screen_name = self.user + if screen_name.startswith("id:"): + user = api.user_by_rest_id(screen_name[3:]) + else: + user = api.user_by_screen_name(screen_name) + + return iter(((Message.Directory, self._transform_user(user)),)) + + class TwitterAvatarExtractor(TwitterExtractor): subcategory = "avatar" filename_fmt = "avatar {date}.{extension}" @@ -1388,7 +1476,11 @@ class TwitterAPI(): "%s %s (%s)", response.status_code, response.reason, errors) def _pagination_legacy(self, endpoint, params): - original_retweets = (self.extractor.retweets == "original") + extr = self.extractor + cursor = extr._init_cursor() + if cursor: + params["cursor"] = cursor + original_retweets = (extr.retweets == "original") bottom = ("cursor-bottom-", "sq-cursor-bottom") while True: @@ -1396,7 +1488,7 @@ class TwitterAPI(): instructions = data["timeline"]["instructions"] if not instructions: - return + return extr._update_cursor(None) tweets = data["globalObjects"]["tweets"] users = data["globalObjects"]["users"] @@ -1477,8 +1569,8 @@ class TwitterAPI(): # stop on empty response if not cursor or (not tweets and not tweet_id): - return - params["cursor"] = cursor + return extr._update_cursor(None) + params["cursor"] = extr._update_cursor(cursor) def _pagination_tweets(self, endpoint, variables, path=None, stop_tweets=True, features=None): @@ -1487,6 +1579,9 @@ class TwitterAPI(): pinned_tweet = extr.pinned params = 
{"variables": None} + cursor = extr._init_cursor() + if cursor: + variables["cursor"] = cursor if features is None: features = self.features_pagination if features: @@ -1523,7 +1618,7 @@ class TwitterAPI(): cursor = entry["content"]["value"] if entries is None: if not cursor: - return + return extr._update_cursor(None) entries = () except LookupError: @@ -1672,12 +1767,16 @@ class TwitterAPI(): continue if stop_tweets and not tweet: - return + return extr._update_cursor(None) if not cursor or cursor == variables.get("cursor"): - return - variables["cursor"] = cursor + return extr._update_cursor(None) + variables["cursor"] = extr._update_cursor(cursor) def _pagination_users(self, endpoint, variables, path=None): + extr = self.extractor + cursor = extr._init_cursor() + if cursor: + variables["cursor"] = cursor params = { "variables": None, "features" : self._json_dumps(self.features_pagination), @@ -1697,7 +1796,7 @@ class TwitterAPI(): data = data[key] instructions = data["instructions"] except KeyError: - return + return extr._update_cursor(None) for instr in instructions: if instr["type"] == "TimelineAddEntries": @@ -1715,8 +1814,8 @@ class TwitterAPI(): cursor = entry["content"]["value"] if not cursor or cursor.startswith(("-1|", "0|")) or not entry: - return - variables["cursor"] = cursor + return extr._update_cursor(None) + variables["cursor"] = extr._update_cursor(cursor) def _handle_ratelimit(self, response): rl = self.extractor.config("ratelimit") @@ -1864,7 +1963,7 @@ def _login_impl(extr, username, password): }, } elif subtask == "LoginEnterAlternateIdentifierSubtask": - alt = extr.config("username_alt") or extr.input( + alt = extr.config("username-alt") or extr.input( "Alternate Identifier (username, email, phone number): ") data = { "enter_text": { |
