about | summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
author Libravatar Unit 193 <unit193@unit193.net> 2024-08-03 20:27:44 -0400
committer Libravatar Unit 193 <unit193@unit193.net> 2024-08-03 20:27:44 -0400
commit 032e5bed275a253e122ed9ac86dac7b8c4204172 (patch)
tree b4eda52ebfe00c4d22e9d633b1ab2d158a9f0573 /gallery_dl/extractor/twitter.py
parent 80e39a8fc7de105510cbbdca8507f2a4b8c9e01d (diff)
New upstream version 1.27.2. (ref: upstream/1.27.2)
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py 157
1 files changed, 128 insertions, 29 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ec098aa..9fa5b3f 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -51,6 +51,8 @@ class TwitterExtractor(Extractor):
if not self.config("transform", True):
self._transform_user = util.identity
self._transform_tweet = util.identity
+
+ self._cursor = None
self._user = None
self._user_obj = None
self._user_cache = {}
@@ -321,8 +323,17 @@ class TwitterExtractor(Extractor):
"quote_count" : tget("quote_count"),
"reply_count" : tget("reply_count"),
"retweet_count" : tget("retweet_count"),
+ "bookmark_count": tget("bookmark_count"),
}
+ if "views" in tweet:
+ try:
+ tdata["view_count"] = int(tweet["views"]["count"])
+ except Exception:
+ tdata["view_count"] = 0
+ else:
+ tdata["view_count"] = 0
+
if "note_tweet" in tweet:
note = tweet["note_tweet"]["note_tweet_results"]["result"]
content = note["text"]
@@ -492,6 +503,14 @@ class TwitterExtractor(Extractor):
},
}
+ def _init_cursor(self):  # starting cursor for the _pagination_* helpers: the "cursor" config option
+ return self.config("cursor") or None  # any falsy setting (unset, empty string) becomes None
+
+ def _update_cursor(self, cursor):  # record the latest cursor on the extractor and hand it back to the caller
+ self.log.debug("Cursor: %s", cursor)
+ self._cursor = cursor  # read by finalize() to build the resume hint
+ return cursor  # returned unchanged, so callers can assign the result straight into their request parameters
+
def metadata(self):
"""Return general metadata"""
return {}  # this implementation supplies no metadata: always an empty dict
@@ -499,6 +518,11 @@ class TwitterExtractor(Extractor):
def tweets(self):  # no statements besides the docstring here; TwitterTimelineExtractor defines its own tweets()
"""Yield all relevant tweet objects"""
+ def finalize(self):  # log how to resume when a cursor is still recorded on the extractor
+ if self._cursor:  # falsy (e.g. None after _update_cursor(None)) means there is nothing to resume from
+ self.log.info("Use '-o cursor=%s' to continue downloading "
+ "from the current position", self._cursor)
+
def login(self):
if self.cookies_check(self.cookies_names):
return
@@ -530,6 +554,9 @@ class TwitterUserExtractor(TwitterExtractor):
def initialize(self):  # no-op in this class
pass
+ def finalize(self):  # no-op in this class, so the cursor hint logged by TwitterExtractor.finalize() is not emitted here
+ pass
+
def items(self):
base = "{}/{}/".format(self.root, self.user)
return self._dispatch_extractors((
@@ -549,30 +576,73 @@ class TwitterTimelineExtractor(TwitterExtractor):
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)"
example = "https://x.com/USER/timeline"
+ def _init_cursor(self):  # cursor to hand to the API: the text after the first "/" of self._cursor
+ if self._cursor:
+ return self._cursor.partition("/")[2] or None  # no "/" or nothing after it -> None
+ return None
+
+ def _update_cursor(self, cursor):  # store the cursor behind the current stage prefix; return the bare cursor
+ if cursor:
+ self._cursor = self._cursor_prefix + cursor  # _cursor_prefix is assigned in tweets(): "1/", "2_<id>/" or "3_<id>/"
+ self.log.debug("Cursor: %s", self._cursor)
+ else:
+ self._cursor = None  # a falsy cursor clears the stored position
+ return cursor
+
def tweets(self):
+ """Yield a user's tweets in up to three resumable stages
+
+ Stage 1 yields from the source chosen by _select_tweet_source().
+ Stage 2 runs a 'from:<name> max_id:<tweet_id>' search with
+ ' filter:links' appended (skipped when self.textonly is set) and
+ ends the run if it yielded anything.
+ Stage 3 runs the same search without the filter.
+
+ The "cursor" config option selects where to start. Its part before
+ the first "/" is parsed as '<stage>[_<tweet_id>]'; the rest is the
+ cursor returned by _init_cursor().
+ """
- # yield initial batch of (media) tweets
- tweet = None
- for tweet in self._select_tweet_source()(self.user):
- yield tweet
- if tweet is None:
- return
+ self._cursor = cursor = self.config("cursor") or None
+ reset = False
- # build search query
- query = "from:{} max_id:{}".format(
- self._user["name"], tweet["rest_id"])
- if self.retweets:
- query += " include:retweets include:nativeretweets"
+ if cursor:
+ # split '<stage>[_<tweet_id>]' off the front of the cursor
+ state = cursor.partition("/")[0]
+ state, _, tweet_id = state.partition("_")
+ state = text.parse_int(state, 1)
+ else:
+ state = 1
+
+ if state <= 1:
+ self._cursor_prefix = "1/"
- if not self.textonly:
- # try to search for media-only tweets
+ # yield initial batch of (media) tweets
tweet = None
- for tweet in self.api.search_timeline(query + " filter:links"):
+ for tweet in self._select_tweet_source()(self.user):
yield tweet
- if tweet is not None:
+ if tweet is None and not cursor:
return
+ if tweet is None:
+ # resumed from a cursor but nothing was yielded: there is no
+ # tweet to take 'rest_id' from, so end the run cleanly instead
+ # of raising TypeError on tweet["rest_id"]
+ return self._update_cursor(None)
+ tweet_id = tweet["rest_id"]
+
+ state = reset = 2
+ else:
+ self.api._user_id_by_screen_name(self.user)
+
+ # build search query
+ query = "from:{} max_id:{}".format(self._user["name"], tweet_id)
+ if self.retweets:
+ query += " include:retweets include:nativeretweets"
- # yield unfiltered search results
- yield from self.api.search_timeline(query)
+ if state <= 2:
+ self._cursor_prefix = "2_{}/".format(tweet_id)
+ if reset:
+ # entering this stage fresh: store the bare stage prefix
+ self._cursor = self._cursor_prefix
+
+ if not self.textonly:
+ # try to search for media-only tweets
+ tweet = None
+ for tweet in self.api.search_timeline(query + " filter:links"):
+ yield tweet
+ if tweet is not None:
+ return self._update_cursor(None)
+
+ state = reset = 3
+
+ if state <= 3:
+ # yield unfiltered search results
+ self._cursor_prefix = "3_{}/".format(tweet_id)
+ if reset:
+ self._cursor = self._cursor_prefix
+
+ yield from self.api.search_timeline(query)
+ return self._update_cursor(None)
def _select_tweet_source(self):
strategy = self.config("strategy")
@@ -854,6 +924,24 @@ class TwitterQuotesExtractor(TwitterExtractor):
yield Message.Queue, url, data
+class TwitterInfoExtractor(TwitterExtractor):
+ """Extractor for a user's profile data"""
+ subcategory = "info"
+ pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/info"  # one capture group: the path segment before "/info", never "search"
+ example = "https://x.com/USER/info"
+
+ def items(self):  # return an iterator holding one Message.Directory entry with the transformed user object
+ api = TwitterAPI(self)
+
+ screen_name = self.user
+ if screen_name.startswith("id:"):  # "id:<value>" is looked up by REST ID instead of by screen name
+ user = api.user_by_rest_id(screen_name[3:])  # [3:] drops the "id:" prefix
+ else:
+ user = api.user_by_screen_name(screen_name)
+
+ return iter(((Message.Directory, self._transform_user(user)),))  # a one-element tuple of (message type, data)
+
+
class TwitterAvatarExtractor(TwitterExtractor):
subcategory = "avatar"
filename_fmt = "avatar {date}.{extension}"
@@ -1388,7 +1476,11 @@ class TwitterAPI():
"%s %s (%s)", response.status_code, response.reason, errors)
def _pagination_legacy(self, endpoint, params):
- original_retweets = (self.extractor.retweets == "original")
+ extr = self.extractor
+ cursor = extr._init_cursor()
+ if cursor:
+ params["cursor"] = cursor
+ original_retweets = (extr.retweets == "original")
bottom = ("cursor-bottom-", "sq-cursor-bottom")
while True:
@@ -1396,7 +1488,7 @@ class TwitterAPI():
instructions = data["timeline"]["instructions"]
if not instructions:
- return
+ return extr._update_cursor(None)
tweets = data["globalObjects"]["tweets"]
users = data["globalObjects"]["users"]
@@ -1477,8 +1569,8 @@ class TwitterAPI():
# stop on empty response
if not cursor or (not tweets and not tweet_id):
- return
- params["cursor"] = cursor
+ return extr._update_cursor(None)
+ params["cursor"] = extr._update_cursor(cursor)
def _pagination_tweets(self, endpoint, variables,
path=None, stop_tweets=True, features=None):
@@ -1487,6 +1579,9 @@ class TwitterAPI():
pinned_tweet = extr.pinned
params = {"variables": None}
+ cursor = extr._init_cursor()
+ if cursor:
+ variables["cursor"] = cursor
if features is None:
features = self.features_pagination
if features:
@@ -1523,7 +1618,7 @@ class TwitterAPI():
cursor = entry["content"]["value"]
if entries is None:
if not cursor:
- return
+ return extr._update_cursor(None)
entries = ()
except LookupError:
@@ -1672,12 +1767,16 @@ class TwitterAPI():
continue
if stop_tweets and not tweet:
- return
+ return extr._update_cursor(None)
if not cursor or cursor == variables.get("cursor"):
- return
- variables["cursor"] = cursor
+ return extr._update_cursor(None)
+ variables["cursor"] = extr._update_cursor(cursor)
def _pagination_users(self, endpoint, variables, path=None):
+ extr = self.extractor
+ cursor = extr._init_cursor()
+ if cursor:
+ variables["cursor"] = cursor
params = {
"variables": None,
"features" : self._json_dumps(self.features_pagination),
@@ -1697,7 +1796,7 @@ class TwitterAPI():
data = data[key]
instructions = data["instructions"]
except KeyError:
- return
+ return extr._update_cursor(None)
for instr in instructions:
if instr["type"] == "TimelineAddEntries":
@@ -1715,8 +1814,8 @@ class TwitterAPI():
cursor = entry["content"]["value"]
if not cursor or cursor.startswith(("-1|", "0|")) or not entry:
- return
- variables["cursor"] = cursor
+ return extr._update_cursor(None)
+ variables["cursor"] = extr._update_cursor(cursor)
def _handle_ratelimit(self, response):
rl = self.extractor.config("ratelimit")
@@ -1864,7 +1963,7 @@ def _login_impl(extr, username, password):
},
}
elif subtask == "LoginEnterAlternateIdentifierSubtask":
- alt = extr.config("username_alt") or extr.input(
+ alt = extr.config("username-alt") or extr.input(
"Alternate Identifier (username, email, phone number): ")
data = {
"enter_text": {