author     Unit 193 <unit193@unit193.net>  2025-12-20 05:49:11 -0500
committer  Unit 193 <unit193@unit193.net>  2025-12-20 05:49:11 -0500
commit     c586ea4b3c871f5696626f9820e8c88a4e78f4a6 (patch)
tree       e6d7bae96282c3d147159f091d451e53bdaa2efe /gallery_dl/extractor/twitter.py
parent     01a2bf622c31072d1322884584404b9bd59b28cc (diff)
parent     a24ec1647aeac35a63b744ea856011ad6e06be3b (diff)
Update upstream source from tag 'upstream/1.31.1'
Update to upstream version '1.31.1' with Debian dir b5d91c25143175f933b1c69c7e82249cd7e145ab
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r--  gallery_dl/extractor/twitter.py  252
1 file changed, 170 insertions(+), 82 deletions(-)
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index bf125a6..546e8e1 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -76,7 +76,7 @@ class TwitterExtractor(Extractor):
seen_tweets = set() if self.config("unique", True) else None
if self.twitpic:
- self._find_twitpic = util.re(
+ self._find_twitpic = text.re(
r"https?(://twitpic\.com/(?!photos/)\w+)").findall
tweets = self.tweets()
@@ -124,12 +124,11 @@ class TwitterExtractor(Extractor):
tdata = self._transform_tweet(tweet)
tdata.update(metadata)
tdata["count"] = len(files)
- yield Message.Directory, tdata
+ yield Message.Directory, "", tdata
- del tdata["source_id"]
- del tdata["sensitive_flags"]
- if "source_user" in tdata:
- del tdata["source_user"]
+ tdata.pop("source_id", None)
+ tdata.pop("source_user", None)
+ tdata.pop("sensitive_flags", None)
for tdata["num"], file in enumerate(files, 1):
file.update(tdata)
@@ -146,7 +145,7 @@ class TwitterExtractor(Extractor):
self._extract_media(
data, data["extended_entities"]["media"], files)
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.warning(
"%s: Error while extracting media files (%s: %s)",
data["id_str"], exc.__class__.__name__, exc)
@@ -155,7 +154,7 @@ class TwitterExtractor(Extractor):
try:
self._extract_card(tweet, files)
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.warning(
"%s: Error while extracting Card files (%s: %s)",
data["id_str"], exc.__class__.__name__, exc)
@@ -164,7 +163,7 @@ class TwitterExtractor(Extractor):
try:
self._extract_twitpic(data, files)
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.warning(
"%s: Error while extracting TwitPic files (%s: %s)",
data["id_str"], exc.__class__.__name__, exc)
@@ -347,32 +346,36 @@ class TwitterExtractor(Extractor):
files.append({"url": url})
def _transform_tweet(self, tweet):
+ if "legacy" in tweet:
+ legacy = tweet["legacy"]
+ else:
+ legacy = tweet
+ tweet_id = int(legacy["id_str"])
+
if "author" in tweet:
author = tweet["author"]
elif "core" in tweet:
- author = tweet["core"]["user_results"]["result"]
+ try:
+ author = tweet["core"]["user_results"]["result"]
+ except KeyError:
+ self.log.warning("%s: Missing 'author' data", tweet_id)
+ author = util.NONE
else:
author = tweet["user"]
author = self._transform_user(author)
- if "legacy" in tweet:
- legacy = tweet["legacy"]
- else:
- legacy = tweet
- tget = legacy.get
-
- tweet_id = int(legacy["id_str"])
if tweet_id >= 300000000000000:
- date = text.parse_timestamp(
+ date = self.parse_timestamp(
((tweet_id >> 22) + 1288834974657) // 1000)
else:
try:
- date = text.parse_datetime(
+ date = self.parse_datetime(
legacy["created_at"], "%a %b %d %H:%M:%S %z %Y")
except Exception:
date = util.NONE
source = tweet.get("source")
+ tget = legacy.get
tdata = {
"tweet_id" : tweet_id,
"retweet_id" : text.parse_int(
@@ -439,6 +442,8 @@ class TwitterExtractor(Extractor):
txt, _, tco = content.rpartition(" ")
tdata["content"] = txt if tco.startswith("https://t.co/") else content
+ if "pinned" in tweet:
+ tdata["pinned"] = True
if "birdwatch_pivot" in tweet:
try:
tdata["birdwatch"] = \
@@ -455,7 +460,7 @@ class TwitterExtractor(Extractor):
tdata, legacy["extended_entities"]["media"][0])
if tdata["retweet_id"]:
tdata["content"] = f"RT @{author['name']}: {tdata['content']}"
- tdata["date_original"] = text.parse_timestamp(
+ tdata["date_original"] = self.parse_timestamp(
((tdata["retweet_id"] >> 22) + 1288834974657) // 1000)
return tdata
@@ -492,7 +497,7 @@ class TwitterExtractor(Extractor):
"id": text.parse_int(cid),
"name": com.get("name"),
"description": com.get("description"),
- "date": text.parse_timestamp(com.get("created_at", 0) // 1000),
+ "date": self.parse_timestamp(com.get("created_at", 0) // 1000),
"nsfw": com.get("is_nsfw"),
"role": com.get("role"),
"member_count": com.get("member_count"),
@@ -528,13 +533,13 @@ class TwitterExtractor(Extractor):
"id" : text.parse_int(uid),
"name" : core.get("screen_name"),
"nick" : core.get("name"),
- "location" : user["location"]["location"],
- "date" : text.parse_datetime(
+ "location" : user["location"].get("location"),
+ "date" : self.parse_datetime(
core["created_at"], "%a %b %d %H:%M:%S %z %Y"),
"verified" : user["verification"]["verified"],
"protected" : user["privacy"]["protected"],
"profile_banner" : lget("profile_banner_url", ""),
- "profile_image" : user["avatar"]["image_url"].replace(
+ "profile_image" : user["avatar"].get("image_url", "").replace(
"_normal.", "."),
"favourites_count": lget("favourites_count"),
"followers_count" : lget("followers_count"),
@@ -591,9 +596,12 @@ class TwitterExtractor(Extractor):
obj = tweet["legacy"] if "legacy" in tweet else tweet
cid = obj.get("conversation_id_str")
if not cid:
- tid = obj["id_str"]
- self.log.warning(
- "Unable to expand %s (no 'conversation_id')", tid)
+ if cid is False:
+ yield tweet
+ else:
+ tid = obj["id_str"]
+ self.log.warning(
+ "Unable to expand %s (no 'conversation_id')", tid)
continue
if cid in seen:
self.log.debug(
@@ -608,6 +616,7 @@ class TwitterExtractor(Extractor):
def _make_tweet(self, user, url, id_str):
return {
"id_str": id_str,
+ "conversation_id_str": False,
"lang": None,
"user": user,
"source": "><",
@@ -658,8 +667,8 @@ class TwitterExtractor(Extractor):
class TwitterHomeExtractor(TwitterExtractor):
"""Extractor for Twitter home timelines"""
subcategory = "home"
- pattern = (BASE_PATTERN +
- r"/(?:home(?:/fo(?:llowing|r[-_ ]?you()))?|i/timeline)/?$")
+ pattern = (rf"{BASE_PATTERN}/"
+ rf"(?:home(?:/fo(?:llowing|r[-_ ]?you()))?|i/timeline)/?$")
example = "https://x.com/home"
def tweets(self):
@@ -671,7 +680,7 @@ class TwitterHomeExtractor(TwitterExtractor):
class TwitterSearchExtractor(TwitterExtractor):
"""Extractor for Twitter search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
+ pattern = rf"{BASE_PATTERN}/search/?\?(?:[^&#]+&)*q=([^&#]+)"
example = "https://x.com/search?q=QUERY"
def metadata(self):
@@ -702,7 +711,7 @@ class TwitterSearchExtractor(TwitterExtractor):
class TwitterHashtagExtractor(TwitterExtractor):
"""Extractor for Twitter hashtags"""
subcategory = "hashtag"
- pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/hashtag/([^/?#]+)"
example = "https://x.com/hashtag/NAME"
def items(self):
@@ -713,7 +722,7 @@ class TwitterHashtagExtractor(TwitterExtractor):
class TwitterUserExtractor(Dispatch, TwitterExtractor):
"""Extractor for a Twitter user"""
- pattern = (BASE_PATTERN + r"/(?:"
+ pattern = (rf"{BASE_PATTERN}/(?:"
r"([^/?#]+)/?(?:$|\?|#)"
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
example = "https://x.com/USER"
@@ -890,7 +899,7 @@ class TwitterLikesExtractor(TwitterExtractor):
class TwitterBookmarkExtractor(TwitterExtractor):
"""Extractor for bookmarked tweets"""
subcategory = "bookmark"
- pattern = BASE_PATTERN + r"/i/bookmarks()"
+ pattern = rf"{BASE_PATTERN}/i/bookmarks()"
example = "https://x.com/i/bookmarks"
def tweets(self):
@@ -898,7 +907,7 @@ class TwitterBookmarkExtractor(TwitterExtractor):
def _transform_tweet(self, tweet):
tdata = TwitterExtractor._transform_tweet(self, tweet)
- tdata["date_bookmarked"] = text.parse_timestamp(
+ tdata["date_bookmarked"] = self.parse_timestamp(
(int(tweet["sortIndex"] or 0) >> 20) // 1000)
return tdata
@@ -906,7 +915,7 @@ class TwitterBookmarkExtractor(TwitterExtractor):
class TwitterListExtractor(TwitterExtractor):
"""Extractor for Twitter lists"""
subcategory = "list"
- pattern = BASE_PATTERN + r"/i/lists/(\d+)/?$"
+ pattern = rf"{BASE_PATTERN}/i/lists/(\d+)/?$"
example = "https://x.com/i/lists/12345"
def tweets(self):
@@ -916,7 +925,7 @@ class TwitterListExtractor(TwitterExtractor):
class TwitterListMembersExtractor(TwitterExtractor):
"""Extractor for members of a Twitter list"""
subcategory = "list-members"
- pattern = BASE_PATTERN + r"/i/lists/(\d+)/members"
+ pattern = rf"{BASE_PATTERN}/i/lists/(\d+)/members"
example = "https://x.com/i/lists/12345/members"
def items(self):
@@ -952,7 +961,7 @@ class TwitterCommunityExtractor(TwitterExtractor):
directory_fmt = ("{category}", "Communities",
"{community[name]} ({community[id]})")
archive_fmt = "C_{community[id]}_{tweet_id}_{num}"
- pattern = BASE_PATTERN + r"/i/communities/(\d+)"
+ pattern = rf"{BASE_PATTERN}/i/communities/(\d+)"
example = "https://x.com/i/communities/12345"
def tweets(self):
@@ -966,7 +975,7 @@ class TwitterCommunitiesExtractor(TwitterExtractor):
subcategory = "communities"
directory_fmt = TwitterCommunityExtractor.directory_fmt
archive_fmt = TwitterCommunityExtractor.archive_fmt
- pattern = BASE_PATTERN + r"/([^/?#]+)/communities/?$"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/communities/?$"
example = "https://x.com/i/communities"
def tweets(self):
@@ -978,7 +987,7 @@ class TwitterEventExtractor(TwitterExtractor):
subcategory = "event"
directory_fmt = ("{category}", "Events",
"{event[id]} {event[short_title]}")
- pattern = BASE_PATTERN + r"/i/events/(\d+)"
+ pattern = rf"{BASE_PATTERN}/i/events/(\d+)"
example = "https://x.com/i/events/12345"
def metadata(self):
@@ -991,7 +1000,7 @@ class TwitterEventExtractor(TwitterExtractor):
class TwitterTweetExtractor(TwitterExtractor):
"""Extractor for individual tweets"""
subcategory = "tweet"
- pattern = (BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)"
+ pattern = (rf"{BASE_PATTERN}/([^/?#]+|i/web)/status/(\d+)"
r"/?(?:$|\?|#|photo/|video/)")
example = "https://x.com/USER/status/12345"
@@ -1072,7 +1081,7 @@ class TwitterTweetExtractor(TwitterExtractor):
class TwitterQuotesExtractor(TwitterExtractor):
"""Extractor for quotes of a Tweet"""
subcategory = "quotes"
- pattern = BASE_PATTERN + r"/(?:[^/?#]+|i/web)/status/(\d+)/quotes"
+ pattern = rf"{BASE_PATTERN}/(?:[^/?#]+|i/web)/status/(\d+)/quotes"
example = "https://x.com/USER/status/12345/quotes"
def items(self):
@@ -1096,7 +1105,7 @@ class TwitterInfoExtractor(TwitterExtractor):
else:
user = api.user_by_screen_name(screen_name)
- return iter(((Message.Directory, self._transform_user(user)),))
+ return iter(((Message.Directory, "", self._transform_user(user)),))
class TwitterAvatarExtractor(TwitterExtractor):
@@ -1162,7 +1171,7 @@ class TwitterImageExtractor(Extractor):
"_fallback": TwitterExtractor._image_fallback(self, base),
}
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, base + self._size_image, data
@@ -1369,7 +1378,7 @@ class TwitterAPI():
endpoint = "/graphql/E8Wq-_jFSaU7hxVcuOPR9g/UserTweets"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
"withQuickPromoteEligibilityTweetFields": False,
"withVoice": True,
@@ -1384,7 +1393,7 @@ class TwitterAPI():
endpoint = "/graphql/-O3QOHrVn1aOm_cF5wyTCQ/UserTweetsAndReplies"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
"withCommunity": True,
"withVoice": True,
@@ -1399,7 +1408,7 @@ class TwitterAPI():
endpoint = "/graphql/gmHw9geMTncZ7jeLLUUNOw/UserHighlightsTweets"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
"withVoice": True,
}
@@ -1413,7 +1422,7 @@ class TwitterAPI():
endpoint = "/graphql/jCRhbOzdgOHp6u9H4g2tEg/UserMedia"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
"withClientEventToken": False,
"withBirdwatchNotes": False,
@@ -1429,7 +1438,7 @@ class TwitterAPI():
endpoint = "/graphql/TGEKkJG_meudeaFcqaxM-Q/Likes"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
"withClientEventToken": False,
"withBirdwatchNotes": False,
@@ -1444,32 +1453,45 @@ class TwitterAPI():
def user_bookmarks(self):
endpoint = "/graphql/pLtjrO4ubNh996M_Cubwsg/Bookmarks"
variables = {
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
}
return self._pagination_tweets(
endpoint, variables, ("bookmark_timeline_v2", "timeline"),
stop_tweets=128)
- def search_timeline(self, query, product="Latest"):
+ def search_timeline(self, query, product=None):
+ cfg = self.extractor.config
+
+ if product is None:
+ if product := cfg("search-results"):
+ product = {
+ "top" : "Top",
+ "live" : "Latest",
+ "user" : "People",
+ "media": "Media",
+ "list" : "Lists",
+ }.get(product.lower(), product).capitalize()
+ else:
+ product = "Latest"
+
endpoint = "/graphql/4fpceYZ6-YQCx_JSl_Cn_A/SearchTimeline"
variables = {
"rawQuery": query,
- "count": self.extractor.config("search-limit", 20),
+ "count": cfg("search-limit", 20),
"querySource": "typed_query",
"product": product,
"withGrokTranslatedBio": False,
}
- if self.extractor.config("search-pagination") in (
- "max_id", "maxid", "id"):
+ if cfg("search-pagination") in ("max_id", "maxid", "id"):
update_variables = self._update_variables_search
else:
update_variables = None
- stop_tweets = self.extractor.config("search-stop")
+ stop_tweets = cfg("search-stop")
if stop_tweets is None or stop_tweets == "auto":
- stop_tweets = 3 if update_variables is None else 0
+ stop_tweets = 3
return self._pagination_tweets(
endpoint, variables,
@@ -1494,7 +1516,7 @@ class TwitterAPI():
endpoint = "/graphql/Nyt-88UX4-pPCImZNUl9RQ/CommunityTweetsTimeline"
variables = {
"communityId": community_id,
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"displayLocation": "Community",
"rankingMode": "Recency",
"withCommunity": True,
@@ -1508,7 +1530,7 @@ class TwitterAPI():
endpoint = "/graphql/ZniZ7AAK_VVu1xtSx1V-gQ/CommunityMediaTimeline"
variables = {
"communityId": community_id,
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"withCommunity": True,
}
return self._pagination_tweets(
@@ -1520,7 +1542,7 @@ class TwitterAPI():
endpoint = ("/graphql/p048a9n3hTPppQyK7FQTFw"
"/CommunitiesMainPageTimeline")
variables = {
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"withCommunity": True,
}
return self._pagination_tweets(
@@ -1530,7 +1552,7 @@ class TwitterAPI():
def home_timeline(self):
endpoint = "/graphql/DXmgQYmIft1oLP6vMkJixw/HomeTimeline"
variables = {
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
"latestControlAvailable": True,
"withCommunity": True,
@@ -1541,7 +1563,7 @@ class TwitterAPI():
def home_latest_timeline(self):
endpoint = "/graphql/SFxmNKWfN9ySJcXG_tjX8g/HomeLatestTimeline"
variables = {
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
"latestControlAvailable": True,
}
@@ -1568,7 +1590,7 @@ class TwitterAPI():
endpoint = "/graphql/06JtmwM8k_1cthpFZITVVA/ListLatestTweetsTimeline"
variables = {
"listId": list_id,
- "count": 100,
+ "count": self.extractor.config("limit", 50),
}
return self._pagination_tweets(
endpoint, variables, ("list", "tweets_timeline", "timeline"))
@@ -1654,10 +1676,8 @@ class TwitterAPI():
self.extractor._assign_user(user)
return user["rest_id"]
except KeyError:
- if "unavailable_message" in user:
- raise exception.NotFoundError(
- f"{user['unavailable_message'].get('text')} "
- f"({user.get('reason')})", False)
+ if user and user.get("__typename") == "UserUnavailable":
+ raise exception.NotFoundError(user["message"], False)
else:
raise exception.NotFoundError("user")
@@ -1700,7 +1720,7 @@ class TwitterAPI():
self.client_transaction.generate_transaction_id(method, path)
def _call(self, endpoint, params, method="GET", auth=True, root=None):
- url = (root or self.root) + endpoint
+ url = (self.root if root is None else root) + endpoint
while True:
if auth:
@@ -1877,8 +1897,17 @@ class TwitterAPI():
features=None, field_toggles=None):
extr = self.extractor
original_retweets = (extr.retweets == "original")
- pinned_tweet = extr.pinned
+ pinned_tweet = True if extr.pinned else None
stop_tweets_max = stop_tweets
+ api_retries = None
+
+ if isinstance(count := variables.get("count"), list):
+ count = count.copy()
+ count.reverse()
+ self.log.debug("Using 'count: %s'", count[-1])
+ variables["count"] = count.pop()
+ else:
+ count = False
params = {"variables": None}
if cursor := extr._init_cursor():
@@ -1892,14 +1921,14 @@ class TwitterAPI():
while True:
params["variables"] = self._json_dumps(variables)
- data = self._call(endpoint, params)["data"]
+ data = self._call(endpoint, params)
try:
if path is None:
- instructions = (data["user"]["result"]["timeline"]
+ instructions = (data["data"]["user"]["result"]["timeline"]
["timeline"]["instructions"])
else:
- instructions = data
+ instructions = data["data"]
for key in path:
instructions = instructions[key]
instructions = instructions["instructions"]
@@ -1916,7 +1945,7 @@ class TwitterAPI():
elif instr_type == "TimelineAddToModule":
entries = instr["moduleItems"]
elif instr_type == "TimelinePinEntry":
- if pinned_tweet:
+ if pinned_tweet is not None:
pinned_tweet = instr["entry"]
elif instr_type == "TimelineReplaceEntry":
entry = instr["entry"]
@@ -1930,6 +1959,26 @@ class TwitterAPI():
except LookupError:
extr.log.debug(data)
+ if errors := data.get("errors"):
+ if api_retries is None:
+ api_tries = 1
+ api_retries = extr.config("retries-api", 9)
+ if api_retries < 0:
+ api_retries = float("inf")
+
+ err = []
+ srv = False
+ for e in errors:
+ err.append(f"- '{e.get('message') or e.get('name')}'")
+ if e.get("source") == "Server":
+ srv = True
+
+ self.log.warning("API errors (%s/%s):\n%s",
+ api_tries, api_retries+1, "\n".join(err))
+ if srv and api_tries <= api_retries:
+ api_tries += 1
+ continue
+
if user := extr._user_obj:
user = user["legacy"]
if user.get("blocked_by"):
@@ -1950,14 +1999,13 @@ class TwitterAPI():
"Unable to retrieve Tweets from this timeline")
tweets = []
- tweet = None
+ tweet = last_tweet = retry = None
+ api_tries = 1
- if pinned_tweet:
- if isinstance(pinned_tweet, dict):
- tweets.append(pinned_tweet)
- elif instructions[-1]["type"] == "TimelinePinEntry":
- tweets.append(instructions[-1]["entry"])
- pinned_tweet = False
+ if pinned_tweet is not None and isinstance(pinned_tweet, dict):
+ pinned_tweet["pinned"] = True
+ tweets.append(pinned_tweet)
+ pinned_tweet = None
for entry in entries:
esw = entry["entryId"].startswith
@@ -1965,6 +2013,7 @@ class TwitterAPI():
if esw("tweet-"):
tweets.append(entry)
elif esw(("profile-grid-",
+ "search-grid-",
"communities-grid-")):
if "content" in entry:
tweets.extend(entry["content"]["items"])
@@ -1988,6 +2037,28 @@ class TwitterAPI():
tweet = True
cursor = cursor.get("value")
+ if pinned_tweet is not None:
+ if extr._user_obj is None:
+ pinned = None
+ elif pinned := extr._user_obj["legacy"].get(
+ "pinned_tweet_ids_str"):
+ pinned = f"-tweet-{pinned[0]}"
+ for idx, entry in enumerate(tweets):
+ if entry["entryId"].endswith(pinned):
+ # mark as pinned / set 'pinned = True'
+ pinned_tweet = (
+ (entry.get("content") or entry["item"])
+ ["itemContent"]["tweet_results"]["result"])
+ if "tweet" in pinned_tweet:
+ pinned_tweet = pinned_tweet["tweet"]
+ pinned_tweet["pinned"] = True
+ # move to front of 'tweets'
+ del tweets[idx]
+ tweets.insert(0, entry)
+ break
+ del pinned
+ pinned_tweet = None
+
for entry in tweets:
try:
item = ((entry.get("content") or entry["item"])
@@ -2015,6 +2086,16 @@ class TwitterAPI():
(entry.get("entryId") or "").rpartition("-")[2])
continue
+ if retry is None:
+ try:
+ tweet["core"]["user_results"]["result"]
+ retry = False
+ except KeyError:
+ self.log.warning("Received Tweet results without "
+ "'core' data ... Retrying")
+ retry = True
+ break
+
if "retweeted_status_result" in legacy:
try:
retweet = legacy["retweeted_status_result"]["result"]
@@ -2071,18 +2152,25 @@ class TwitterAPI():
tweet.get("rest_id"))
continue
- if tweet:
+ if retry:
+ continue
+ elif tweet:
stop_tweets = stop_tweets_max
last_tweet = tweet
- else:
- if stop_tweets <= 0:
+ elif stop_tweets <= 0:
+ if not count:
return extr._update_cursor(None)
+ self.log.debug("Switching to 'count: %s'", count[-1])
+ variables["count"] = count.pop()
+ continue
+ else:
self.log.debug(
"No Tweet results (%s/%s)",
stop_tweets_max - stop_tweets + 1, stop_tweets_max)
stop_tweets -= 1
if not cursor or cursor == variables.get("cursor"):
+ self.log.debug("No continuation cursor")
return extr._update_cursor(None)
if update_variables is None:
@@ -2169,7 +2257,7 @@ class TwitterAPI():
else:
variables["rawQuery"] = f"{query} {max_id}"
- if prefix := self.extractor._cursor_prefix:
+ if prefix := getattr(self.extractor, "_cursor_prefix", None):
self.extractor._cursor_prefix = \
f"{prefix.partition('_')[0]}_{tweet_id}/"
variables["cursor"] = None