summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
author Libravatar Unit 193 <unit193@unit193.net> 2024-03-25 02:57:44 -0400
committer Libravatar Unit 193 <unit193@unit193.net> 2024-03-25 02:57:44 -0400
commit 6e662211019a89caec44de8a57c675872b0b5498 (patch)
tree 5d9d5a2b7efc3a24dd6074e99b253b639fe5af1d /gallery_dl/extractor/twitter.py
parent 01166fa52707cc282467427cf0e65c1b8983c4be (diff)
New upstream version 1.26.9. upstream/1.26.9
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py 70
1 files changed, 53 insertions, 17 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ad5bfc6..a5bd984 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -340,6 +340,8 @@ class TwitterExtractor(Extractor):
txt, _, tco = content.rpartition(" ")
tdata["content"] = txt if tco.startswith("https://t.co/") else content
+ if "birdwatch_pivot" in tweet:
+ tdata["birdwatch"] = tweet["birdwatch_pivot"]["subtitle"]["text"]
if "in_reply_to_screen_name" in legacy:
tdata["reply_to"] = legacy["in_reply_to_screen_name"]
if "quoted_by" in legacy:
@@ -380,6 +382,7 @@ class TwitterExtractor(Extractor):
"date" : text.parse_datetime(
uget("created_at"), "%a %b %d %H:%M:%S %z %Y"),
"verified" : uget("verified", False),
+ "protected" : uget("protected", False),
"profile_banner" : uget("profile_banner_url", ""),
"profile_image" : uget(
"profile_image_url_https", "").replace("_normal.", "."),
@@ -731,9 +734,9 @@ class TwitterEventExtractor(TwitterExtractor):
class TwitterTweetExtractor(TwitterExtractor):
- """Extractor for images from individual tweets"""
+ """Extractor for individual tweets"""
subcategory = "tweet"
- pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)"
+ pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)/?$"
example = "https://twitter.com/USER/status/12345"
def __init__(self, match):
@@ -810,6 +813,18 @@ class TwitterTweetExtractor(TwitterExtractor):
return itertools.chain(buffer, tweets)
+class TwitterQuotesExtractor(TwitterExtractor):
+ """Extractor for quotes of a Tweet"""
+ subcategory = "quotes"
+ pattern = BASE_PATTERN + r"/(?:[^/?#]+|i/web)/status/(\d+)/quotes"
+ example = "https://twitter.com/USER/status/12345/quotes"
+
+ def items(self):
+ url = "{}/search?q=quoted_tweet_id:{}".format(self.root, self.user)
+ data = {"_extractor": TwitterSearchExtractor}
+ yield Message.Queue, url, data
+
+
class TwitterAvatarExtractor(TwitterExtractor):
subcategory = "avatar"
filename_fmt = "avatar {date}.{extension}"
@@ -882,6 +897,7 @@ class TwitterAPI():
def __init__(self, extractor):
self.extractor = extractor
+ self.log = extractor.log
self.root = "https://twitter.com/i/api"
self._nsfw_warning = True
@@ -1244,7 +1260,7 @@ class TwitterAPI():
@cache(maxage=3600)
def _guest_token(self):
endpoint = "/1.1/guest/activate.json"
- self.extractor.log.info("Requesting guest token")
+ self.log.info("Requesting guest token")
return str(self._call(
endpoint, None, "POST", False, "https://api.twitter.com",
)["guest_token"])
@@ -1274,17 +1290,35 @@ class TwitterAPI():
if response.status_code < 400:
data = response.json()
- if not data.get("errors") or not any(
- (e.get("message") or "").lower().startswith("timeout")
- for e in data["errors"]):
- return data # success or non-timeout errors
- msg = data["errors"][0].get("message") or "Unspecified"
- self.extractor.log.debug("Internal Twitter error: '%s'", msg)
+ errors = data.get("errors")
+ if not errors:
+ return data
- if self.headers["x-twitter-auth-type"]:
- self.extractor.log.debug("Retrying API request")
- continue # retry
+ retry = False
+ for error in errors:
+ msg = error.get("message") or "Unspecified"
+ self.log.debug("API error: '%s'", msg)
+
+ if "this account is temporarily locked" in msg:
+ msg = "Account temporarily locked"
+ if self.extractor.config("locked") != "wait":
+ raise exception.AuthorizationError(msg)
+ self.log.warning("%s. Press ENTER to retry.", msg)
+ try:
+ input()
+ except (EOFError, OSError):
+ pass
+ retry = True
+
+ elif msg.lower().startswith("timeout"):
+ retry = True
+
+ if not retry:
+ return data
+ elif self.headers["x-twitter-auth-type"]:
+ self.log.debug("Retrying API request")
+ continue
# fall through to "Login Required"
response.status_code = 404
@@ -1374,7 +1408,7 @@ class TwitterAPI():
try:
tweet = tweets[tweet_id]
except KeyError:
- self.extractor.log.debug("Skipping %s (deleted)", tweet_id)
+ self.log.debug("Skipping %s (deleted)", tweet_id)
continue
if "retweeted_status_id_str" in tweet:
@@ -1606,8 +1640,10 @@ class TwitterAPI():
variables["cursor"] = cursor
def _pagination_users(self, endpoint, variables, path=None):
- params = {"variables": None,
- "features" : self._json_dumps(self.features_pagination)}
+ params = {
+ "variables": None,
+ "features" : self._json_dumps(self.features_pagination),
+ }
while True:
cursor = entry = None
@@ -1651,9 +1687,9 @@ class TwitterAPI():
if text.startswith("Age-restricted"):
if self._nsfw_warning:
self._nsfw_warning = False
- self.extractor.log.warning('"%s"', text)
+ self.log.warning('"%s"', text)
- self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text)
+ self.log.debug("Skipping %s ('%s')", tweet_id, text)
@cache(maxage=365*86400, keyarg=1)