summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net>, 2023-01-11 04:09:13 -0500
committer: Unit 193 <unit193@unit193.net>, 2023-01-11 04:09:13 -0500
commit: fe385c3ff784ba3d19454a35446502c0ec295893 (patch)
tree: 897982793ef2a0c0f349044bf4cf803ccd483e6e /gallery_dl/extractor/twitter.py
parent: ebdfcd3cd3f76534a590ba08933ff7ea54813316 (diff)
New upstream version 1.24.3. [upstream/1.24.3]
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py 74
1 files changed, 46 insertions, 28 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 22aa78e..c2d8247 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -39,6 +39,7 @@ class TwitterExtractor(Extractor):
self.videos = self.config("videos", True)
self.cards = self.config("cards", False)
self.cards_blacklist = self.config("cards-blacklist")
+ self.syndication = self.config("syndication")
self._user = self._user_obj = None
self._user_cache = {}
self._init_sizes()
@@ -75,11 +76,6 @@ class TwitterExtractor(Extractor):
else:
data = tweet
- if seen_tweets is not None:
- if data["id_str"] in seen_tweets:
- continue
- seen_tweets.add(data["id_str"])
-
if not self.retweets and "retweeted_status_id_str" in data:
self.log.debug("Skipping %s (retweet)", data["id_str"])
continue
@@ -97,6 +93,13 @@ class TwitterExtractor(Extractor):
self.log.debug("Skipping %s (reply)", data["id_str"])
continue
+ if seen_tweets is not None:
+ if data["id_str"] in seen_tweets:
+ self.log.debug(
+ "Skipping %s (previously seen)", data["id_str"])
+ continue
+ seen_tweets.add(data["id_str"])
+
files = []
if "extended_entities" in data:
self._extract_media(
@@ -220,14 +223,16 @@ class TwitterExtractor(Extractor):
def _extract_twitpic(self, tweet, files):
for url in tweet["entities"].get("urls", ()):
url = url["expanded_url"]
- if "//twitpic.com/" in url and "/photos/" not in url:
- response = self.request(url, fatal=False)
- if response.status_code >= 400:
- continue
- url = text.extr(
- response.text, 'name="twitter:image" value="', '"')
- if url:
- files.append({"url": url})
+ if "//twitpic.com/" not in url or "/photos/" in url:
+ continue
+ if url.startswith("http:"):
+ url = "https" + url[4:]
+ response = self.request(url, fatal=False)
+ if response.status_code >= 400:
+ continue
+ url = text.extr(response.text, 'name="twitter:image" value="', '"')
+ if url:
+ files.append({"url": url})
def _transform_tweet(self, tweet):
if "author" in tweet:
@@ -299,6 +304,9 @@ class TwitterExtractor(Extractor):
if "legacy" in user:
user = user["legacy"]
+ elif "statuses_count" not in user and self.syndication == "extended":
+ # try to fetch extended user data
+ user = self.api.user_by_screen_name(user["screen_name"])["legacy"]
uget = user.get
entities = user["entities"]
@@ -361,18 +369,22 @@ class TwitterExtractor(Extractor):
def _expand_tweets(self, tweets):
seen = set()
for tweet in tweets:
-
- if "legacy" in tweet:
- cid = tweet["legacy"]["conversation_id_str"]
- else:
- cid = tweet["conversation_id_str"]
-
- if cid not in seen:
- seen.add(cid)
- try:
- yield from self.api.tweet_detail(cid)
- except Exception:
- yield tweet
+ obj = tweet["legacy"] if "legacy" in tweet else tweet
+ cid = obj.get("conversation_id_str")
+ if not cid:
+ tid = obj["id_str"]
+ self.log.warning(
+ "Unable to expand %s (no 'conversation_id')", tid)
+ continue
+ if cid in seen:
+ self.log.debug(
+ "Skipping expansion of %s (previously seen)", cid)
+ continue
+ seen.add(cid)
+ try:
+ yield from self.api.tweet_detail(cid)
+ except Exception:
+ yield tweet
def _make_tweet(self, user, id_str, url, timestamp):
return {
@@ -772,7 +784,7 @@ class TwitterTweetExtractor(TwitterExtractor):
# age-restricted (#2354)
("https://twitter.com/mightbecursed/status/1492954264909479936", {
"options": (("syndication", True),),
- "keywords": {"date": "dt:2022-02-13 20:10:09"},
+ "keyword": {"date": "dt:2022-02-13 20:10:09"},
"count": 1,
}),
# media alt texts / descriptions (#2617)
@@ -991,7 +1003,7 @@ class TwitterAPI():
}
self._nsfw_warning = True
- self._syndication = extractor.config("syndication")
+ self._syndication = self.extractor.syndication
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
cookies = extractor.session.cookies
@@ -1516,6 +1528,12 @@ class TwitterAPI():
else:
retweet_id = None
+ # assume 'conversation_id' is the same as 'id' when the tweet
+ # is not a reply
+ if "conversation_id_str" not in tweet and \
+ "in_reply_to_status_id_str" not in tweet:
+ tweet["conversation_id_str"] = tweet["id_str"]
+
tweet["created_at"] = text.parse_datetime(
tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime(
"%a %b %d %H:%M:%S +0000 %Y")