summary refs log tree commit diff stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py 31
1 files changed, 26 insertions, 5 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index a0d6194..36b4806 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -64,6 +64,11 @@ class TwitterExtractor(Extractor):
tweets = self._expand_tweets(self.tweets())
self.tweets = lambda : tweets
+ if self.config("unique", True):
+ seen_tweets = set()
+ else:
+ seen_tweets = None
+
for tweet in self.tweets():
if "legacy" in tweet:
@@ -71,6 +76,11 @@ class TwitterExtractor(Extractor):
else:
data = tweet
+ if seen_tweets is not None:
+ if data["id_str"] in seen_tweets:
+ continue
+ seen_tweets.add(data["id_str"])
+
if not self.retweets and "retweeted_status_id_str" in data:
self.log.debug("Skipping %s (retweet)", data["id_str"])
continue
@@ -100,6 +110,7 @@ class TwitterExtractor(Extractor):
tdata = self._transform_tweet(tweet)
tdata.update(metadata)
+ tdata["count"] = len(files)
yield Message.Directory, tdata
for tdata["num"], file in enumerate(files, 1):
file.update(tdata)
@@ -259,7 +270,7 @@ class TwitterExtractor(Extractor):
"nick": u["name"],
} for u in mentions]
- content = tget("full_text") or tget("text") or ""
+ content = text.unescape(tget("full_text") or tget("text") or "")
urls = entities.get("urls")
if urls:
for url in urls:
@@ -440,12 +451,9 @@ class TwitterTimelineExtractor(TwitterExtractor):
self.user = "id:" + user_id
def tweets(self):
- tweets = (self.api.user_tweets if self.retweets else
- self.api.user_media)
-
# yield initial batch of (media) tweets
tweet = None
- for tweet in tweets(self.user):
+ for tweet in self._select_tweet_source()(self.user):
yield tweet
if tweet is None:
@@ -476,6 +484,19 @@ class TwitterTimelineExtractor(TwitterExtractor):
# yield search results starting from last tweet id
yield from self.api.search_adaptive(query)
+ def _select_tweet_source(self):
+ strategy = self.config("strategy")
+ if strategy is None or strategy == "auto":
+ if self.retweets or self.textonly:
+ return self.api.user_tweets
+ else:
+ return self.api.user_media
+ if strategy == "tweets":
+ return self.api.user_tweets
+ if strategy == "with_replies":
+ return self.api.user_tweets_and_replies
+ return self.api.user_media
+
class TwitterTweetsExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's Tweets timeline"""