diff options
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 31 |
1 files changed, 26 insertions, 5 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index a0d6194..36b4806 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -64,6 +64,11 @@ class TwitterExtractor(Extractor): tweets = self._expand_tweets(self.tweets()) self.tweets = lambda : tweets + if self.config("unique", True): + seen_tweets = set() + else: + seen_tweets = None + for tweet in self.tweets(): if "legacy" in tweet: @@ -71,6 +76,11 @@ class TwitterExtractor(Extractor): else: data = tweet + if seen_tweets is not None: + if data["id_str"] in seen_tweets: + continue + seen_tweets.add(data["id_str"]) + if not self.retweets and "retweeted_status_id_str" in data: self.log.debug("Skipping %s (retweet)", data["id_str"]) continue @@ -100,6 +110,7 @@ class TwitterExtractor(Extractor): tdata = self._transform_tweet(tweet) tdata.update(metadata) + tdata["count"] = len(files) yield Message.Directory, tdata for tdata["num"], file in enumerate(files, 1): file.update(tdata) @@ -259,7 +270,7 @@ class TwitterExtractor(Extractor): "nick": u["name"], } for u in mentions] - content = tget("full_text") or tget("text") or "" + content = text.unescape(tget("full_text") or tget("text") or "") urls = entities.get("urls") if urls: for url in urls: @@ -440,12 +451,9 @@ class TwitterTimelineExtractor(TwitterExtractor): self.user = "id:" + user_id def tweets(self): - tweets = (self.api.user_tweets if self.retweets else - self.api.user_media) - # yield initial batch of (media) tweets tweet = None - for tweet in tweets(self.user): + for tweet in self._select_tweet_source()(self.user): yield tweet if tweet is None: @@ -476,6 +484,19 @@ class TwitterTimelineExtractor(TwitterExtractor): # yield search results starting from last tweet id yield from self.api.search_adaptive(query) + def _select_tweet_source(self): + strategy = self.config("strategy") + if strategy is None or strategy == "auto": + if self.retweets or self.textonly: + return self.api.user_tweets + else: + return self.api.user_media + if strategy == "tweets": + return self.api.user_tweets + if strategy == "with_replies": + return self.api.user_tweets_and_replies + return self.api.user_media + class TwitterTweetsExtractor(TwitterExtractor): """Extractor for Tweets from a user's Tweets timeline""" |
