diff options
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 89 |
1 files changed, 74 insertions, 15 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 4c947e7..2737d34 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -15,7 +15,7 @@ import json BASE_PATTERN = ( r"(?:https?://)?(?:www\.|mobile\.)?" - r"(?:(?:fx)?twitter\.com|nitter\.net)" + r"(?:(?:[fv]x)?twitter\.com|nitter\.net)" ) @@ -39,7 +39,7 @@ class TwitterExtractor(Extractor): self.pinned = self.config("pinned", False) self.quoted = self.config("quoted", False) self.videos = self.config("videos", True) - self.cards = self.config("cards", True) + self.cards = self.config("cards", False) self._user_cache = {} self._init_sizes() @@ -104,6 +104,7 @@ class TwitterExtractor(Extractor): def _extract_media(self, tweet, entities, files): for media in entities: + descr = media.get("ext_alt_text") width = media["original_info"].get("width", 0) height = media["original_info"].get("height", 0) @@ -112,9 +113,10 @@ class TwitterExtractor(Extractor): files.append({ "url": "ytdl:{}/i/web/status/{}".format( self.root, tweet["id_str"]), - "width" : width, - "height" : height, - "extension": None, + "width" : width, + "height" : height, + "extension" : None, + "description": descr, }) elif self.videos: video_info = media["video_info"] @@ -123,22 +125,24 @@ class TwitterExtractor(Extractor): key=lambda v: v.get("bitrate", 0), ) files.append({ - "url" : variant["url"], - "width" : width, - "height" : height, - "bitrate" : variant.get("bitrate", 0), - "duration": video_info.get( + "url" : variant["url"], + "width" : width, + "height" : height, + "bitrate" : variant.get("bitrate", 0), + "duration" : video_info.get( "duration_millis", 0) / 1000, + "description": descr, }) elif "media_url_https" in media: url = media["media_url_https"] base, _, fmt = url.rpartition(".") base += "?format=" + fmt + "&name=" files.append(text.nameext_from_url(url, { - "url" : base + self._size_image, - "width" : width, - "height" : height, - "_fallback": self._image_fallback(base), + "url" : base + self._size_image, + "width" : width, + "height" : height, + "_fallback" : self._image_fallback(base), + "description": descr, })) else: files.append({"url": media["media_url"]}) @@ -323,6 +327,9 @@ class TwitterExtractor(Extractor): elif userfmt == "media": cls = TwitterMediaExtractor fmt = (self.root + "/id:{rest_id}/media").format_map + elif userfmt == "tweets": + cls = TwitterTweetsExtractor + fmt = (self.root + "/id:{rest_id}/tweets").format_map else: cls = None fmt = userfmt.format_map @@ -383,7 +390,7 @@ class TwitterExtractor(Extractor): class TwitterTimelineExtractor(TwitterExtractor): - """Extractor for Tweets from a user's timeline""" + """Extractor for a Twitter user timeline""" subcategory = "timeline" pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])" r"|i(?:/user/|ntent/user\?user_id=)(\d+))") @@ -400,6 +407,8 @@ class TwitterTimelineExtractor(TwitterExtractor): ("https://www.twitter.com/id:2976459548"), ("https://twitter.com/i/user/2976459548"), ("https://twitter.com/intent/user?user_id=2976459548"), + ("https://fxtwitter.com/supernaturepics"), + ("https://vxtwitter.com/supernaturepics"), ) def __init__(self, match): @@ -409,6 +418,52 @@ class TwitterTimelineExtractor(TwitterExtractor): self.user = "id:" + user_id def tweets(self): + tweets = (self.api.user_tweets(self.user) if self.retweets else + self.api.user_media(self.user)) + + # yield initial batch of (media) tweets + tweet = None + for tweet in tweets: + yield tweet + + if tweet is None: + return + + # get username + if not self.user.startswith("id:"): + username = self.user + elif "core" in tweet: + username = (tweet["core"]["user_results"]["result"] + ["legacy"]["screen_name"]) + else: + username = tweet["user"]["screen_name"] + + # get tweet data + if "legacy" in tweet: + tweet = tweet["legacy"] + + # yield search results starting from last tweet id + yield from self.api.search_adaptive( + "from:{} include:retweets include:nativeretweets max_id:{} " + "filter:images OR card_name:animated_gif OR filter:native_video" + .format(username, tweet["id_str"]) + ) + + +class TwitterTweetsExtractor(TwitterExtractor): + """Extractor for Tweets from a user's Tweets timeline""" + subcategory = "tweets" + pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/tweets(?!\w)" + test = ( + ("https://twitter.com/supernaturepics/tweets", { + "range": "1-40", + "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40", + }), + ("https://mobile.twitter.com/supernaturepics/tweets#t"), + ("https://www.twitter.com/id:2976459548/tweets"), + ) + + def tweets(self): return self.api.user_tweets(self.user) @@ -662,6 +717,10 @@ class TwitterTweetExtractor(TwitterExtractor): "options": (("syndication", True),), "count": 1, }), + # media alt texts / descriptions (#2617) + ("https://twitter.com/my0nruri/status/1528379296041299968", { + "keyword": {"description": "oc"} + }), ) def __init__(self, match): |
