diff options
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 58 |
1 files changed, 40 insertions, 18 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 1e985e3..2530040 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -15,6 +15,12 @@ import hashlib import time +BASE_PATTERN = ( + r"(?:https?://)?(?:www\.|mobile\.)?" + r"(?:twitter\.com|nitter\.net)" +) + + class TwitterExtractor(Extractor): """Base class for twitter extractors""" category = "twitter" @@ -42,9 +48,14 @@ class TwitterExtractor(Extractor): for tweet in self.tweets(): - if (not self.retweets and "retweeted_status_id_str" in tweet or - not self.replies and "in_reply_to_user_id_str" in tweet or - not self.quoted and "quoted" in tweet): + if not self.retweets and "retweeted_status_id_str" in tweet: + self.log.debug("Skipping %s (retweet)", tweet["id_str"]) + continue + if not self.replies and "in_reply_to_user_id_str" in tweet: + self.log.debug("Skipping %s (reply)", tweet["id_str"]) + continue + if not self.quoted and "quoted" in tweet: + self.log.debug("Skipping %s (quoted tweet)", tweet["id_str"]) continue if self.twitpic: @@ -234,8 +245,7 @@ class TwitterExtractor(Extractor): class TwitterTimelineExtractor(TwitterExtractor): """Extractor for all images from a user's timeline""" subcategory = "timeline" - pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" - r"/(?!search)([^/?&#]+)/?(?:$|[?#])") + pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/?(?:$|[?#])" test = ( ("https://twitter.com/supernaturepics", { "range": "1-40", @@ -251,8 +261,7 @@ class TwitterTimelineExtractor(TwitterExtractor): class TwitterMediaExtractor(TwitterExtractor): """Extractor for all images from a user's Media Tweets""" subcategory = "media" - pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" - r"/(?!search)([^/?&#]+)/media(?!\w)") + pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/media(?!\w)" test = ( ("https://twitter.com/supernaturepics/media", { "range": "1-40", @@ -268,8 +277,7 @@ class TwitterMediaExtractor(TwitterExtractor): class TwitterLikesExtractor(TwitterExtractor): """Extractor for liked tweets""" subcategory = "likes" - pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" - r"/(?!search)([^/?&#]+)/likes(?!\w)") + pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/likes(?!\w)" test = ("https://twitter.com/supernaturepics/likes",) def tweets(self): @@ -279,7 +287,7 @@ class TwitterLikesExtractor(TwitterExtractor): class TwitterBookmarkExtractor(TwitterExtractor): """Extractor for bookmarked tweets""" subcategory = "bookmark" - pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()" + pattern = BASE_PATTERN + r"/i/bookmarks()" test = ("https://twitter.com/i/bookmarks",) def tweets(self): @@ -290,8 +298,7 @@ class TwitterSearchExtractor(TwitterExtractor): """Extractor for all images from a search timeline""" subcategory = "search" directory_fmt = ("{category}", "Search", "{search}") - pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" - r"/search/?\?(?:[^&#]+&)*q=([^&#]+)") + pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)" test = ("https://twitter.com/search?q=nature", { "range": "1-40", "count": 40, @@ -307,8 +314,7 @@ class TwitterSearchExtractor(TwitterExtractor): class TwitterTweetExtractor(TwitterExtractor): """Extractor for images from individual tweets""" subcategory = "tweet" - pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" - r"/([^/?&#]+|i/web)/status/(\d+)") + pattern = BASE_PATTERN + r"/([^/?&#]+|i/web)/status/(\d+)" test = ( ("https://twitter.com/supernaturepics/status/604341487988576256", { "url": "0e801d2f98142dd87c3630ded9e4be4a4d63b580", @@ -357,6 +363,11 @@ class TwitterTweetExtractor(TwitterExtractor): "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg", "count": 3, }), + # Nitter tweet + ("https://nitter.net/ed1conf/status/1163841619336007680", { + "url": "0f6a841e23948e4320af7ae41125e0c5b3cadc98", + "content": "f29501e44d88437fe460f5c927b7543fda0f6e34", + }), ) def __init__(self, match): @@ -474,7 +485,10 @@ class TwitterAPI(): "variables": '{"screen_name":"' + screen_name + '"' ',"withHighlightedLabel":true}' } - return self._call(endpoint, params)["data"]["user"] + try: + return self._call(endpoint, params)["data"]["user"] + except KeyError: + raise exception.NotFoundError("user") @cache(maxage=3600) def _guest_token(self): @@ -491,8 +505,16 @@ class TwitterAPI(): if response.status_code == 429: self.extractor.wait(until=response.headers["x-rate-limit-reset"]) return self._call(endpoint, params) + + try: + msg = ", ".join( + '"' + error["message"] + '"' + for error in response.json()["errors"] + ) + except Exception: + msg = response.text raise exception.StopExtraction( - "%s %s (%s)", response.status_code, response.reason, response.text) + "%s %s (%s)", response.status_code, response.reason, msg) def _pagination(self, endpoint, params=None, entry_tweet="tweet-", entry_cursor="cursor-bottom-"): @@ -517,8 +539,8 @@ class TwitterAPI(): entry["content"]["item"]["content"]["tweet"]["id"]] except KeyError: self.extractor.log.debug( - "Skipping unavailable Tweet %s", - entry["entryId"][6:]) + "Skipping %s (deleted)", + entry["entryId"][len(entry_tweet):]) continue tweet["user"] = users[tweet["user_id_str"]] |
