summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py | 58
1 files changed, 40 insertions, 18 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 1e985e3..2530040 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -15,6 +15,12 @@ import hashlib
import time
+BASE_PATTERN = (
+ r"(?:https?://)?(?:www\.|mobile\.)?"
+ r"(?:twitter\.com|nitter\.net)"
+)
+
+
class TwitterExtractor(Extractor):
"""Base class for twitter extractors"""
category = "twitter"
@@ -42,9 +48,14 @@ class TwitterExtractor(Extractor):
for tweet in self.tweets():
- if (not self.retweets and "retweeted_status_id_str" in tweet or
- not self.replies and "in_reply_to_user_id_str" in tweet or
- not self.quoted and "quoted" in tweet):
+ if not self.retweets and "retweeted_status_id_str" in tweet:
+ self.log.debug("Skipping %s (retweet)", tweet["id_str"])
+ continue
+ if not self.replies and "in_reply_to_user_id_str" in tweet:
+ self.log.debug("Skipping %s (reply)", tweet["id_str"])
+ continue
+ if not self.quoted and "quoted" in tweet:
+ self.log.debug("Skipping %s (quoted tweet)", tweet["id_str"])
continue
if self.twitpic:
@@ -234,8 +245,7 @@ class TwitterExtractor(Extractor):
class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for all images from a user's timeline"""
subcategory = "timeline"
- pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
- r"/(?!search)([^/?&#]+)/?(?:$|[?#])")
+ pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/?(?:$|[?#])"
test = (
("https://twitter.com/supernaturepics", {
"range": "1-40",
@@ -251,8 +261,7 @@ class TwitterTimelineExtractor(TwitterExtractor):
class TwitterMediaExtractor(TwitterExtractor):
"""Extractor for all images from a user's Media Tweets"""
subcategory = "media"
- pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
- r"/(?!search)([^/?&#]+)/media(?!\w)")
+ pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/media(?!\w)"
test = (
("https://twitter.com/supernaturepics/media", {
"range": "1-40",
@@ -268,8 +277,7 @@ class TwitterMediaExtractor(TwitterExtractor):
class TwitterLikesExtractor(TwitterExtractor):
"""Extractor for liked tweets"""
subcategory = "likes"
- pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
- r"/(?!search)([^/?&#]+)/likes(?!\w)")
+ pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/likes(?!\w)"
test = ("https://twitter.com/supernaturepics/likes",)
def tweets(self):
@@ -279,7 +287,7 @@ class TwitterLikesExtractor(TwitterExtractor):
class TwitterBookmarkExtractor(TwitterExtractor):
"""Extractor for bookmarked tweets"""
subcategory = "bookmark"
- pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()"
+ pattern = BASE_PATTERN + r"/i/bookmarks()"
test = ("https://twitter.com/i/bookmarks",)
def tweets(self):
@@ -290,8 +298,7 @@ class TwitterSearchExtractor(TwitterExtractor):
"""Extractor for all images from a search timeline"""
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
- pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
- r"/search/?\?(?:[^&#]+&)*q=([^&#]+)")
+ pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
test = ("https://twitter.com/search?q=nature", {
"range": "1-40",
"count": 40,
@@ -307,8 +314,7 @@ class TwitterSearchExtractor(TwitterExtractor):
class TwitterTweetExtractor(TwitterExtractor):
"""Extractor for images from individual tweets"""
subcategory = "tweet"
- pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
- r"/([^/?&#]+|i/web)/status/(\d+)")
+ pattern = BASE_PATTERN + r"/([^/?&#]+|i/web)/status/(\d+)"
test = (
("https://twitter.com/supernaturepics/status/604341487988576256", {
"url": "0e801d2f98142dd87c3630ded9e4be4a4d63b580",
@@ -357,6 +363,11 @@ class TwitterTweetExtractor(TwitterExtractor):
"pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg",
"count": 3,
}),
+ # Nitter tweet
+ ("https://nitter.net/ed1conf/status/1163841619336007680", {
+ "url": "0f6a841e23948e4320af7ae41125e0c5b3cadc98",
+ "content": "f29501e44d88437fe460f5c927b7543fda0f6e34",
+ }),
)
def __init__(self, match):
@@ -474,7 +485,10 @@ class TwitterAPI():
"variables": '{"screen_name":"' + screen_name + '"'
',"withHighlightedLabel":true}'
}
- return self._call(endpoint, params)["data"]["user"]
+ try:
+ return self._call(endpoint, params)["data"]["user"]
+ except KeyError:
+ raise exception.NotFoundError("user")
@cache(maxage=3600)
def _guest_token(self):
@@ -491,8 +505,16 @@ class TwitterAPI():
if response.status_code == 429:
self.extractor.wait(until=response.headers["x-rate-limit-reset"])
return self._call(endpoint, params)
+
+ try:
+ msg = ", ".join(
+ '"' + error["message"] + '"'
+ for error in response.json()["errors"]
+ )
+ except Exception:
+ msg = response.text
raise exception.StopExtraction(
- "%s %s (%s)", response.status_code, response.reason, response.text)
+ "%s %s (%s)", response.status_code, response.reason, msg)
def _pagination(self, endpoint, params=None,
entry_tweet="tweet-", entry_cursor="cursor-bottom-"):
@@ -517,8 +539,8 @@ class TwitterAPI():
entry["content"]["item"]["content"]["tweet"]["id"]]
except KeyError:
self.extractor.log.debug(
- "Skipping unavailable Tweet %s",
- entry["entryId"][6:])
+ "Skipping %s (deleted)",
+ entry["entryId"][len(entry_tweet):])
continue
tweet["user"] = users[tweet["user_id_str"]]