summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r--gallery_dl/extractor/twitter.py77
1 files changed, 54 insertions, 23 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 0df4ea2..ba0597e 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -41,6 +41,7 @@ class TwitterExtractor(Extractor):
self.quoted = self.config("quoted", False)
self.videos = self.config("videos", True)
self.cards = self.config("cards", False)
+ self.cards_blacklist = self.config("cards-blacklist")
self._user = self._user_obj = None
self._user_cache = {}
self._init_sizes()
@@ -154,8 +155,11 @@ class TwitterExtractor(Extractor):
})
elif "media_url_https" in media:
url = media["media_url_https"]
- base, _, fmt = url.rpartition(".")
- base += "?format=" + fmt + "&name="
+ if url[-4] == ".":
+ base, _, fmt = url.rpartition(".")
+ base += "?format=" + fmt + "&name="
+ else:
+ base = url.rpartition("=")[0] + "="
files.append(text.nameext_from_url(url, {
"url" : base + self._size_image,
"width" : width,
@@ -174,15 +178,23 @@ class TwitterExtractor(Extractor):
card = tweet["card"]
if "legacy" in card:
card = card["legacy"]
- name = card["name"]
+
+ name = card["name"].rpartition(":")[2]
+ bvals = card["binding_values"]
+ if isinstance(bvals, list):
+ bvals = {bval["key"]: bval["value"]
+ for bval in card["binding_values"]}
+
+ cbl = self.cards_blacklist
+ if cbl:
+ if name in cbl:
+ return
+ if "vanity_url" in bvals:
+ domain = bvals["vanity_url"]["string_value"]
+ if domain in cbl or name + ":" + domain in cbl:
+ return
if name in ("summary", "summary_large_image"):
- bvals = card["binding_values"]
- if isinstance(bvals, list):
- bvals = {
- bval["key"]: bval["value"]
- for bval in card["binding_values"]
- }
for prefix in ("photo_image_full_size_",
"summary_photo_image_",
"thumbnail_image_"):
@@ -199,19 +211,9 @@ class TwitterExtractor(Extractor):
files.append(value)
return
elif name == "unified_card":
- bvals = card["binding_values"]
- if isinstance(bvals, list):
- for bval in card["binding_values"]:
- if bval["key"] == "unified_card":
- bval = bval["value"]["string_value"]
- break
- else:
- bval = bvals["unified_card"]["string_value"]
- data = json.loads(bval)
- if data.get("type") == "image_carousel_website":
- self._extract_media(
- tweet, data["media_entities"].values(), files)
- return
+ data = json.loads(bvals["unified_card"]["string_value"])
+ self._extract_media(tweet, data["media_entities"].values(), files)
+ return
if self.cards == "ytdl":
tweet_id = tweet.get("rest_id") or tweet["id_str"]
@@ -735,16 +737,33 @@ class TwitterTweetExtractor(TwitterExtractor):
"options": (("cards", True),),
"pattern": r"https://pbs.twimg.com/card_img/\d+/",
}),
- # unified_card with image_carousel_website
+ # unified_card image_website (#2875)
+ ("https://twitter.com/i/web/status/1561674543323910144", {
+ "options": (("cards", True),),
+ "pattern": r"https://pbs\.twimg\.com/media/F.+=jpg",
+ }),
+ # unified_card image_carousel_website
("https://twitter.com/doax_vv_staff/status/1479438945662685184", {
"options": (("cards", True),),
"pattern": r"https://pbs\.twimg\.com/media/F.+=png",
"count": 6,
}),
+ # unified_card video_website (#2875)
+ ("https://twitter.com/bang_dream_1242/status/1561548715348746241", {
+ "options": (("cards", True),),
+ "pattern": r"https://video\.twimg\.com/amplify_video"
+ r"/1560607284333449216/vid/720x720/\w+\.mp4",
+ }),
# unified_card without type
("https://twitter.com/i/web/status/1466183847628865544", {
"count": 0,
}),
+ # 'cards-blacklist' option
+ ("https://twitter.com/i/web/status/1571141912295243776", {
+ "options": (("cards", "ytdl"),
+ ("cards-blacklist", ("twitch.tv",))),
+ "count": 0,
+ }),
# original retweets (#1026)
("https://twitter.com/jessica_3978/status/1296304589591810048", {
"options": (("retweets", "original"),),
@@ -776,12 +795,20 @@ class TwitterTweetExtractor(TwitterExtractor):
# age-restricted (#2354)
("https://twitter.com/mightbecursed/status/1492954264909479936", {
"options": (("syndication", True),),
+ "keywords": {"date": "dt:2022-02-13 20:10:09"},
"count": 1,
}),
# media alt texts / descriptions (#2617)
("https://twitter.com/my0nruri/status/1528379296041299968", {
"keyword": {"description": "oc"}
}),
+ # '?format=...&name=...'-style URLs
+ ("https://twitter.com/poco_dandy/status/1150646424461176832", {
+ "options": (("cards", True),),
+ "pattern": r"https://pbs.twimg.com/card_img/157\d+/\w+"
+ r"\?format=(jpg|png)&name=orig$",
+ "range": "1-2",
+ }),
)
def __init__(self, match):
@@ -1442,6 +1469,10 @@ class TwitterAPI():
else:
retweet_id = None
+ tweet["created_at"] = text.parse_datetime(
+ tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime(
+ "%a %b %d %H:%M:%S +0000 %Y")
+
if "video" in tweet:
video = tweet["video"]
video["variants"] = (max(