about summary refs log tree commit diff stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py 27
1 files changed, 25 insertions, 2 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 5e68f13..c47021e 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -13,6 +13,7 @@ from .. import text, util, exception
from ..cache import cache
import itertools
import json
+import re
BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:[fv]x)?twitter\.com"
@@ -75,6 +76,10 @@ class TwitterExtractor(Extractor):
else:
seen_tweets = None
+ if self.twitpic:
+ self._find_twitpic = re.compile(
+ r"https?(://twitpic\.com/(?!photos/)\w+)").findall
+
for tweet in self.tweets():
if "legacy" in tweet:
@@ -231,12 +236,24 @@ class TwitterExtractor(Extractor):
files.append({"url": url})
def _extract_twitpic(self, tweet, files):
- for url in tweet["entities"].get("urls", ()):
+ urls = {}
+
+ # collect URLs from entities
+ for url in tweet["entities"].get("urls") or ():
url = url["expanded_url"]
if "//twitpic.com/" not in url or "/photos/" in url:
continue
if url.startswith("http:"):
url = "https" + url[4:]
+ urls[url] = None
+
+ # collect URLs from text
+ for url in self._find_twitpic(
+ tweet.get("full_text") or tweet.get("text") or ""):
+ urls["https" + url] = None
+
+ # extract actual URLs
+ for url in urls:
response = self.request(url, fatal=False)
if response.status_code >= 400:
continue
@@ -781,7 +798,13 @@ class TwitterTweetExtractor(TwitterExtractor):
("https://twitter.com/i/web/status/112900228289540096", {
"options": (("twitpic", True), ("cards", False)),
"pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg",
- "count": 3,
+ "count": 2, # 1 duplicate
+ }),
+ # TwitPic URL not in 'urls' (#3792)
+ ("https://twitter.com/shimoigusaP/status/8138669971", {
+ "options": (("twitpic", True),),
+ "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.png",
+ "count": 1,
}),
# Twitter card (#1005)
("https://twitter.com/billboard/status/1306599586602135555", {