about | summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py 27
1 file changed, 23 insertions, 4 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ad4dc46..ccba640 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text, exception
from ..cache import cache
+import re
class TwitterExtractor(Extractor):
@@ -26,8 +27,13 @@ class TwitterExtractor(Extractor):
Extractor.__init__(self, match)
self.user = match.group(1)
self.retweets = self.config("retweets", True)
+ self.content = self.config("content", False)
self.videos = self.config("videos", False)
+ if self.content:
+ self._emoji_sub = re.compile(
+ r'<img class="Emoji [^>]+ alt="([^"]+)"[^>]*>').sub
+
def items(self):
self.login()
yield Message.Version, 1
@@ -35,6 +41,7 @@ class TwitterExtractor(Extractor):
for tweet in self.tweets():
data = self._data_from_tweet(tweet)
+
if not self.retweets and data["retweet_id"]:
continue
@@ -87,10 +94,9 @@ class TwitterExtractor(Extractor):
raise exception.AuthenticationError()
return self.session.cookies
- @staticmethod
- def _data_from_tweet(tweet):
+ def _data_from_tweet(self, tweet):
extr = text.extract_from(tweet)
- return {
+ data = {
"tweet_id" : text.parse_int(extr('data-tweet-id="' , '"')),
"retweet_id": text.parse_int(extr('data-retweet-id="', '"')),
"retweeter" : extr('data-retweeter="' , '"'),
@@ -99,6 +105,14 @@ class TwitterExtractor(Extractor):
"user_id" : text.parse_int(extr('data-user-id="' , '"')),
"date" : text.parse_timestamp(extr('data-time="', '"')),
}
+ if self.content:
+ content = extr('<div class="js-tweet-text-container">', '\n</div>')
+ if '<img class="Emoji ' in content:
+ content = self._emoji_sub(r"\1", content)
+ content = text.unescape(text.remove_html(content, "", ""))
+ cl, _, cr = content.rpartition("pic.twitter.com/")
+ data["content"] = cl if cl and len(cr) < 16 else content
+ return data
def _tweets_from_api(self, url):
params = {
@@ -186,6 +200,11 @@ class TwitterTweetExtractor(TwitterExtractor):
"options": (("videos", True),),
"pattern": r"ytdl:https://twitter.com/perrypumas/status/\d+",
}),
+ # content with emoji, newlines, hashtags (#338)
+ ("https://twitter.com/yumi_san0112/status/1151144618936823808", {
+ "options": (("content", True),),
+ "keyword": "b13b6c4cd0b0c15b2ea7685479e7fedde3c47b9e",
+ }),
)
def __init__(self, match):
@@ -199,4 +218,4 @@ class TwitterTweetExtractor(TwitterExtractor):
url = "{}/{}/status/{}".format(self.root, self.user, self.tweet_id)
page = self.request(url).text
return (text.extract(
- page, '<div class="tweet ', '<ul class="stats')[0],)
+ page, '<div class="tweet ', 'class="js-tweet-stats-container')[0],)