summary refs log tree commit diff stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py 118
1 files changed, 102 insertions, 16 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index dc558c0..2a04463 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2019 Mike Fährmann
+# Copyright 2016-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://twitter.com/"""
+"""Extractors for https://twitter.com/"""
from .common import Extractor, Message
from .. import text, exception
@@ -21,8 +21,11 @@ class TwitterExtractor(Extractor):
directory_fmt = ("{category}", "{user[name]}")
filename_fmt = "{tweet_id}_{num}.{extension}"
archive_fmt = "{tweet_id}_{retweet_id}_{num}"
+ cookiedomain = ".twitter.com"
root = "https://twitter.com"
sizes = (":orig", ":large", ":medium", ":small")
+ user_agent = ("Mozilla/5.0 (Windows NT 6.1; WOW64; "
+ "Trident/7.0; rv:11.0) like Gecko")
def __init__(self, match):
Extractor.__init__(self, match)
@@ -32,7 +35,7 @@ class TwitterExtractor(Extractor):
self.retweets = self.config("retweets", True)
self.twitpic = self.config("twitpic", False)
self.content = self.config("content", False)
- self.videos = self.config("videos", False)
+ self.videos = self.config("videos", True)
if self.content:
self._emoji_sub = re.compile(
@@ -117,7 +120,8 @@ class TwitterExtractor(Extractor):
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
- page = self.request(self.root + "/login").text
+ headers = {"User-Agent": self.user_agent}
+ page = self.request(self.root + "/login", headers=headers).text
pos = page.index('name="authenticity_token"')
token = text.extract(page, 'value="', '"', pos-80)[0]
@@ -131,11 +135,15 @@ class TwitterExtractor(Extractor):
"redirect_after_login" : "",
"remember_me" : "1",
}
- response = self.request(url, method="POST", data=data)
-
+ response = self.request(url, method="POST", headers=headers, data=data)
if "/error" in response.url:
raise exception.AuthenticationError()
- return self.session.cookies
+
+ return {
+ cookie.name: cookie.value
+ for cookie in self.session.cookies
+ if cookie.domain and "twitter.com" in cookie.domain
+ }
def _data_from_tweet(self, tweet):
extr = text.extract_from(tweet)
@@ -353,7 +361,11 @@ class TwitterTweetExtractor(TwitterExtractor):
# content with emoji, newlines, hashtags (#338)
("https://twitter.com/yumi_san0112/status/1151144618936823808", {
"options": (("content", True),),
- "keyword": "0b7a3d05607b480c1412dfd85f8606478313e7bf",
+ "keyword": {"content": (
+ "re:晴、お誕生日おめでとう🎉!\n実は下の名前が同じなので結構親近感ある"
+ "アイドルです✨\n今年の晴ちゃんめちゃくちゃ可愛い路線攻めてるから、そろ"
+ "そろまたかっこいい晴が見たいですねw\n#結城晴生誕祭2019\n#結城晴生誕祭"
+ )},
}),
# Reply to another tweet (#403)
("https://twitter.com/tyson_hesse/status/1103767554424598528", {
@@ -365,9 +377,12 @@ class TwitterTweetExtractor(TwitterExtractor):
"pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig",
}),
# quoted tweet (#526)
- ("https://twitter.com/Meiyu_miu/status/1070693241413021696", {
- "count": 4,
- "keyword": "0c627af2b8cdccc7e0da8fd221155c4a4a3141a8",
+ ("https://twitter.com/Pistachio/status/1222690391817932803", {
+ "pattern": r"https://pbs\.twimg\.com/media/EPfMfDUU8AAnByO\.jpg",
+ "keyword": {
+ "author": {"name": "Afro_Herper", "id": 786047748508221440},
+ "user" : {"name": "Pistachio" , "id": 3533231},
+ },
}),
# TwitPic embeds (#579)
("https://twitter.com/i/web/status/112900228289540096", {
@@ -384,11 +399,7 @@ class TwitterTweetExtractor(TwitterExtractor):
def tweets(self):
url = "{}/i/web/status/{}".format(self.root, self.tweet_id)
cookies = {"app_shell_visited": "1"}
- headers = {
- "Referer" : url,
- "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; "
- "Trident/7.0; rv:11.0) like Gecko",
- }
+ headers = {"User-Agent": self.user_agent, "Referer": url}
response = self.request(url, cookies=cookies, headers=headers)
if response.history and response.url == self.root + "/":
@@ -400,6 +411,81 @@ class TwitterTweetExtractor(TwitterExtractor):
return (page[beg:end],)
+class TwitterBookmarkExtractor(TwitterExtractor):
+ """Extractor for bookmarked tweets"""
+ subcategory = "bookmark"
+ pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()"
+ test = ("https://twitter.com/i/bookmarks",)
+
+ def items(self):
+ self.login()
+ if not self.logged_in:
+ raise exception.AuthorizationError("Login required")
+ for cookie in self.session.cookies:
+ cookie.expires = None
+
+ url = "https://api.twitter.com/2/timeline/bookmark.json"
+ params = {
+ "include_profile_interstitial_type": "1",
+ "include_blocking": "1",
+ "include_blocked_by": "1",
+ "include_followed_by": "1",
+ "include_want_retweets": "1",
+ "include_mute_edge": "1",
+ "include_can_dm": "1",
+ "include_can_media_tag": "1",
+ "skip_status": "1",
+ "cards_platform": "Web-12",
+ "include_cards": "1",
+ "include_composer_source": "true",
+ "include_ext_alt_text": "true",
+ "include_reply_count": "1",
+ "tweet_mode": "extended",
+ "include_entities": "true",
+ "include_user_entities": "true",
+ "include_ext_media_color": "true",
+ "include_ext_media_availability": "true",
+ "send_error_codes": "true",
+ "simple_quoted_tweets": "true",
+ "count": "100",
+ "cursor": None,
+ "ext": "mediaStats%2CcameraMoment",
+ }
+ headers = {
+ "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
+ "COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
+ "4FA33AGWWjCpTnA",
+ "Origin": self.root,
+ "Referer": self.root + "/i/bookmarks",
+ "x-csrf-token": self.session.cookies.get("ct0"),
+ "x-twitter-active-user": "yes",
+ "x-twitter-auth-type": "Auth2Session",
+ "x-twitter-client-language": "en",
+ }
+
+ while True:
+ response = self.request(
+ url, params=params, headers=headers, fatal=False)
+ if response.status_code >= 400:
+ raise exception.StopExtraction(response.text)
+ data = response.json()
+ tweets = data["globalObjects"]["tweets"]
+
+ if not tweets:
+ return
+ for tweet_id, tweet_data in tweets.items():
+ tweet_url = "{}/i/web/status/{}".format(self.root, tweet_id)
+ tweet_data["_extractor"] = TwitterTweetExtractor
+ yield Message.Queue, tweet_url, tweet_data
+
+ inst = data["timeline"]["instructions"][0]
+ for entry in inst["addEntries"]["entries"]:
+ if entry["entryId"].startswith("cursor-bottom-"):
+ params["cursor"] = \
+ entry["content"]["operation"]["cursor"]["value"]
+ break
+
+
@memcache()
def _guest_token(extr, headers):
return extr.request(