summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py | 325
1 files changed, 271 insertions, 54 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ba0597e..3dbadaa 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -14,10 +14,7 @@ from ..cache import cache
import itertools
import json
-BASE_PATTERN = (
- r"(?:https?://)?(?:www\.|mobile\.)?"
- r"(?:(?:[fv]x)?twitter\.com|nitter\.net)"
-)
+BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:[fv]x)?twitter\.com"
class TwitterExtractor(Extractor):
@@ -227,8 +224,8 @@ class TwitterExtractor(Extractor):
response = self.request(url, fatal=False)
if response.status_code >= 400:
continue
- url = text.extract(
- response.text, 'name="twitter:image" value="', '"')[0]
+ url = text.extr(
+ response.text, 'name="twitter:image" value="', '"')
if url:
files.append({"url": url})
@@ -377,6 +374,24 @@ class TwitterExtractor(Extractor):
except Exception:
yield tweet
+    def _make_tweet(self, user, id_str, url, timestamp):
+        """Build a minimal tweet-shaped dict wrapping a single media URL"""
+        created = text.parse_timestamp(timestamp)
+        media = {
+            "original_info": {},
+            "media_url": url,
+        }
+        return {
+            "created_at": created.strftime("%a %b %d %H:%M:%S +0000 %Y"),
+            "id_str": id_str,
+            "lang": None,
+            "user": user,
+            "entities": {},
+            "extended_entities": {"media": [media]},
+        }
+
def metadata(self):
"""Return general metadata"""
return {}
@@ -388,44 +403,7 @@ class TwitterExtractor(Extractor):
if not self._check_cookies(self.cookienames):
username, password = self._get_auth_info()
if username:
- self._update_cookies(self._login_impl(username, password))
-
- @cache(maxage=360*24*3600, keyarg=1)
- def _login_impl(self, username, password):
- self.log.info("Logging in as %s", username)
-
- token = util.generate_token()
- self.session.cookies.clear()
- self.request(self.root + "/login")
-
- url = self.root + "/sessions"
- cookies = {
- "_mb_tk": token,
- }
- data = {
- "redirect_after_login" : "/",
- "remember_me" : "1",
- "authenticity_token" : token,
- "wfa" : "1",
- "ui_metrics" : "{}",
- "session[username_or_email]": username,
- "session[password]" : password,
- }
- response = self.request(
- url, method="POST", cookies=cookies, data=data)
-
- if "/account/login_verification" in response.url:
- raise exception.AuthenticationError(
- "Login with two-factor authentication is not supported")
-
- cookies = {
- cookie.name: cookie.value
- for cookie in self.session.cookies
- }
-
- if "/error" in response.url or "auth_token" not in cookies:
- raise exception.AuthenticationError()
- return cookies
+ self._update_cookies(_login_impl(self, username, password))
class TwitterTimelineExtractor(TwitterExtractor):
@@ -727,11 +705,6 @@ class TwitterTweetExtractor(TwitterExtractor):
"pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg",
"count": 3,
}),
- # Nitter tweet (#890)
- ("https://nitter.net/ed1conf/status/1163841619336007680", {
- "url": "4a9ea898b14d3c112f98562d0df75c9785e239d9",
- "content": "f29501e44d88437fe460f5c927b7543fda0f6e34",
- }),
# Twitter card (#1005)
("https://twitter.com/billboard/status/1306599586602135555", {
"options": (("cards", True),),
@@ -850,6 +823,76 @@ class TwitterTweetExtractor(TwitterExtractor):
return itertools.chain(buffer, tweets)
+class TwitterAvatarExtractor(TwitterExtractor):
+    """Extractor for a Twitter user's profile image"""
+    subcategory = "avatar"
+    filename_fmt = "avatar {date}.{extension}"
+    archive_fmt = "AV_{user[id]}_{date}"
+    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/photo"
+    test = (
+        ("https://twitter.com/supernaturepics/photo", {
+            "pattern": r"https://pbs\.twimg\.com/profile_images"
+                       r"/554585280938659841/FLVAlX18\.jpeg",
+            "keyword": {
+                "date": "dt:2015-01-12 10:26:49",
+                "extension": "jpeg",
+                "filename": "FLVAlX18",
+                "tweet_id": 554585280938659841,
+            },
+        }),
+        ("https://twitter.com/User16/photo", {
+            "count": 0,
+        }),
+    )
+
+    def tweets(self):
+        """Return the user's avatar wrapped as a single pseudo-tweet"""
+        default_avatar = (
+            "https://abs.twimg.com/sticky"
+            "/default_profile_images/default_profile_normal.png")
+
+        # called for its side effect; presumably it populates
+        # self._user_obj -- verify against TwitterAPI
+        self.api._user_id_by_screen_name(self.user)
+        profile = self._user_obj
+
+        avatar = profile["legacy"]["profile_image_url_https"]
+        if avatar == default_avatar:
+            return ()
+
+        # drop the "_normal" size suffix from the file name
+        avatar = avatar.replace("_normal.", ".")
+        # the second-to-last path segment doubles as the pseudo-tweet ID
+        image_id = avatar.rsplit("/", 2)[1]
+        # NOTE(review): bits above the lowest 22 are treated as milliseconds
+        # since 1288834974657 (looks like Twitter's snowflake epoch -- confirm)
+        millis = (int(image_id) >> 22) + 1288834974657
+
+        return (self._make_tweet(profile, image_id, avatar, millis // 1000),)
+
+
+class TwitterBackgroundExtractor(TwitterExtractor):
+    """Extractor for a Twitter user's profile banner"""
+    subcategory = "background"
+    filename_fmt = "background {date}.{extension}"
+    archive_fmt = "BG_{user[id]}_{date}"
+    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/header_photo"
+    test = (
+        ("https://twitter.com/supernaturepics/header_photo", {
+            "pattern": r"https://pbs\.twimg\.com/profile_banners"
+                       r"/2976459548/1421058583",
+            "keyword": {
+                "date": "dt:2015-01-12 10:29:43",
+                "filename": "1421058583",
+                "tweet_id": 0,
+            },
+        }),
+        ("https://twitter.com/User16/header_photo", {
+            "count": 0,
+        }),
+    )
+
+    def tweets(self):
+        """Return the user's banner image wrapped as a single pseudo-tweet"""
+        # called for its side effect; presumably it populates
+        # self._user_obj -- verify against TwitterAPI
+        self.api._user_id_by_screen_name(self.user)
+        user = self._user_obj
+
+        try:
+            url = user["legacy"]["profile_banner_url"]
+            # the last path segment is passed on as the timestamp
+            _, timestamp = url.rsplit("/", 1)
+        except (KeyError, ValueError):
+            # "legacy"/"profile_banner_url" key is missing,
+            # or the URL contains no "/" to split on
+            return ()
+
+        return (self._make_tweet(user, None, url, timestamp),)
+
+
class TwitterImageExtractor(Extractor):
category = "twitter"
subcategory = "image"
@@ -1021,7 +1064,7 @@ class TwitterAPI():
"count": 100,
}
return self._pagination_tweets(
- endpoint, variables, ("bookmark_timeline", "timeline"))
+ endpoint, variables, ("bookmark_timeline", "timeline"), False)
def list_latest_tweets_timeline(self, list_id):
endpoint = "/graphql/z3l-EHlx-fyg8OvGO4JN8A/ListLatestTweetsTimeline"
@@ -1253,7 +1296,8 @@ class TwitterAPI():
return
params["cursor"] = cursor
- def _pagination_tweets(self, endpoint, variables, path=None):
+ def _pagination_tweets(self, endpoint, variables,
+ path=None, stop_tweets=True):
extr = self.extractor
variables.update(self.variables)
original_retweets = (extr.retweets == "original")
@@ -1397,7 +1441,9 @@ class TwitterAPI():
tweet.get("rest_id"))
continue
- if not tweet or not cursor:
+ if stop_tweets and not tweet:
+ return
+ if not cursor or cursor == variables.get("cursor"):
return
variables["cursor"] = cursor
@@ -1456,8 +1502,8 @@ class TwitterAPI():
self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text)
def _syndication_tweet(self, tweet_id):
- tweet = self.extractor.request(
- "https://cdn.syndication.twimg.com/tweet?id=" + tweet_id).json()
+ base_url = "https://cdn.syndication.twimg.com/tweet-result?id="
+ tweet = self.extractor.request(base_url + tweet_id).json()
tweet["user"]["description"] = ""
tweet["user"]["entities"] = {"description": {}}
@@ -1500,3 +1546,174 @@ class TwitterAPI():
"core" : {"user_results": {"result": tweet["user"]}},
"_retweet_id_str": retweet_id,
}
+
+
+@cache(maxage=360*86400, keyarg=1)
+def _login_impl(extr, username, password):
+    """Log in via the onboarding task flow and return the session cookies
+
+    Each step POSTs the 'flow_token' returned by the previous response.
+    Raises exception.AuthenticationError when a response has an error
+    status code or is not valid JSON.
+    """
+
+    import re
+    import random
+
+    if re.fullmatch(r"[\w.%+-]+@[\w.-]+\.\w{2,}", username):
+        extr.log.warning(
+            "Login with email is no longer possible. "
+            "You need to provide your username or phone number instead.")
+
+    extr.log.info("Logging in as %s", username)
+
+    def flow_token(response):
+        """Return the 'flow_token' of a response or raise on failure"""
+        try:
+            result = response.json()
+        except ValueError:
+            result = {"errors": ({"message": "Invalid response"},)}
+        else:
+            if response.status_code < 400:
+                return result["flow_token"]
+
+        messages = []
+        for error in result.get("errors") or ():
+            msg = error.get("message")
+            messages.append('"{}"'.format(msg) if msg else "Unknown error")
+        extr.log.debug(response.text)
+        raise exception.AuthenticationError(", ".join(messages))
+
+    def submit(response, subtask, wait=None):
+        """Answer the previous response with a single subtask input"""
+        payload = {
+            "flow_token": flow_token(response),
+            "subtask_inputs": [subtask],
+        }
+        if wait:
+            # randomized pause before sending this step
+            extr.sleep(random.uniform(2.0, 4.0), wait)
+        return extr.request(
+            "https://twitter.com/i/api/1.1/onboarding/task.json",
+            method="POST", headers=headers, json=payload, fatal=None)
+
+    extr.session.cookies.clear()
+    api = TwitterAPI(extr)
+    headers = api.headers
+    headers["Referer"] = "https://twitter.com/i/flow/login"
+
+    # init
+    flow_context = {
+        "debug_overrides": {},
+        "start_location": {"location": "unknown"},
+    }
+    subtask_versions = {
+        "action_list": 2,
+        "alert_dialog": 1,
+        "app_download_cta": 1,
+        "check_logged_in_account": 1,
+        "choice_selection": 3,
+        "contacts_live_sync_permission_prompt": 0,
+        "cta": 7,
+        "email_verification": 2,
+        "end_flow": 1,
+        "enter_date": 1,
+        "enter_email": 2,
+        "enter_password": 5,
+        "enter_phone": 2,
+        "enter_recaptcha": 1,
+        "enter_text": 5,
+        "enter_username": 2,
+        "generic_urt": 3,
+        "in_app_notification": 1,
+        "interest_picker": 3,
+        "js_instrumentation": 1,
+        "menu_dialog": 1,
+        "notifications_permission_prompt": 2,
+        "open_account": 2,
+        "open_home_timeline": 1,
+        "open_link": 1,
+        "phone_verification": 4,
+        "privacy_options": 1,
+        "security_key": 3,
+        "select_avatar": 4,
+        "select_banner": 2,
+        "settings_list": 7,
+        "show_code": 1,
+        "sign_up": 2,
+        "sign_up_review": 4,
+        "tweet_selection_urt": 1,
+        "update_users": 1,
+        "upload_media": 1,
+        "user_recommendations_list": 4,
+        "user_recommendations_urt": 1,
+        "wait_spinner": 3,
+        "web_modal": 1,
+    }
+    response = extr.request(
+        "https://twitter.com/i/api/1.1/onboarding/task.json?flow_name=login",
+        method="POST", headers=headers, json={
+            "input_flow_data": {"flow_context": flow_context},
+            "subtask_versions": subtask_versions,
+        })
+
+    # js instrumentation
+    response = submit(response, {
+        "subtask_id": "LoginJsInstrumentationSubtask",
+        "js_instrumentation": {
+            "response": "{}",
+            "link": "next_link",
+        },
+    })
+
+    # username
+    identifier = {
+        "key": "user_identifier",
+        "response_data": {
+            "text_data": {"result": username},
+        },
+    }
+    response = submit(response, {
+        "subtask_id": "LoginEnterUserIdentifierSSO",
+        "settings_list": {
+            "setting_responses": [identifier],
+            "link": "next_link",
+        },
+    }, "login (username)")
+
+    # password
+    response = submit(response, {
+        "subtask_id": "LoginEnterPassword",
+        "enter_password": {
+            "password": password,
+            "link": "next_link",
+        },
+    }, "login (password)")
+
+    # account duplication check ?
+    response = submit(response, {
+        "subtask_id": "AccountDuplicationCheck",
+        "check_logged_in_account": {
+            "link": "AccountDuplicationCheck_false",
+        },
+    })
+
+    # raises if the final step was rejected; its token is not needed
+    flow_token(response)
+
+    session_cookies = {}
+    for cookie in extr.session.cookies:
+        session_cookies[cookie.name] = cookie.value
+    return session_cookies