diff options
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 325 |
1 files changed, 271 insertions, 54 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index ba0597e..3dbadaa 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -14,10 +14,7 @@ from ..cache import cache import itertools import json -BASE_PATTERN = ( - r"(?:https?://)?(?:www\.|mobile\.)?" - r"(?:(?:[fv]x)?twitter\.com|nitter\.net)" -) +BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:[fv]x)?twitter\.com" class TwitterExtractor(Extractor): @@ -227,8 +224,8 @@ class TwitterExtractor(Extractor): response = self.request(url, fatal=False) if response.status_code >= 400: continue - url = text.extract( - response.text, 'name="twitter:image" value="', '"')[0] + url = text.extr( + response.text, 'name="twitter:image" value="', '"') if url: files.append({"url": url}) @@ -377,6 +374,24 @@ class TwitterExtractor(Extractor): except Exception: yield tweet + def _make_tweet(self, user, id_str, url, timestamp): + return { + "created_at": text.parse_timestamp(timestamp).strftime( + "%a %b %d %H:%M:%S +0000 %Y"), + "id_str": id_str, + "lang": None, + "user": user, + "entities": {}, + "extended_entities": { + "media": [ + { + "original_info": {}, + "media_url": url, + }, + ], + }, + } + def metadata(self): """Return general metadata""" return {} @@ -388,44 +403,7 @@ class TwitterExtractor(Extractor): if not self._check_cookies(self.cookienames): username, password = self._get_auth_info() if username: - self._update_cookies(self._login_impl(username, password)) - - @cache(maxage=360*24*3600, keyarg=1) - def _login_impl(self, username, password): - self.log.info("Logging in as %s", username) - - token = util.generate_token() - self.session.cookies.clear() - self.request(self.root + "/login") - - url = self.root + "/sessions" - cookies = { - "_mb_tk": token, - } - data = { - "redirect_after_login" : "/", - "remember_me" : "1", - "authenticity_token" : token, - "wfa" : "1", - "ui_metrics" : "{}", - "session[username_or_email]": username, - "session[password]" : password, - } - response = self.request( - url, method="POST", cookies=cookies, data=data) - - if "/account/login_verification" in response.url: - raise exception.AuthenticationError( - "Login with two-factor authentication is not supported") - - cookies = { - cookie.name: cookie.value - for cookie in self.session.cookies - } - - if "/error" in response.url or "auth_token" not in cookies: - raise exception.AuthenticationError() - return cookies + self._update_cookies(_login_impl(self, username, password)) class TwitterTimelineExtractor(TwitterExtractor): @@ -727,11 +705,6 @@ class TwitterTweetExtractor(TwitterExtractor): "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg", "count": 3, }), - # Nitter tweet (#890) - ("https://nitter.net/ed1conf/status/1163841619336007680", { - "url": "4a9ea898b14d3c112f98562d0df75c9785e239d9", - "content": "f29501e44d88437fe460f5c927b7543fda0f6e34", - }), # Twitter card (#1005) ("https://twitter.com/billboard/status/1306599586602135555", { "options": (("cards", True),), @@ -850,6 +823,76 @@ class TwitterTweetExtractor(TwitterExtractor): return itertools.chain(buffer, tweets) +class TwitterAvatarExtractor(TwitterExtractor): + subcategory = "avatar" + filename_fmt = "avatar {date}.{extension}" + archive_fmt = "AV_{user[id]}_{date}" + pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/photo" + test = ( + ("https://twitter.com/supernaturepics/photo", { + "pattern": r"https://pbs\.twimg\.com/profile_images" + r"/554585280938659841/FLVAlX18\.jpeg", + "keyword": { + "date": "dt:2015-01-12 10:26:49", + "extension": "jpeg", + "filename": "FLVAlX18", + "tweet_id": 554585280938659841, + }, + }), + ("https://twitter.com/User16/photo", { + "count": 0, + }), + ) + + def tweets(self): + self.api._user_id_by_screen_name(self.user) + user = self._user_obj + url = user["legacy"]["profile_image_url_https"] + + if url == ("https://abs.twimg.com/sticky" + "/default_profile_images/default_profile_normal.png"): + return () + + url = url.replace("_normal.", ".") + id_str = url.rsplit("/", 2)[1] + timestamp = ((int(id_str) >> 22) + 1288834974657) // 1000 + + return (self._make_tweet(user, id_str, url, timestamp),) + + +class TwitterBackgroundExtractor(TwitterExtractor): + subcategory = "background" + filename_fmt = "background {date}.{extension}" + archive_fmt = "BG_{user[id]}_{date}" + pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/header_photo" + test = ( + ("https://twitter.com/supernaturepics/header_photo", { + "pattern": r"https://pbs\.twimg\.com/profile_banners" + r"/2976459548/1421058583", + "keyword": { + "date": "dt:2015-01-12 10:29:43", + "filename": "1421058583", + "tweet_id": 0, + }, + }), + ("https://twitter.com/User16/header_photo", { + "count": 0, + }), + ) + + def tweets(self): + self.api._user_id_by_screen_name(self.user) + user = user = self._user_obj + + try: + url = user["legacy"]["profile_banner_url"] + _, timestamp = url.rsplit("/", 1) + except (KeyError, ValueError): + return () + + return (self._make_tweet(user, None, url, timestamp),) + + class TwitterImageExtractor(Extractor): category = "twitter" subcategory = "image" @@ -1021,7 +1064,7 @@ class TwitterAPI(): "count": 100, } return self._pagination_tweets( - endpoint, variables, ("bookmark_timeline", "timeline")) + endpoint, variables, ("bookmark_timeline", "timeline"), False) def list_latest_tweets_timeline(self, list_id): endpoint = "/graphql/z3l-EHlx-fyg8OvGO4JN8A/ListLatestTweetsTimeline" @@ -1253,7 +1296,8 @@ class TwitterAPI(): return params["cursor"] = cursor - def _pagination_tweets(self, endpoint, variables, path=None): + def _pagination_tweets(self, endpoint, variables, + path=None, stop_tweets=True): extr = self.extractor variables.update(self.variables) original_retweets = (extr.retweets == "original") @@ -1397,7 +1441,9 @@ class TwitterAPI(): tweet.get("rest_id")) continue - if not tweet or not cursor: + if stop_tweets and not tweet: + return + if not cursor or cursor == variables.get("cursor"): return variables["cursor"] = cursor @@ -1456,8 +1502,8 @@ class TwitterAPI(): self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text) def _syndication_tweet(self, tweet_id): - tweet = self.extractor.request( - "https://cdn.syndication.twimg.com/tweet?id=" + tweet_id).json() + base_url = "https://cdn.syndication.twimg.com/tweet-result?id=" + tweet = self.extractor.request(base_url + tweet_id).json() tweet["user"]["description"] = "" tweet["user"]["entities"] = {"description": {}} @@ -1500,3 +1546,174 @@ class TwitterAPI(): "core" : {"user_results": {"result": tweet["user"]}}, "_retweet_id_str": retweet_id, } + + +@cache(maxage=360*86400, keyarg=1) +def _login_impl(extr, username, password): + + import re + import random + + if re.fullmatch(r"[\w.%+-]+@[\w.-]+\.\w{2,}", username): + extr.log.warning( + "Login with email is no longer possible. " + "You need to provide your username or phone number instead.") + + extr.log.info("Logging in as %s", username) + + def process(response): + try: + data = response.json() + except ValueError: + data = {"errors": ({"message": "Invalid response"},)} + else: + if response.status_code < 400: + return data["flow_token"] + + errors = [] + for error in data.get("errors") or (): + msg = error.get("message") + errors.append('"{}"'.format(msg) if msg else "Unknown error") + extr.log.debug(response.text) + raise exception.AuthenticationError(", ".join(errors)) + + extr.session.cookies.clear() + api = TwitterAPI(extr) + headers = api.headers + headers["Referer"] = "https://twitter.com/i/flow/login" + + # init + data = { + "input_flow_data": { + "flow_context": { + "debug_overrides": {}, + "start_location": {"location": "unknown"}, + }, + }, + "subtask_versions": { + "action_list": 2, + "alert_dialog": 1, + "app_download_cta": 1, + "check_logged_in_account": 1, + "choice_selection": 3, + "contacts_live_sync_permission_prompt": 0, + "cta": 7, + "email_verification": 2, + "end_flow": 1, + "enter_date": 1, + "enter_email": 2, + "enter_password": 5, + "enter_phone": 2, + "enter_recaptcha": 1, + "enter_text": 5, + "enter_username": 2, + "generic_urt": 3, + "in_app_notification": 1, + "interest_picker": 3, + "js_instrumentation": 1, + "menu_dialog": 1, + "notifications_permission_prompt": 2, + "open_account": 2, + "open_home_timeline": 1, + "open_link": 1, + "phone_verification": 4, + "privacy_options": 1, + "security_key": 3, + "select_avatar": 4, + "select_banner": 2, + "settings_list": 7, + "show_code": 1, + "sign_up": 2, + "sign_up_review": 4, + "tweet_selection_urt": 1, + "update_users": 1, + "upload_media": 1, + "user_recommendations_list": 4, + "user_recommendations_urt": 1, + "wait_spinner": 3, + "web_modal": 1, + }, + } + url = "https://twitter.com/i/api/1.1/onboarding/task.json?flow_name=login" + response = extr.request(url, method="POST", headers=headers, json=data) + + data = { + "flow_token": process(response), + "subtask_inputs": [ + { + "subtask_id": "LoginJsInstrumentationSubtask", + "js_instrumentation": { + "response": "{}", + "link": "next_link", + }, + }, + ], + } + url = "https://twitter.com/i/api/1.1/onboarding/task.json" + response = extr.request( + url, method="POST", headers=headers, json=data, fatal=None) + + # username + data = { + "flow_token": process(response), + "subtask_inputs": [ + { + "subtask_id": "LoginEnterUserIdentifierSSO", + "settings_list": { + "setting_responses": [ + { + "key": "user_identifier", + "response_data": { + "text_data": {"result": username}, + }, + }, + ], + "link": "next_link", + }, + }, + ], + } + # url = "https://twitter.com/i/api/1.1/onboarding/task.json" + extr.sleep(random.uniform(2.0, 4.0), "login (username)") + response = extr.request( + url, method="POST", headers=headers, json=data, fatal=None) + + # password + data = { + "flow_token": process(response), + "subtask_inputs": [ + { + "subtask_id": "LoginEnterPassword", + "enter_password": { + "password": password, + "link": "next_link", + }, + }, + ], + } + # url = "https://twitter.com/i/api/1.1/onboarding/task.json" + extr.sleep(random.uniform(2.0, 4.0), "login (password)") + response = extr.request( + url, method="POST", headers=headers, json=data, fatal=None) + + # account duplication check ? + data = { + "flow_token": process(response), + "subtask_inputs": [ + { + "subtask_id": "AccountDuplicationCheck", + "check_logged_in_account": { + "link": "AccountDuplicationCheck_false", + }, + }, + ], + } + # url = "https://twitter.com/i/api/1.1/onboarding/task.json" + response = extr.request( + url, method="POST", headers=headers, json=data, fatal=None) + process(response) + + return { + cookie.name: cookie.value + for cookie in extr.session.cookies + } |
