summary refs log tree commit diff stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py 582
1 files changed, 303 insertions, 279 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 4c7b757..7cabb8c 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -10,9 +10,9 @@
from .common import Extractor, Message
from .. import text, exception
-from ..cache import cache, memcache
-import json
-import re
+from ..cache import cache
+import hashlib
+import time
class TwitterExtractor(Extractor):
@@ -24,23 +24,15 @@ class TwitterExtractor(Extractor):
cookiedomain = ".twitter.com"
root = "https://twitter.com"
sizes = (":orig", ":large", ":medium", ":small")
- user_agent = ("Mozilla/5.0 (Windows NT 6.1; WOW64; "
- "Trident/7.0; rv:11.0) like Gecko")
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
- self._user_dict = None
- self.logged_in = False
self.retweets = self.config("retweets", True)
self.replies = self.config("replies", True)
self.twitpic = self.config("twitpic", False)
- self.content = self.config("content", False)
self.videos = self.config("videos", True)
-
- if self.content:
- self._emoji_sub = re.compile(
- r'<img class="Emoji [^>]+ alt="([^"]+)"[^>]*>').sub
+ self._user_cache = {}
def items(self):
self.login()
@@ -48,235 +40,194 @@ class TwitterExtractor(Extractor):
yield Message.Version, 1
for tweet in self.tweets():
- data = self._data_from_tweet(tweet)
- if not data or \
- not self.retweets and data["retweet_id"] or \
- not self.replies and data["reply"]:
+
+ if not self.retweets and "retweeted_status_id_str" in tweet or \
+ not self.replies and "in_reply_to_user_id_str" in tweet:
continue
- data.update(metadata)
- if self.videos and "-videoContainer" in tweet:
- yield Message.Directory, data
+ if self.twitpic:
+ self._extract_twitpic(tweet)
+ if "extended_entities" not in tweet:
+ continue
- if self.videos == "ytdl":
- data["extension"] = None
- url = "ytdl:{}/i/web/status/{}".format(
- self.root, data["tweet_id"])
- else:
- url = self._video_from_tweet(data["tweet_id"])
- if not url:
- continue
- text.nameext_from_url(url, data)
- if data["extension"] == "m3u8":
- url = "ytdl:" + url
- data["extension"] = "mp4"
- data["_ytdl_extra"] = {"protocol": "m3u8_native"}
- data["num"] = 1
- yield Message.Url, url, data
-
- elif "data-image-url=" in tweet:
- yield Message.Directory, data
-
- images = text.extract_iter(
- tweet, 'data-image-url="', '"')
- for data["num"], url in enumerate(images, 1):
- text.nameext_from_url(url, data)
+ tdata = self._transform_tweet(tweet)
+ tdata.update(metadata)
+
+ yield Message.Directory, tdata
+ for tdata["num"], media in enumerate(
+ tweet["extended_entities"]["media"], 1):
+
+ tdata["width"] = media["original_info"].get("width", 0)
+ tdata["height"] = media["original_info"].get("height", 0)
+
+ if "video_info" in media and self.videos:
+
+ if self.videos == "ytdl":
+ url = "ytdl:{}/i/web/status/{}".format(
+ self.root, tweet["id_str"])
+ tdata["extension"] = None
+ yield Message.Url, url, tdata
+
+ else:
+ video_info = media["video_info"]
+ variant = max(
+ video_info["variants"],
+ key=lambda v: v.get("bitrate", 0),
+ )
+ tdata["duration"] = video_info.get(
+ "duration_millis", 0) / 1000
+ tdata["bitrate"] = variant.get("bitrate", 0)
+
+ url = variant["url"]
+ text.nameext_from_url(url, tdata)
+ yield Message.Url, url, tdata
+
+ elif "media_url_https" in media:
+ url = media["media_url_https"]
urls = [url + size for size in self.sizes]
- yield Message.Urllist, urls, data
-
- if self.twitpic and "//twitpic.com/" in tweet:
- urls = [
- url for url in text.extract_iter(
- tweet, 'data-expanded-url="', '"')
- if "//twitpic.com/" in url
- ]
-
- if "num" not in data:
- if urls:
- yield Message.Directory, data
- data["num"] = 0
-
- for data["num"], url in enumerate(urls, data["num"]+1):
- response = self.request(url, fatal=False)
- if response.status_code >= 400:
- continue
- url = text.extract(
- response.text, 'name="twitter:image" value="', '"')[0]
- yield Message.Url, url, text.nameext_from_url(url, data)
+ text.nameext_from_url(url, tdata)
+ yield Message.Urllist, urls, tdata
+
+ else:
+ url = media["media_url"]
+ text.nameext_from_url(url, tdata)
+ yield Message.Url, url, tdata
+
+    def _extract_twitpic(self, tweet):
+        """Resolve TwitPic links of a tweet and attach them as media entries.
+
+        Every URL entity whose expanded form points at twitpic.com is
+        requested, and the address found in the page's 'twitter:image'
+        meta tag is appended to the tweet's "extended_entities" media
+        list (the list is created when the tweet has none).
+
+        The 'tweet' dict is modified in place; nothing is returned.
+        """
+        twitpics = []
+        for entity in tweet["entities"].get("urls", ()):
+            page_url = entity["expanded_url"]
+            if "//twitpic.com/" not in page_url:
+                continue
+
+            # NOTE(review): fatal=False presumably keeps HTTP errors from
+            # raising so that the status code can be inspected here
+            response = self.request(page_url, fatal=False)
+            if response.status_code >= 400:
+                continue
+
+            image_url = text.extract(
+                response.text, 'name="twitter:image" value="', '"')[0]
+            if not image_url:
+                # the page did not provide an image address; an empty
+                # media URL would only fail later when items() emits it
+                continue
+
+            twitpics.append({
+                # left empty on purpose: items() reads width/height
+                # from here with .get(..., 0)
+                "original_info": {},
+                "media_url"    : image_url,
+            })
+
+        if not twitpics:
+            return
+        if "extended_entities" in tweet:
+            tweet["extended_entities"]["media"].extend(twitpics)
+        else:
+            tweet["extended_entities"] = {"media": twitpics}
+
+    def _transform_tweet(self, tweet):
+        """Convert an API tweet object into a flat metadata dict.
+
+        'tweet' has to provide "entities", "user" and the text, date and
+        counter fields read below; a missing mandatory key raises
+        KeyError.  The "hashtags", "mentions", "reply_to",
+        "content_quoted" and "author" entries are only added when the
+        tweet carries the corresponding data.
+        """
+        entities = tweet["entities"]
+
+        def optional_id(key):
+            # an absent ID field reaches parse_int() as None
+            return text.parse_int(tweet.get(key))
+
+        tdata = {
+            "tweet_id"      : text.parse_int(tweet["id_str"]),
+            "retweet_id"    : optional_id("retweeted_status_id_str"),
+            "quote_id"      : optional_id("quoted_status_id_str"),
+            "reply_id"      : optional_id("in_reply_to_status_id_str"),
+            "date"          : text.parse_datetime(
+                tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
+            "user"          : self._transform_user(tweet["user"]),
+            "lang"          : tweet["lang"],
+            "content"       : tweet["full_text"],
+            "favorite_count": tweet["favorite_count"],
+            "quote_count"   : tweet["quote_count"],
+            "reply_count"   : tweet["reply_count"],
+            "retweet_count" : tweet["retweet_count"],
+        }
+
+        tags = entities.get("hashtags")
+        if tags:
+            tdata["hashtags"] = [tag["text"] for tag in tags]
+
+        mentioned = entities.get("user_mentions")
+        if mentioned:
+            tdata["mentions"] = [
+                {
+                    "id"  : text.parse_int(account["id_str"]),
+                    "name": account["screen_name"],
+                    "nick": account["name"],
+                }
+                for account in mentioned
+            ]
+
+        # values passed through unchanged, only stored under another key
+        for source, target in (
+            ("in_reply_to_screen_name", "reply_to"),
+            ("full_text_quoted", "content_quoted"),
+        ):
+            if source in tweet:
+                tdata[target] = tweet[source]
+
+        if "author" in tweet:
+            tdata["author"] = self._transform_user(tweet["author"])
+
+        return tdata
+
+    def _transform_user(self, user):
+        """Convert an API user object into a metadata dict.
+
+        Results are memoized by user ID in 'self._user_cache', so
+        repeated calls for the same user return the very same dict.
+        """
+        uid = user["id_str"]
+        known = self._user_cache
+
+        try:
+            return known[uid]
+        except KeyError:
+            pass
+
+        # built outside of the 'try' so that a key missing from 'user'
+        # still surfaces as an error instead of being swallowed above
+        info = known[uid] = {
+            "id"              : text.parse_int(uid),
+            "name"            : user["screen_name"],
+            "nick"            : user["name"],
+            "description"     : user["description"],
+            "location"        : user["location"],
+            "date"            : text.parse_datetime(
+                user["created_at"], "%a %b %d %H:%M:%S %z %Y"),
+            "verified"        : user.get("verified", False),
+            "profile_banner"  : user.get("profile_banner_url", ""),
+            # drops the "_normal" marker from the image file name
+            "profile_image"   : user.get(
+                "profile_image_url_https", "").replace("_normal.", "."),
+            "favourites_count": user["favourites_count"],
+            "followers_count" : user["followers_count"],
+            "friends_count"   : user["friends_count"],
+            "listed_count"    : user["listed_count"],
+            "media_count"     : user["media_count"],
+            "statuses_count"  : user["statuses_count"],
+        }
+        return info
def metadata(self):
"""Return general metadata"""
return {}
def tweets(self):
- """Yield HTML content of all relevant tweets"""
+ """Yield all relevant tweet objects"""
def login(self):
username, password = self._get_auth_info()
if username:
self._update_cookies(self._login_impl(username, password))
- self.logged_in = True
@cache(maxage=360*24*3600, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
- headers = {"User-Agent": self.user_agent}
- page = self.request(self.root + "/login", headers=headers).text
+ url = "https://mobile.twitter.com/i/nojs_router"
+ params = {"path": "/login"}
+ headers = {"Referer": self.root + "/", "Origin": self.root}
+ page = self.request(
+ url, method="POST", params=params, headers=headers, data={}).text
+
pos = page.index('name="authenticity_token"')
- token = text.extract(page, 'value="', '"', pos-80)[0]
+ token = text.extract(page, 'value="', '"', pos)[0]
- url = self.root + "/sessions"
+ url = "https://mobile.twitter.com/sessions"
data = {
+ "authenticity_token" : token,
"session[username_or_email]": username,
"session[password]" : password,
- "authenticity_token" : token,
- "ui_metrics" : '{"rf":{},"s":""}',
- "scribe_log" : "",
- "redirect_after_login" : "",
"remember_me" : "1",
+ "wfa" : "1",
+ "commit" : "+Log+in+",
+ "ui_metrics" : "",
}
- response = self.request(url, method="POST", headers=headers, data=data)
- if "/error" in response.url:
- raise exception.AuthenticationError()
-
- return {
+ response = self.request(url, method="POST", data=data)
+ cookies = {
cookie.name: cookie.value
for cookie in self.session.cookies
- if cookie.domain and "twitter.com" in cookie.domain
- }
-
- def _data_from_tweet(self, tweet):
- extr = text.extract_from(tweet)
- data = {
- "tweet_id" : text.parse_int(extr('data-tweet-id="' , '"')),
- "reply" : bool(extr('data-is-reply-to="' , '"')),
- "retweet_id": text.parse_int(extr('data-retweet-id="', '"')),
- "retweeter" : extr('data-retweeter="' , '"'),
- "author" : {
- "name" : extr('data-screen-name="', '"'),
- "nick" : text.unescape(extr('data-name="' , '"')),
- "id" : text.parse_int(extr('data-user-id="' , '"')),
- },
- }
-
- if not self._user_dict:
- if data["retweet_id"]:
- for user in json.loads(text.unescape(extr(
- 'data-reply-to-users-json="', '"'))):
- if user["screen_name"] == data["retweeter"]:
- break
- else:
- self.log.warning("Unable to extract user info")
- return None
- self._user_dict = {
- "name": user["screen_name"],
- "nick": text.unescape(user["name"]),
- "id" : text.parse_int(user["id_str"]),
- }
- else:
- self._user_dict = data["author"]
-
- data["user"] = self._user_dict
- data["date"] = text.parse_timestamp(extr('data-time="', '"'))
-
- if self.content:
- content = extr('<div class="js-tweet-text-container">', '\n</div>')
- if '<img class="Emoji ' in content:
- content = self._emoji_sub(r"\1", content)
- content = text.unescape(text.remove_html(content, "", ""))
- cl, _, cr = content.rpartition("pic.twitter.com/")
- data["content"] = cl if cl and len(cr) < 16 else content
-
- if extr('<div class="QuoteTweet', '>'):
- data["retweet_id"] = text.parse_int(extr('data-item-id="', '"'))
- data["retweeter"] = data["user"]["name"]
- data["author"] = {
- "name" : extr('data-screen-name="', '"'),
- "id" : text.parse_int(extr('data-user-id="' , '"')),
- "nick" : text.unescape(extr(
- 'QuoteTweet-fullname', '<').partition('>')[2]),
- }
-
- return data
-
- def _video_from_tweet(self, tweet_id):
- url = "https://api.twitter.com/1.1/videos/tweet/config/{}.json".format(
- tweet_id)
- cookies = None
- headers = {
- "Origin" : self.root,
- "Referer" : "{}/i/web/status/{}".format(self.root, tweet_id),
- "x-csrf-token" : self.session.cookies.get("ct0"),
- "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekM"
- "xqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28N"
- "HfOPqkca3qaAxGfsyKCs0wRbw",
- }
-
- if self.logged_in:
- headers["x-twitter-auth-type"] = "OAuth2Session"
- else:
- token = _guest_token(self, headers)
- cookies = {"gt": token}
- headers["x-guest-token"] = token
-
- response = self.request(
- url, cookies=cookies, headers=headers, fatal=None)
-
- if response.status_code == 429 or \
- response.headers.get("x-rate-limit-remaining") == "0":
- if self.logged_in:
- self.wait(until=response.headers.get("x-rate-limit-reset"))
- else:
- _guest_token.invalidate()
- return self._video_from_tweet(tweet_id)
-
- elif response.status_code >= 400:
- self.log.warning("Unable to fetch video data for %s ('%s %s')",
- tweet_id, response.status_code, response.reason)
- return None
-
- return response.json()["track"]["playbackUrl"]
-
- def _tweets_from_api(self, url, max_position=None):
- params = {
- "include_available_features": "1",
- "include_entities": "1",
- "max_position": max_position,
- "reset_error_state": "false",
- "lang": "en",
+ if cookie.domain == self.cookiedomain
}
- headers = {
- "X-Requested-With": "XMLHttpRequest",
- "X-Twitter-Active-User": "yes",
- "Referer": self.root + "/",
- }
-
- while True:
- data = self.request(url, params=params, headers=headers).json()
- if "inner" in data:
- data = data["inner"]
-
- for tweet in text.extract_iter(
- data["items_html"], '<div class="tweet ', '\n</li>'):
- yield tweet
- if data.get("min_position") is None:
- if data["has_more_items"] and "min_position" not in data:
- pass
- else:
- return
-
- if "min_position" in data:
- position = data["min_position"]
- if position == max_position or position is None:
- return
- else:
- position = text.parse_int(text.extract(
- tweet, 'data-tweet-id="', '"')[0])
- if max_position and position >= max_position:
- return
- params["max_position"] = max_position = position
+ if "/error" in response.url or "auth_token" not in cookies:
+ raise exception.AuthenticationError()
+ return cookies
class TwitterTimelineExtractor(TwitterExtractor):
@@ -288,15 +239,12 @@ class TwitterTimelineExtractor(TwitterExtractor):
("https://twitter.com/supernaturepics", {
"range": "1-40",
"url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",
- "keyword": "4a3d28cc9f7a39e27333d56f3fe19e6e07ee979e",
}),
("https://mobile.twitter.com/supernaturepics?p=i"),
)
def tweets(self):
- url = "{}/i/profiles/show/{}/timeline/tweets".format(
- self.root, self.user)
- return self._tweets_from_api(url)
+ return TwitterAPI(self).timeline_profile(self.user)
class TwitterMediaExtractor(TwitterExtractor):
@@ -313,9 +261,7 @@ class TwitterMediaExtractor(TwitterExtractor):
)
def tweets(self):
- url = "{}/i/profiles/show/{}/media_timeline".format(
- self.root, self.user)
- return self._tweets_from_api(url)
+ return TwitterAPI(self).timeline_media(self.user)
class TwitterSearchExtractor(TwitterExtractor):
@@ -330,12 +276,10 @@ class TwitterSearchExtractor(TwitterExtractor):
})
def metadata(self):
- return {"search": self.user}
+ return {"search": text.unquote(self.user)}
def tweets(self):
- url = "{}/i/search/timeline?f=tweets&q={}".format(
- self.root, self.user)
- return self._tweets_from_api(url, "-1")
+ return TwitterAPI(self).search(self.user)
class TwitterTweetExtractor(TwitterExtractor):
@@ -346,22 +290,19 @@ class TwitterTweetExtractor(TwitterExtractor):
test = (
("https://twitter.com/supernaturepics/status/604341487988576256", {
"url": "0e801d2f98142dd87c3630ded9e4be4a4d63b580",
- "keyword": "76e018cf3f4c8b82d3bdd425e01e28078c98373b",
"content": "ab05e1d8d21f8d43496df284d31e8b362cd3bcab",
}),
# 4 images
("https://twitter.com/perrypumas/status/894001459754180609", {
"url": "c8a262a9698cb733fb27870f5a8f75faf77d79f6",
- "keyword": "c9251b1fd79d547b0c6b4577f06c937d0e9b63d2",
}),
# video
("https://twitter.com/perrypumas/status/1065692031626829824", {
"options": (("videos", True),),
- "pattern": r"ytdl:https://video.twimg.com/ext_tw_video/.*.m3u8",
+ "pattern": r"https://video.twimg.com/ext_tw_video/.+\.mp4\?tag=5",
}),
# content with emoji, newlines, hashtags (#338)
("https://twitter.com/playpokemon/status/1263832915173048321", {
- "options": (("content", True),),
"keyword": {"content": (
r"re:Gear up for #PokemonSwordShieldEX with special Mystery "
"Gifts! \n\nYou’ll be able to receive four Galarian form "
@@ -386,10 +327,6 @@ class TwitterTweetExtractor(TwitterExtractor):
# quoted tweet (#526)
("https://twitter.com/Pistachio/status/1222690391817932803", {
"pattern": r"https://pbs\.twimg\.com/media/EPfMfDUU8AAnByO\.jpg",
- "keyword": {
- "author": {"name": "Afro_Herper", "id": 786047748508221440},
- "user" : {"name": "Pistachio" , "id": 3533231},
- },
}),
# TwitPic embeds (#579)
("https://twitter.com/i/web/status/112900228289540096", {
@@ -404,18 +341,7 @@ class TwitterTweetExtractor(TwitterExtractor):
self.tweet_id = match.group(2)
def tweets(self):
- url = "{}/i/web/status/{}".format(self.root, self.tweet_id)
- cookies = {"app_shell_visited": "1"}
- headers = {"User-Agent": self.user_agent, "Referer": url}
-
- response = self.request(url, cookies=cookies, headers=headers)
- if response.history and response.url == self.root + "/":
- raise exception.AuthorizationError()
- page = response.text
-
- end = page.index('class="js-tweet-stats-container')
- beg = page.rindex('<div class="tweet ', 0, end)
- return (page[beg:end],)
+ return TwitterAPI(self).tweet(self.tweet_id)
class TwitterBookmarkExtractor(TwitterExtractor):
@@ -424,15 +350,26 @@ class TwitterBookmarkExtractor(TwitterExtractor):
pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()"
test = ("https://twitter.com/i/bookmarks",)
- def items(self):
- self.login()
- if not self.logged_in:
- raise exception.AuthorizationError("Login required")
- for cookie in self.session.cookies:
- cookie.expires = None
+ def tweets(self):
+ return TwitterAPI(self).bookmarks()
- url = "https://api.twitter.com/2/timeline/bookmark.json"
- params = {
+
+class TwitterAPI():
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.headers = {
+ "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
+ "COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
+ "4FA33AGWWjCpTnA",
+ "x-guest-token": None,
+ "x-twitter-client-language": "en",
+ "x-twitter-active-user": "yes",
+ "x-csrf-token": None,
+ "Origin": "https://twitter.com",
+ "Referer": "https://twitter.com/",
+ }
+ self.params = {
"include_profile_interstitial_type": "1",
"include_blocking": "1",
"include_blocked_by": "1",
@@ -453,47 +390,134 @@ class TwitterBookmarkExtractor(TwitterExtractor):
"include_ext_media_color": "true",
"include_ext_media_availability": "true",
"send_error_codes": "true",
- "simple_quoted_tweets": "true",
+ "simple_quoted_tweet": "true",
+ # "count": "20",
"count": "100",
"cursor": None,
- "ext": "mediaStats%2CcameraMoment",
+ "ext": "mediaStats,highlightedLabel,cameraMoment",
+ "include_quote_count": "true",
}
- headers = {
- "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
- "COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
- "4FA33AGWWjCpTnA",
- "Origin": self.root,
- "Referer": self.root + "/i/bookmarks",
- "x-csrf-token": self.session.cookies.get("ct0"),
- "x-twitter-active-user": "yes",
- "x-twitter-auth-type": "OAuth2Session",
- "x-twitter-client-language": "en",
+
+ cookies = self.extractor.session.cookies
+
+ # CSRF
+ csrf = hashlib.md5(str(time.time()).encode()).hexdigest()
+ self.headers["x-csrf-token"] = csrf
+ cookies.set("ct0", csrf, domain=".twitter.com")
+
+ if cookies.get("auth_token", domain=".twitter.com"):
+ self.headers["x-twitter-auth-type"] = "OAuth2Session"
+ else:
+ # guest token
+ guest_token = _guest_token(self.extractor, self.headers)
+ self.headers["x-guest-token"] = guest_token
+ cookies.set("gt", guest_token, domain=".twitter.com")
+
+    def tweet(self, tweet_id):
+        """Return a 1-tuple holding the tweet 'tweet_id', or () if absent.
+
+        The conversation timeline of 'tweet_id' is walked until an entry
+        with a matching "id_str" shows up; later entries are not fetched.
+        """
+        endpoint = "2/timeline/conversation/{}.json".format(tweet_id)
+        matching = (
+            candidate
+            for candidate in self._pagination(endpoint)
+            if candidate["id_str"] == tweet_id
+        )
+        found = next(matching, None)
+        return () if found is None else (found,)
+
+    def timeline_profile(self, screen_name):
+        """Paginate over the profile timeline of the user 'screen_name'."""
+        rest_id = self.user_by_screen_name(screen_name)["rest_id"]
+        return self._pagination(
+            "2/timeline/profile/{}.json".format(rest_id))
+
+    def timeline_media(self, screen_name):
+        """Paginate over the media timeline of the user 'screen_name'."""
+        rest_id = self.user_by_screen_name(screen_name)["rest_id"]
+        return self._pagination(
+            "2/timeline/media/{}.json".format(rest_id))
+
+    def search(self, query):
+        """Paginate over the search results for the URL-quoted 'query'.
+
+        Works on a copy of the default parameters, so the added "q"
+        entry does not leak into 'self.params'.
+        """
+        params = dict(self.params, q=text.unquote(query))
+        # search results use their own entry-ID prefixes
+        return self._pagination(
+            "2/search/adaptive.json", params, "sq-I-t-", "sq-cursor-bottom")
+
+    def bookmarks(self):
+        """Paginate over the bookmark timeline."""
+        return self._pagination("2/timeline/bookmark.json")
+
+    def user_by_screen_name(self, screen_name):
+        """Return the "user" object of a UserByScreenName GraphQL query.
+
+        The query variables are serialized with json.dumps() so that a
+        'screen_name' containing quotes or backslashes still yields valid
+        JSON; for ordinary names the result is byte-identical to the
+        previously hand-assembled string.
+        """
+        # function-scope import: this module has no top-level 'json' import
+        import json
+
+        endpoint = "graphql/-xfUfZsnR_zqjFd-IfrN5A/UserByScreenName"
+        params = {
+            "variables": json.dumps(
+                {"screen_name": screen_name, "withHighlightedLabel": True},
+                separators=(",", ":")),
         }
+        return self._call(endpoint, params)["data"]["user"]
+
+    def _call(self, endpoint, params):
+        """Request an API endpoint and return its decoded JSON response.
+
+        A 429 response makes the extractor wait until the time given in
+        the "x-rate-limit-reset" header before the request is repeated.
+        Any other status code of 400 or above raises StopExtraction with
+        the status, reason and response text.
+        """
+        url = "https://api.twitter.com/" + endpoint
+        while True:
+            response = self.extractor.request(
+                url, params=params, headers=self.headers, fatal=None)
+            status = response.status_code
+            if status < 400:
+                return response.json()
+            if status != 429:
+                raise exception.StopExtraction(
+                    "%s %s (%s)", status, response.reason, response.text)
+            # rate limited: sleep until the reset time, then try again
+            self.extractor.wait(until=response.headers["x-rate-limit-reset"])
+
+ def _pagination(self, endpoint, params=None,
+ entry_tweet="tweet-", entry_cursor="cursor-bottom-"):
+ if params is None:
+ params = self.params.copy()
while True:
- response = self.request(
- url, params=params, headers=headers, fatal=False)
- if response.status_code >= 400:
- raise exception.StopExtraction(response.text)
- data = response.json()
- tweets = data["globalObjects"]["tweets"]
+ cursor = tweet = None
+ data = self._call(endpoint, params)
- if not tweets:
+ instr = data["timeline"]["instructions"]
+ if not instr:
return
- for tweet_id, tweet_data in tweets.items():
- tweet_url = "{}/i/web/status/{}".format(self.root, tweet_id)
- tweet_data["_extractor"] = TwitterTweetExtractor
- yield Message.Queue, tweet_url, tweet_data
+ tweets = data["globalObjects"]["tweets"]
+ users = data["globalObjects"]["users"]
+
+ for entry in instr[0]["addEntries"]["entries"]:
- inst = data["timeline"]["instructions"][0]
- for entry in inst["addEntries"]["entries"]:
- if entry["entryId"].startswith("cursor-bottom-"):
- params["cursor"] = \
- entry["content"]["operation"]["cursor"]["value"]
- break
+ if entry["entryId"].startswith(entry_tweet):
+ tid = entry["content"]["item"]["content"]["tweet"]["id"]
+ if tid not in tweets:
+ self.extractor.log.debug(
+ "Skipping unavailable Tweet %s", tid)
+ continue
+ tweet = tweets[tid]
+ tweet["user"] = users[tweet["user_id_str"]]
+
+ if "quoted_status_id_str" in tweet:
+ quoted = tweets.get(tweet["quoted_status_id_str"])
+ if quoted:
+ tweet["full_text_quoted"] = quoted["full_text"]
+ if "extended_entities" in quoted:
+ tweet["extended_entities"] = \
+ quoted["extended_entities"]
+ elif "retweeted_status_id_str" in tweet:
+ retweet = tweets.get(tweet["retweeted_status_id_str"])
+ if retweet:
+ tweet["author"] = users[retweet["user_id_str"]]
+
+ yield tweet
+
+ elif entry["entryId"].startswith(entry_cursor):
+ cursor = entry["content"]["operation"]["cursor"]
+ if not cursor.get("stopOnEmptyResponse"):
+ # keep going even if there are no tweets
+ tweet = True
+ cursor = cursor["value"]
+
+ if "replaceEntry" in instr[-1] :
+ cursor = (instr[-1]["replaceEntry"]["entry"]
+ ["content"]["operation"]["cursor"]["value"])
+
+ if not cursor or not tweet:
+ return
+ params["cursor"] = cursor
-@memcache()
+@cache(maxage=3600)
def _guest_token(extr, headers):
return extr.request(
"https://api.twitter.com/1.1/guest/activate.json",