summary refs log tree commit diff stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py 75
1 files changed, 49 insertions, 26 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 7e78941..2dfcb55 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -208,30 +208,45 @@ class TwitterExtractor(Extractor):
return tdata
def _transform_user(self, user):
+ try:
+ return self._user_cache[user["id_str"]]
+ except KeyError:
+ pass
+
uid = user["id_str"]
- cache = self._user_cache
-
- if uid not in cache:
- cache[uid] = {
- "id" : text.parse_int(uid),
- "name" : user["screen_name"],
- "nick" : user["name"],
- "description" : user["description"],
- "location" : user["location"],
- "date" : text.parse_datetime(
- user["created_at"], "%a %b %d %H:%M:%S %z %Y"),
- "verified" : user.get("verified", False),
- "profile_banner" : user.get("profile_banner_url", ""),
- "profile_image" : user.get(
- "profile_image_url_https", "").replace("_normal.", "."),
- "favourites_count": user["favourites_count"],
- "followers_count" : user["followers_count"],
- "friends_count" : user["friends_count"],
- "listed_count" : user["listed_count"],
- "media_count" : user["media_count"],
- "statuses_count" : user["statuses_count"],
- }
- return cache[uid]
+ entities = user["entities"]
+
+ self._user_cache[uid] = udata = {
+ "id" : text.parse_int(uid),
+ "name" : user["screen_name"],
+ "nick" : user["name"],
+ "location" : user["location"],
+ "date" : text.parse_datetime(
+ user["created_at"], "%a %b %d %H:%M:%S %z %Y"),
+ "verified" : user.get("verified", False),
+ "profile_banner" : user.get("profile_banner_url", ""),
+ "profile_image" : user.get(
+ "profile_image_url_https", "").replace("_normal.", "."),
+ "favourites_count": user["favourites_count"],
+ "followers_count" : user["followers_count"],
+ "friends_count" : user["friends_count"],
+ "listed_count" : user["listed_count"],
+ "media_count" : user["media_count"],
+ "statuses_count" : user["statuses_count"],
+ }
+
+ descr = user["description"]
+ urls = entities["description"].get("urls")
+ if urls:
+ for url in urls:
+ descr = descr.replace(url["url"], url["expanded_url"])
+ udata["description"] = descr
+
+ if "url" in entities:
+ url = entities["url"]["urls"][0]
+ udata["url"] = url.get("expanded_url") or url.get("url")
+
+ return udata
def _users_result(self, users):
userfmt = self.config("users")
@@ -461,6 +476,11 @@ class TwitterTweetExtractor(TwitterExtractor):
("https://twitter.com/i/web/status/1424882930803908612", {
"options": (("replies", "self"),),
"count": 4,
+ "keyword": {"user": {
+ "description": "re:business email-- rhettaro.bloom@gmail.com "
+ "patreon- http://patreon.com/Princecanary",
+ "url": "http://princecanary.tumblr.com",
+ }},
}),
("https://twitter.com/i/web/status/1424898916156284928", {
"options": (("replies", "self"),),
@@ -749,8 +769,8 @@ class TwitterAPI():
)
except Exception:
msg = data["errors"]
- if response.status_code < 400:
- self.extractor.log.warning(msg)
+ if msg and response.status_code < 400:
+ raise exception.StopExtraction(msg)
else:
msg = ""
@@ -768,12 +788,14 @@ class TwitterAPI():
if response.status_code == 401 and \
"have been blocked from viewing" in msg:
# account blocked
- extr = extr = self.extractor
+ extr = self.extractor
if self.headers["x-twitter-auth-type"] and \
extr.config("logout"):
guest_token = self._guest_token()
extr.session.cookies.set(
"gt", guest_token, domain=extr.cookiedomain)
+ extr._cookiefile = None
+ del extr.session.cookies["auth_token"]
self.headers["x-guest-token"] = guest_token
self.headers["x-twitter-auth-type"] = None
extr.log.info("Retrying API request as guest")
@@ -861,6 +883,7 @@ class TwitterAPI():
if "quoted_status_id_str" in tweet:
quoted = tweets.get(tweet["quoted_status_id_str"])
if quoted:
+ quoted = quoted.copy()
quoted["author"] = users[quoted["user_id_str"]]
quoted["user"] = tweet["user"]
quoted["quoted"] = True