diff options
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 62 |
1 files changed, 38 insertions, 24 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 61e871e..4766ae5 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -10,12 +10,13 @@ from .common import Extractor, Message from .. import text, util, exception -from ..cache import cache +from ..cache import cache, memcache import itertools import json import re -BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:(?:[fv]x)?twitter|x)\.com" +BASE_PATTERN = (r"(?:https?://)?(?:www\.|mobile\.)?" + r"(?:(?:[fv]x)?twitter|(?:fixup)?x)\.com") class TwitterExtractor(Extractor): @@ -272,25 +273,23 @@ class TwitterExtractor(Extractor): author = tweet["user"] author = self._transform_user(author) - if "note_tweet" in tweet: - note = tweet["note_tweet"]["note_tweet_results"]["result"] - else: - note = None - - source = tweet["source"] - if "legacy" in tweet: - tweet = tweet["legacy"] + legacy = tweet["legacy"] + else: + legacy = tweet + tget = legacy.get - tweet_id = int(tweet["id_str"]) + tweet_id = int(legacy["id_str"]) if tweet_id >= 300000000000000: date = text.parse_timestamp( ((tweet_id >> 22) + 1288834974657) // 1000) else: - date = text.parse_datetime( - tweet["created_at"], "%a %b %d %H:%M:%S %z %Y") + try: + date = text.parse_datetime( + legacy["created_at"], "%a %b %d %H:%M:%S %z %Y") + except Exception: + date = util.NONE - tget = tweet.get tdata = { "tweet_id" : tweet_id, "retweet_id" : text.parse_int( @@ -304,8 +303,8 @@ class TwitterExtractor(Extractor): "date" : date, "author" : author, "user" : self._user or author, - "lang" : tweet["lang"], - "source" : text.extr(source, ">", "<"), + "lang" : legacy["lang"], + "source" : text.extr(tweet["source"], ">", "<"), "sensitive" : tget("possibly_sensitive"), "favorite_count": tget("favorite_count"), "quote_count" : tget("quote_count"), @@ -313,7 +312,13 @@ class TwitterExtractor(Extractor): "retweet_count" : tget("retweet_count"), } - entities = note["entity_set"] if note else tweet["entities"] + if "note_tweet" in tweet: + note = tweet["note_tweet"]["note_tweet_results"]["result"] + content = note["text"] + entities = note["entity_set"] + else: + content = tget("full_text") or tget("text") or "" + entities = legacy["entities"] hashtags = entities.get("hashtags") if hashtags: @@ -327,8 +332,7 @@ class TwitterExtractor(Extractor): "nick": u["name"], } for u in mentions] - content = text.unescape( - note["text"] if note else tget("full_text") or tget("text") or "") + content = text.unescape(content) urls = entities.get("urls") if urls: for url in urls: @@ -336,11 +340,13 @@ class TwitterExtractor(Extractor): txt, _, tco = content.rpartition(" ") tdata["content"] = txt if tco.startswith("https://t.co/") else content - if "in_reply_to_screen_name" in tweet: - tdata["reply_to"] = tweet["in_reply_to_screen_name"] - if "quoted_by" in tweet: - tdata["quote_by"] = tweet["quoted_by"] + if "in_reply_to_screen_name" in legacy: + tdata["reply_to"] = legacy["in_reply_to_screen_name"] + if "quoted_by" in legacy: + tdata["quote_by"] = legacy["quoted_by"] if tdata["retweet_id"]: + tdata["content"] = "RT @{}: {}".format( + author["name"], tdata["content"]) tdata["date_original"] = text.parse_timestamp( ((tdata["retweet_id"] >> 22) + 1288834974657) // 1000) @@ -1194,6 +1200,7 @@ class TwitterAPI(): } return self._pagination_users(endpoint, variables) + @memcache(keyarg=1) def user_by_rest_id(self, rest_id): endpoint = "/graphql/1YAM811Q8Ry4XyPpJclURQ/UserByRestId" features = self.features.copy() @@ -1207,6 +1214,7 @@ class TwitterAPI(): } return self._call(endpoint, params)["data"]["user"]["result"] + @memcache(keyarg=1) def user_by_screen_name(self, screen_name): endpoint = "/graphql/XA6F1nJELYg65hxOC2Ekmg/UserByScreenName" params = { @@ -1527,15 +1535,21 @@ class TwitterAPI(): retweet["core"]["user_results"]["result"] rtlegacy = retweet["legacy"] + + if "note_tweet" in retweet: + tweet["note_tweet"] = retweet["note_tweet"] + if "extended_entities" in rtlegacy and \ "extended_entities" not in legacy: legacy["extended_entities"] = \ rtlegacy["extended_entities"] + if "withheld_scope" in rtlegacy and \ "withheld_scope" not in legacy: legacy["withheld_scope"] = \ rtlegacy["withheld_scope"] - legacy["full_text"] = rtlegacy["full_text"] + + legacy["full_text"] = rtlegacy["full_text"] except KeyError: pass |
