summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
-rw-r--r-- gallery_dl/extractor/twitter.py | 62
1 files changed, 38 insertions, 24 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 61e871e..4766ae5 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -10,12 +10,13 @@
from .common import Extractor, Message
from .. import text, util, exception
-from ..cache import cache
+from ..cache import cache, memcache
import itertools
import json
import re
-BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:(?:[fv]x)?twitter|x)\.com"
+BASE_PATTERN = (r"(?:https?://)?(?:www\.|mobile\.)?"
+ r"(?:(?:[fv]x)?twitter|(?:fixup)?x)\.com")
class TwitterExtractor(Extractor):
@@ -272,25 +273,23 @@ class TwitterExtractor(Extractor):
author = tweet["user"]
author = self._transform_user(author)
- if "note_tweet" in tweet:
- note = tweet["note_tweet"]["note_tweet_results"]["result"]
- else:
- note = None
-
- source = tweet["source"]
-
if "legacy" in tweet:
- tweet = tweet["legacy"]
+ legacy = tweet["legacy"]
+ else:
+ legacy = tweet
+ tget = legacy.get
- tweet_id = int(tweet["id_str"])
+ tweet_id = int(legacy["id_str"])
if tweet_id >= 300000000000000:
date = text.parse_timestamp(
((tweet_id >> 22) + 1288834974657) // 1000)
else:
- date = text.parse_datetime(
- tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
+ try:
+ date = text.parse_datetime(
+ legacy["created_at"], "%a %b %d %H:%M:%S %z %Y")
+ except Exception:
+ date = util.NONE
- tget = tweet.get
tdata = {
"tweet_id" : tweet_id,
"retweet_id" : text.parse_int(
@@ -304,8 +303,8 @@ class TwitterExtractor(Extractor):
"date" : date,
"author" : author,
"user" : self._user or author,
- "lang" : tweet["lang"],
- "source" : text.extr(source, ">", "<"),
+ "lang" : legacy["lang"],
+ "source" : text.extr(tweet["source"], ">", "<"),
"sensitive" : tget("possibly_sensitive"),
"favorite_count": tget("favorite_count"),
"quote_count" : tget("quote_count"),
@@ -313,7 +312,13 @@ class TwitterExtractor(Extractor):
"retweet_count" : tget("retweet_count"),
}
- entities = note["entity_set"] if note else tweet["entities"]
+ if "note_tweet" in tweet:
+ note = tweet["note_tweet"]["note_tweet_results"]["result"]
+ content = note["text"]
+ entities = note["entity_set"]
+ else:
+ content = tget("full_text") or tget("text") or ""
+ entities = legacy["entities"]
hashtags = entities.get("hashtags")
if hashtags:
@@ -327,8 +332,7 @@ class TwitterExtractor(Extractor):
"nick": u["name"],
} for u in mentions]
- content = text.unescape(
- note["text"] if note else tget("full_text") or tget("text") or "")
+ content = text.unescape(content)
urls = entities.get("urls")
if urls:
for url in urls:
@@ -336,11 +340,13 @@ class TwitterExtractor(Extractor):
txt, _, tco = content.rpartition(" ")
tdata["content"] = txt if tco.startswith("https://t.co/") else content
- if "in_reply_to_screen_name" in tweet:
- tdata["reply_to"] = tweet["in_reply_to_screen_name"]
- if "quoted_by" in tweet:
- tdata["quote_by"] = tweet["quoted_by"]
+ if "in_reply_to_screen_name" in legacy:
+ tdata["reply_to"] = legacy["in_reply_to_screen_name"]
+ if "quoted_by" in legacy:
+ tdata["quote_by"] = legacy["quoted_by"]
if tdata["retweet_id"]:
+ tdata["content"] = "RT @{}: {}".format(
+ author["name"], tdata["content"])
tdata["date_original"] = text.parse_timestamp(
((tdata["retweet_id"] >> 22) + 1288834974657) // 1000)
@@ -1194,6 +1200,7 @@ class TwitterAPI():
}
return self._pagination_users(endpoint, variables)
+ @memcache(keyarg=1)
def user_by_rest_id(self, rest_id):
endpoint = "/graphql/1YAM811Q8Ry4XyPpJclURQ/UserByRestId"
features = self.features.copy()
@@ -1207,6 +1214,7 @@ class TwitterAPI():
}
return self._call(endpoint, params)["data"]["user"]["result"]
+ @memcache(keyarg=1)
def user_by_screen_name(self, screen_name):
endpoint = "/graphql/XA6F1nJELYg65hxOC2Ekmg/UserByScreenName"
params = {
@@ -1527,15 +1535,21 @@ class TwitterAPI():
retweet["core"]["user_results"]["result"]
rtlegacy = retweet["legacy"]
+
+ if "note_tweet" in retweet:
+ tweet["note_tweet"] = retweet["note_tweet"]
+
if "extended_entities" in rtlegacy and \
"extended_entities" not in legacy:
legacy["extended_entities"] = \
rtlegacy["extended_entities"]
+
if "withheld_scope" in rtlegacy and \
"withheld_scope" not in legacy:
legacy["withheld_scope"] = \
rtlegacy["withheld_scope"]
- legacy["full_text"] = rtlegacy["full_text"]
+
+ legacy["full_text"] = rtlegacy["full_text"]
except KeyError:
pass