diff options
Diffstat (limited to 'gallery_dl/extractor/twitter.py')
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 98 |
1 files changed, 89 insertions, 9 deletions
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 06973b2..fe0b3c5 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -11,7 +11,7 @@ from .common import Extractor, Message from .. import text, util, exception from ..cache import cache - +import json BASE_PATTERN = ( r"(?:https?://)?(?:www\.|mobile\.)?" @@ -78,8 +78,8 @@ class TwitterExtractor(Extractor): def _extract_media(self, tweet, files): for media in tweet["extended_entities"]["media"]: - width = media["original_info"].get("width", 0), - height = media["original_info"].get("height", 0), + width = media["original_info"].get("width", 0) + height = media["original_info"].get("height", 0) if "video_info" in media: if self.videos == "ytdl": @@ -321,6 +321,35 @@ class TwitterBookmarkExtractor(TwitterExtractor): return TwitterAPI(self).timeline_bookmark() +class TwitterListExtractor(TwitterExtractor): + """Extractor for Twitter lists""" + subcategory = "list" + pattern = BASE_PATTERN + r"/i/lists/(\d+)/?$" + test = ("https://twitter.com/i/lists/784214683683127296", { + "range": "1-40", + "count": 40, + "archive": False, + }) + + def tweets(self): + return TwitterAPI(self).timeline_list(self.user) + + +class TwitterListMembersExtractor(TwitterExtractor): + """Extractor for members of a Twitter list""" + subcategory = "list-members" + pattern = BASE_PATTERN + r"/i/lists/(\d+)/members" + test = ("https://twitter.com/i/lists/784214683683127296/members",) + + def items(self): + self.login() + for user in TwitterAPI(self).list_members(self.user): + user["_extractor"] = TwitterTimelineExtractor + url = "{}/intent/user?user_id={}".format( + self.root, user["rest_id"]) + yield Message.Queue, url, user + + class TwitterSearchExtractor(TwitterExtractor): """Extractor for all images from a search timeline""" subcategory = "search" @@ -399,7 +428,7 @@ class TwitterTweetExtractor(TwitterExtractor): # Twitter card (#1005) ("https://twitter.com/billboard/status/1306599586602135555", { "options": (("cards", True),), - "pattern": r"https://pbs.twimg.com/card_img/1317274761030856707/", + "pattern": r"https://pbs.twimg.com/card_img/\d+/", }), # original retweets (#1026) ("https://twitter.com/jessica_3978/status/1296304589591810048", { @@ -511,6 +540,13 @@ class TwitterAPI(): endpoint = "2/timeline/bookmark.json" return self._pagination(endpoint) + def timeline_list(self, list_id): + endpoint = "2/timeline/list.json" + params = self.params.copy() + params["list_id"] = list_id + params["ranking_mode"] = "reverse_chronological" + return self._pagination(endpoint, params) + def search(self, query): endpoint = "2/search/adaptive.json" params = self.params.copy() @@ -522,12 +558,29 @@ class TwitterAPI(): return self._pagination( endpoint, params, "sq-I-t-", "sq-cursor-bottom") - def user_by_screen_name(self, screen_name): - endpoint = "graphql/-xfUfZsnR_zqjFd-IfrN5A/UserByScreenName" - params = { - "variables": '{"screen_name":"' + screen_name + '"' - ',"withHighlightedLabel":true}' + def list_members(self, list_id): + endpoint = "graphql/M74V2EwlxxVYGB4DbyAphQ/ListMembers" + variables = { + "listId": list_id, + "count" : 20, + "withTweetResult": False, + "withUserResult" : False, } + return self._pagination_members(endpoint, variables) + + def list_by_rest_id(self, list_id): + endpoint = "graphql/LXXTUytSX1QY-2p8Xp9BFA/ListByRestId" + params = {"variables": '{"listId":"' + list_id + '"' + ',"withUserResult":false}'} + try: + return self._call(endpoint, params)["data"]["list"] + except KeyError: + raise exception.NotFoundError("list") + + def user_by_screen_name(self, screen_name): + endpoint = "graphql/jMaTS-_Ea8vh9rpKggJbCQ/UserByScreenName" + params = {"variables": '{"screen_name":"' + screen_name + '"' + ',"withHighlightedLabel":true}'} try: return self._call(endpoint, params)["data"]["user"] except KeyError: @@ -627,3 +680,30 @@ class TwitterAPI(): if not cursor or not tweet: return params["cursor"] = cursor + + def _pagination_members(self, endpoint, variables): + while True: + cursor = entry = stop = None + params = {"variables": json.dumps(variables)} + data = self._call(endpoint, params) + + try: + instructions = (data["data"]["list"]["members_timeline"] + ["timeline"]["instructions"]) + except KeyError: + raise exception.AuthorizationError() + + for instr in instructions: + if instr["type"] == "TimelineAddEntries": + for entry in instr["entries"]: + if entry["entryId"].startswith("user-"): + yield entry["content"]["itemContent"]["user"] + elif entry["entryId"].startswith("cursor-bottom-"): + cursor = entry["content"]["value"] + elif instr["type"] == "TimelineTerminateTimeline": + if instr["direction"] == "Bottom": + stop = True + + if stop or not cursor or not entry: + return + variables["cursor"] = cursor |
