| author | 2023-03-31 07:24:57 -0400 |
|---|---|
| committer | 2023-03-31 07:24:57 -0400 |
| commit | 09e426350409d45e7f7a8ff369f8d8aa9eec0fe4 (patch) |
| tree | 8a8cd3e590675fe6ecb1e5c2b4ad9eecde3dde6d /gallery_dl |
| parent | 10987f08f8b6c510ba64f4b42d95ba67eec6e5b0 (diff) |
New upstream version 1.25.1 (tag: upstream/1.25.1)
Diffstat (limited to 'gallery_dl')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | gallery_dl/__init__.py | 2 |
| -rw-r--r-- | gallery_dl/extractor/gelbooru.py | 66 |
| -rw-r--r-- | gallery_dl/extractor/hiperdex.py | 50 |
| -rw-r--r-- | gallery_dl/extractor/naverwebtoon.py | 89 |
| -rw-r--r-- | gallery_dl/extractor/nitter.py | 27 |
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 214 |
| -rw-r--r-- | gallery_dl/extractor/weibo.py | 44 |
| -rw-r--r-- | gallery_dl/formatter.py | 33 |
| -rw-r--r-- | gallery_dl/job.py | 10 |
| -rw-r--r-- | gallery_dl/output.py | 11 |
| -rw-r--r-- | gallery_dl/postprocessor/metadata.py | 4 |
| -rw-r--r-- | gallery_dl/version.py | 2 |
12 files changed, 405 insertions(+), 147 deletions(-)
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 116ca5d..a430f13 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -120,7 +120,7 @@ def main():
         # eval globals
         path = config.get((), "globals")
         if path:
-            util.GLOBALS = util.import_file(path).__dict__
+            util.GLOBALS.update(util.import_file(path).__dict__)
 
         # loglevels
         output.configure_logging(args.loglevel)
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 80b0ae1..e2173de 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -21,18 +21,21 @@ class GelbooruBase():
     category = "gelbooru"
     basecategory = "booru"
     root = "https://gelbooru.com"
+    offset = 0
 
-    def _api_request(self, params):
+    def _api_request(self, params, key="post"):
+        if "s" not in params:
+            params["s"] = "post"
         params["api_key"] = self.api_key
         params["user_id"] = self.user_id
 
-        url = self.root + "/index.php?page=dapi&s=post&q=index&json=1"
+        url = self.root + "/index.php?page=dapi&q=index&json=1"
         data = self.request(url, params=params).json()
 
-        if "post" not in data:
+        if key not in data:
             return ()
 
-        posts = data["post"]
+        posts = data[key]
         if not isinstance(posts, list):
             return (posts,)
         return posts
@@ -57,7 +60,7 @@ class GelbooruBase():
 
     def _pagination_html(self, params):
         url = self.root + "/index.php"
-        params["pid"] = self.page_start * self.per_page
+        params["pid"] = self.offset
 
         data = {}
         while True:
@@ -103,6 +106,10 @@ class GelbooruBase():
                 "body"  : extr(note, 'data-body="', '"')[0],
             })
 
+    def _skip_offset(self, num):
+        self.offset += num
+        return num
+
 
 class GelbooruTagExtractor(GelbooruBase,
                            gelbooru_v02.GelbooruV02TagExtractor):
@@ -133,13 +140,14 @@ class GelbooruPoolExtractor(GelbooruBase,
         }),
     )
 
+    skip = GelbooruBase._skip_offset
+
     def metadata(self):
         url = self.root + "/index.php"
         self._params = {
             "page": "pool",
             "s"   : "show",
             "id"  : self.pool_id,
-            "pid" : self.page_start,
         }
 
         page = self.request(url, params=self._params).text
@@ -158,8 +166,52 @@ class GelbooruPoolExtractor(GelbooruBase,
 
 class GelbooruFavoriteExtractor(GelbooruBase,
                                 gelbooru_v02.GelbooruV02FavoriteExtractor):
+    """Extractor for gelbooru favorites"""
+    per_page = 100
     pattern = BASE_PATTERN + r"page=favorites&s=view&id=(\d+)"
-    test = ("https://gelbooru.com/index.php?page=favorites&s=view&id=12345",)
+    test = ("https://gelbooru.com/index.php?page=favorites&s=view&id=279415", {
+        "count": 3,
+    })
+
+    skip = GelbooruBase._skip_offset
+
+    def posts(self):
+        # get number of favorites
+        params = {
+            "s"    : "favorite",
+            "id"   : self.favorite_id,
+            "limit": "1",
+        }
+        count = self._api_request(params, "@attributes")[0]["count"]
+
+        if count <= self.offset:
+            return
+        pnum, last = divmod(count + 1, self.per_page)
+
+        if self.offset >= last:
+            self.offset -= last
+            diff, self.offset = divmod(self.offset, self.per_page)
+            pnum -= diff + 1
+        skip = self.offset
+
+        # paginate over them in reverse
+        params["pid"] = pnum
+        params["limit"] = self.per_page
+
+        while True:
+            favs = self._api_request(params, "favorite")
+
+            favs.reverse()
+            if skip:
+                favs = favs[skip:]
+                skip = 0
+
+            for fav in favs:
+                yield from self._api_request({"id": fav["favorite"]})
+
+            params["pid"] -= 1
+            if params["pid"] < 0:
+                return
 
 
 class GelbooruPostExtractor(GelbooruBase,
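The favorites extractor above turns an absolute `skip` offset into a starting API page (`pid`) plus a number of entries to drop from the first fetched page. A worked example of that divmod arithmetic, with illustrative numbers (not taken from the test suite):

```python
per_page = 100
count, offset = 250, 130      # 250 favorites total; skip() already consumed 130

pnum, last = divmod(count + 1, per_page)     # pnum=2, last=51 entries on the final page
if offset >= last:
    offset -= last                           # 130 - 51 = 79 entries still to skip
    diff, offset = divmod(offset, per_page)  # diff=0 whole pages, offset=79
    pnum -= diff + 1                         # start one page lower: pnum=1
skip = offset

print(pnum, skip)  # 1 79 -> begin at pid=1 and drop 79 entries after reversing
```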
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index d61c139..3aad88c 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -6,7 +6,7 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extractors for https://1sthiperdex.com/"""
+"""Extractors for https://hiperdex.com/"""
 
 from .common import ChapterExtractor, MangaExtractor
 from .. import text
@@ -20,7 +20,7 @@ BASE_PATTERN = (r"((?:https?://)?(?:www\.)?"
 class HiperdexBase():
     """Base class for hiperdex extractors"""
     category = "hiperdex"
-    root = "https://1sthiperdex.com"
+    root = "https://hiperdex.com"
 
     @memcache(keyarg=1)
     def manga_data(self, manga, page=None):
@@ -31,7 +31,9 @@ class HiperdexBase():
 
         return {
             "manga"  : text.unescape(extr(
-                "<title>", "<").rpartition("&")[0].strip()),
+                "<title>", "<").rpartition(" - ")[0].strip()),
+            "url"    : text.unescape(extr(
+                'property="og:url" content="', '"')),
             "score"  : text.parse_float(extr(
                 'id="averagerate">', '<')),
             "author" : text.remove_html(extr(
@@ -65,10 +67,10 @@ class HiperdexBase():
 
 
 class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
-    """Extractor for manga chapters from 1sthiperdex.com"""
+    """Extractor for manga chapters from hiperdex.com"""
    pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))"
     test = (
-        ("https://1sthiperdex.com/manga/domestic-na-kanojo/154-5/", {
+        ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", {
             "pattern": r"https://(1st)?hiperdex\d?.(com|net|info)"
                        r"/wp-content/uploads/WP-manga/data"
                        r"/manga_\w+/[0-9a-f]{32}/\d+\.webp",
@@ -86,7 +88,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
                 "type"   : "Manga",
             },
         }),
-        ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/"),
+        ("https://1sthiperdex.com/manga/domestic-na-kanojo/154-5/"),
         ("https://hiperdex2.com/manga/domestic-na-kanojo/154-5/"),
         ("https://hiperdex.net/manga/domestic-na-kanojo/154-5/"),
         ("https://hiperdex.info/manga/domestic-na-kanojo/154-5/"),
@@ -109,11 +111,11 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
 
 
 class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
-    """Extractor for manga from 1sthiperdex.com"""
+    """Extractor for manga from hiperdex.com"""
     chapterclass = HiperdexChapterExtractor
     pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$"
     test = (
-        ("https://1sthiperdex.com/manga/youre-not-that-special/", {
+        ("https://hiperdex.com/manga/1603231576-youre-not-that-special/", {
             "count": 51,
             "pattern": HiperdexChapterExtractor.pattern,
             "keyword": {
@@ -131,6 +133,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
             },
         }),
         ("https://hiperdex.com/manga/youre-not-that-special/"),
+        ("https://1sthiperdex.com/manga/youre-not-that-special/"),
         ("https://hiperdex2.com/manga/youre-not-that-special/"),
         ("https://hiperdex.net/manga/youre-not-that-special/"),
         ("https://hiperdex.info/manga/youre-not-that-special/"),
@@ -142,25 +145,24 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
         MangaExtractor.__init__(self, match, self.root + path + "/")
 
     def chapters(self, page):
-        self.manga_data(self.manga, page)
-        results = []
-
-        shortlink = text.extr(page, "rel='shortlink' href='", "'")
-        data = {
-            "action"   : "manga_get_reading_nav",
-            "manga"    : shortlink.rpartition("=")[2],
-            "chapter"  : "",
-            "volume_id": "",
-            "style"    : "list",
-            "type"     : "manga",
+        data = self.manga_data(self.manga, page)
+        self.manga_url = url = data["url"]
+
+        url = self.manga_url + "ajax/chapters/"
+        headers = {
+            "Accept": "*/*",
+            "X-Requested-With": "XMLHttpRequest",
+            "Origin": self.root,
+            "Referer": self.manga_url,
         }
-        url = self.root + "/wp-admin/admin-ajax.php"
-        page = self.request(url, method="POST", data=data).text
+        html = self.request(url, method="POST", headers=headers).text
 
-        for url in text.extract_iter(page, 'data-redirect="', '"'):
-            chapter = url.rpartition("/")[2]
+        results = []
+        for item in text.extract_iter(
+                html, '<li class="wp-manga-chapter', '</li>'):
+            url = text.extr(item, 'href="', '"')
+            chapter = url.rstrip("/").rpartition("/")[2]
             results.append((url, self.chapter_data(chapter)))
-
         return results
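For context: the removed code POSTed a `manga_get_reading_nav` action to WordPress's generic `/wp-admin/admin-ajax.php`, while the new code calls the Madara theme's chapter endpoint directly under the manga URL. A standalone sketch of the new request, assuming the `requests` library and an illustrative manga URL:

```python
import requests

# illustrative URL; the extractor takes the real value from the page's og:url
manga_url = "https://hiperdex.com/manga/youre-not-that-special/"

html = requests.post(
    manga_url + "ajax/chapters/",
    headers={
        "Accept": "*/*",
        "X-Requested-With": "XMLHttpRequest",   # marks the call as AJAX
        "Origin": "https://hiperdex.com",
        "Referer": manga_url,
    },
).text
# 'html' is a fragment of <li class="wp-manga-chapter"> items,
# one link per chapter
```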
diff --git a/gallery_dl/extractor/naverwebtoon.py b/gallery_dl/extractor/naverwebtoon.py
index fa91f76..d6292af 100644
--- a/gallery_dl/extractor/naverwebtoon.py
+++ b/gallery_dl/extractor/naverwebtoon.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 # Copyright 2021 Seonghyeon Cho
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -11,7 +11,6 @@
 
 from .common import GalleryExtractor, Extractor, Message
 from .. import text
-import re
 
 BASE_PATTERN = (r"(?:https?://)?comic\.naver\.com"
                 r"/(webtoon|challenge|bestChallenge)")
@@ -34,18 +33,44 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor):
                      "?titleId=26458&no=1&weekday=tue"), {
             "url": "47a956ba8c7a837213d5985f50c569fcff986f75",
             "content": "3806b6e8befbb1920048de9888dfce6220f69a60",
-            "count": 14
+            "count": 14,
+            "keyword": {
+                "author": ["김규삼"],
+                "artist": ["김규삼"],
+                "comic": "N의등대-눈의등대",
+                "count": 14,
+                "episode": "1",
+                "extension": "jpg",
+                "num": int,
+                "tags": ["스릴러", "완결무료", "완결스릴러"],
+                "title": "n의 등대 - 눈의 등대 1화",
+                "title_id": "26458",
+            },
         }),
         (("https://comic.naver.com/challenge/detail"
           "?titleId=765124&no=1"), {
-            "pattern": r"https://image-comic\.pstatic\.net/nas"
+            "pattern": r"https://image-comic\.pstatic\.net"
                        r"/user_contents_data/challenge_comic/2021/01/19"
                        r"/342586/upload_7149856273586337846\.jpeg",
             "count": 1,
+            "keyword": {
+                "author": ["kemi****"],
+                "artist": [],
+                "comic": "우니 모두의 이야기",
+                "count": 1,
+                "episode": "1",
+                "extension": "jpeg",
+                "filename": "upload_7149856273586337846",
+                "num": 1,
+                "tags": ["일상툰", "우니모두의이야기", "퇴사", "입사", "신입사원",
+                         "사회초년생", "회사원", "20대"],
+                "title": "퇴사하다",
+                "title_id": "765124",
+            },
         }),
         (("https://comic.naver.com/bestChallenge/detail.nhn"
           "?titleId=771467&no=3"), {
-            "pattern": r"https://image-comic\.pstatic\.net/nas"
+            "pattern": r"https://image-comic\.pstatic\.net"
                        r"/user_contents_data/challenge_comic/2021/04/28"
                        r"/345534/upload_3617293622396203109\.jpeg",
             "count": 1,
@@ -66,12 +91,14 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor):
         return {
             "title_id": self.title_id,
             "episode" : self.episode,
-            "title"   : extr('property="og:title" content="', '"'),
-            "comic"   : extr('<h2>', '<span'),
-            "authors" : extr('class="wrt_nm">', '</span>').strip().split("/"),
-            "description": extr('<p class="txt">', '</p>'),
-            "genre"   : extr('<span class="genre">', '</span>'),
-            "date"    : extr('<dd class="date">', '</dd>'),
+            "comic"   : extr("titleName: '", "'"),
+            "tags"    : [t.strip() for t in text.extract_iter(
+                extr("tagList: [", "}],"), '"tagName":"', '"')],
+            "title"   : extr('"subtitle":"', '"'),
+            "author"  : [a.strip() for a in text.extract_iter(
+                extr('"writers":[', ']'), '"name":"', '"')],
+            "artist"  : [a.strip() for a in text.extract_iter(
+                extr('"painters":[', ']'), '"name":"', '"')]
         }
 
     @staticmethod
@@ -87,7 +114,7 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor):
 class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor):
     subcategory = "comic"
     categorytransfer = True
-    pattern = (BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)")
+    pattern = BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)"
     test = (
         ("https://comic.naver.com/webtoon/list?titleId=22073", {
             "pattern": NaverwebtoonEpisodeExtractor.pattern,
@@ -109,28 +136,30 @@ class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor):
         query = text.parse_query(query)
         self.title_id = query.get("titleId")
         self.page_no = text.parse_int(query.get("page"), 1)
+        self.sort = query.get("sort", "ASC")
 
     def items(self):
-        url = "{}/{}/list".format(self.root, self.path)
-        params = {"titleId": self.title_id, "page": self.page_no}
-        data = {"_extractor": NaverwebtoonEpisodeExtractor}
+        base = "{}/{}/detail?titleId={}&no=".format(
+            self.root, self.path, self.title_id)
+
+        url = self.root + "/api/article/list"
+        headers = {
+            "Accept": "application/json, text/plain, */*",
+            "Referer": self.root + "/",
+        }
+        params = {
+            "titleId": self.title_id,
+            "page"   : self.page_no,
+            "sort"   : self.sort,
+        }
 
         while True:
-            page = self.request(url, params=params).text
-            data["page"] = self.page_no
+            data = self.request(url, headers=headers, params=params).json()
 
-            for episode_url in self.get_episode_urls(page):
-                yield Message.Queue, episode_url, data
+            for article in data["articleList"]:
+                article["_extractor"] = NaverwebtoonEpisodeExtractor
+                yield Message.Queue, base + str(article["no"]), article
 
-            if 'class="next"' not in page:
+            params["page"] = data["pageInfo"]["nextPage"]
+            if not params["page"]:
                 return
-            params["page"] += 1
-
-    def get_episode_urls(self, page):
-        """Extract and return all episode urls in page"""
-        return [
-            self.root + path
-            for path in re.findall(
-                r'<a href="(/(?:webtoon|challenge|bestChallenge)'
-                r'/detail\?[^"]+)', page)
-        ][::2]
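The comic extractor thus switches from scraping paginated HTML lists to Naver's JSON API. A self-contained sketch of the new pagination loop, assuming `requests`; the endpoint and field names (`articleList`, `pageInfo.nextPage`) are taken from the diff above, not verified independently:

```python
import requests

url = "https://comic.naver.com/api/article/list"
headers = {"Accept": "application/json, text/plain, */*",
           "Referer": "https://comic.naver.com/"}
params = {"titleId": "22073", "page": 1, "sort": "ASC"}

while True:
    data = requests.get(url, headers=headers, params=params).json()
    for article in data["articleList"]:
        print(article["no"], article.get("subtitle"))
    params["page"] = data["pageInfo"]["nextPage"]  # falsy on the last page
    if not params["page"]:
        break
```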
r"/list(?:\.nhn)?\?([^#]+)") + pattern = BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)" test = ( ("https://comic.naver.com/webtoon/list?titleId=22073", { "pattern": NaverwebtoonEpisodeExtractor.pattern, @@ -109,28 +136,30 @@ class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor): query = text.parse_query(query) self.title_id = query.get("titleId") self.page_no = text.parse_int(query.get("page"), 1) + self.sort = query.get("sort", "ASC") def items(self): - url = "{}/{}/list".format(self.root, self.path) - params = {"titleId": self.title_id, "page": self.page_no} - data = {"_extractor": NaverwebtoonEpisodeExtractor} + base = "{}/{}/detail?titleId={}&no=".format( + self.root, self.path, self.title_id) + + url = self.root + "/api/article/list" + headers = { + "Accept": "application/json, text/plain, */*", + "Referer": self.root + "/", + } + params = { + "titleId": self.title_id, + "page" : self.page_no, + "sort" : self.sort, + } while True: - page = self.request(url, params=params).text - data["page"] = self.page_no + data = self.request(url, headers=headers, params=params).json() - for episode_url in self.get_episode_urls(page): - yield Message.Queue, episode_url, data + for article in data["articleList"]: + article["_extractor"] = NaverwebtoonEpisodeExtractor + yield Message.Queue, base + str(article["no"]), article - if 'class="next"' not in page: + params["page"] = data["pageInfo"]["nextPage"] + if not params["page"]: return - params["page"] += 1 - - def get_episode_urls(self, page): - """Extract and return all episode urls in page""" - return [ - self.root + path - for path in re.findall( - r'<a href="(/(?:webtoon|challenge|bestChallenge)' - r'/detail\?[^"]+)', page) - ][::2] diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py index 9b69694..725788a 100644 --- a/gallery_dl/extractor/nitter.py +++ b/gallery_dl/extractor/nitter.py @@ -51,6 +51,11 @@ class NitterExtractor(BaseExtractor): for url in text.extract_iter( attachments, 'href="', '"'): + if "/i/broadcasts/" in url: + self.log.debug( + "Skipping unsupported broadcast '%s'", url) + continue + if "/enc/" in url: name = binascii.a2b_base64(url.rpartition( "/")[2]).decode().rpartition("/")[2] @@ -123,7 +128,7 @@ class NitterExtractor(BaseExtractor): "likes" : text.parse_int(extr( 'class="icon-heart', '</div>').rpartition(">")[2]), "retweet" : 'class="retweet-header' in html, - "quoted": False, + "quoted" : False, } def _tweet_from_quote(self, html): @@ -140,18 +145,24 @@ class NitterExtractor(BaseExtractor): "date" : text.parse_datetime( extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"), "tweet_id": link.rpartition("/")[2].partition("#")[0], - "content": extr('class="quote-text', "</div").partition(">")[2], + "content" : extr('class="quote-text', "</div").partition(">")[2], "_attach" : extr('class="attachments', ''' </div>'''), "retweet" : False, - "quoted": True, + "quoted" : True, } def _user_from_html(self, html): extr = text.extract_from(html, html.index('class="profile-tabs')) banner = extr('class="profile-banner"><a href="', '"') + + try: + uid = banner.split("%2F")[4] + except Exception: + uid = 0 + return { - "id" : banner.split("%2F")[4] if banner else None, + "id" : uid, "profile_banner" : self.root + banner if banner else "", "profile_image" : self.root + extr( 'class="profile-card-avatar" href="', '"'), @@ -229,6 +240,10 @@ BASE_PATTERN = NitterExtractor.update({ "root": "https://nitter.unixfox.eu", "pattern": r"nitter\.unixfox\.eu", }, + "nitter.it": { + "root": "https://nitter.it", + "pattern": 
r"nitter\.it", + }, }) USER_PATTERN = BASE_PATTERN + r"/(i(?:/user/|d:)(\d+)|[^/?#]+)" @@ -443,6 +458,10 @@ class NitterTweetExtractor(NitterExtractor): "keyword": {"date": "dt:2022-02-13 20:10:00"}, "count": 1, }), + # broadcast + ("https://nitter.it/POTUS/status/1639409307878928384", { + "count": 0, + }) ) def tweets(self): diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 29b4ac3..89d96d7 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -248,11 +248,15 @@ class TwitterExtractor(Extractor): author = tweet["user"] author = self._transform_user(author) + if "note_tweet" in tweet: + note = tweet["note_tweet"]["note_tweet_results"]["result"] + else: + note = None + if "legacy" in tweet: tweet = tweet["legacy"] tget = tweet.get - entities = tweet["entities"] tdata = { "tweet_id" : text.parse_int(tweet["id_str"]), "retweet_id" : text.parse_int( @@ -272,6 +276,8 @@ class TwitterExtractor(Extractor): "retweet_count" : tget("retweet_count"), } + entities = note["entity_set"] if note else tweet["entities"] + hashtags = entities.get("hashtags") if hashtags: tdata["hashtags"] = [t["text"] for t in hashtags] @@ -284,7 +290,8 @@ class TwitterExtractor(Extractor): "nick": u["name"], } for u in mentions] - content = text.unescape(tget("full_text") or tget("text") or "") + content = text.unescape( + note["text"] if note else tget("full_text") or tget("text") or "") urls = entities.get("urls") if urls: for url in urls: @@ -642,6 +649,21 @@ class TwitterSearchExtractor(TwitterExtractor): return self.api.search_adaptive(query) +class TwitterHashtagExtractor(TwitterExtractor): + """Extractor for Twitter hashtags""" + subcategory = "hashtag" + pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)" + test = ("https://twitter.com/hashtag/nature", { + "pattern": TwitterSearchExtractor.pattern, + "url": "3571c3a53b7647ea35517041fdc17f77ec5b2cb9", + }) + + def items(self): + url = "{}/search?q=%23{}".format(self.root, self.user) + data = {"_extractor": TwitterSearchExtractor} + yield Message.Queue, url, data + + class TwitterEventExtractor(TwitterExtractor): """Extractor for Tweets from a Twitter Event""" subcategory = "event" @@ -803,6 +825,23 @@ class TwitterTweetExtractor(TwitterExtractor): r"\?format=(jpg|png)&name=orig$", "range": "1-2", }), + # note tweet with long 'content' + ("https://twitter.com/i/web/status/1629193457112686592", { + "keyword": { + "content": """\ +BREAKING - DEADLY LIES: Independent researchers at Texas A&M University have \ +just contradicted federal government regulators, saying that toxic air \ +pollutants in East Palestine, Ohio, could pose long-term risks. \n\nThe \ +Washington Post writes, "Three weeks after the toxic train derailment in \ +Ohio, an analysis of Environmental Protection Agency data has found nine air \ +pollutants at levels that could raise long-term health concerns in and around \ +East Palestine, according to an independent analysis. \n\n\"The analysis by \ +Texas A&M University seems to contradict statements by state and federal \ +regulators that air near the crash site is completely safe, despite residents \ +complaining about rashes, breathing problems and other health effects." 
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py (continued)
@@ -951,6 +990,10 @@ class TwitterAPI():
         self.extractor = extractor
         self.root = "https://api.twitter.com"
 
+        self._nsfw_warning = True
+        self._syndication = self.extractor.syndication
+        self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
+
         cookies = extractor.session.cookies
         cookiedomain = extractor.cookiedomain
@@ -965,7 +1008,11 @@ class TwitterAPI():
 
         auth_token = cookies.get("auth_token", domain=cookiedomain)
 
+        if not auth_token:
+            self.user_media = self.user_media_legacy
+
         self.headers = {
+            "Accept": "*/*",
             "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
                              "COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
                              "4FA33AGWWjCpTnA",
@@ -1019,73 +1066,132 @@ class TwitterAPI():
             "collab_control,vibe",
         }
         self.variables = {
-            "includePromotedContent": False,
-            "withSuperFollowsUserFields": True,
-            "withBirdwatchPivots": False,
             "withDownvotePerspective": False,
             "withReactionsMetadata": False,
             "withReactionsPerspective": False,
-            "withSuperFollowsTweetFields": True,
-            "withClientEventToken": False,
-            "withBirdwatchNotes": False,
-            "withVoice": True,
-            "withV2Timeline": False,
-            "__fs_interactive_text": False,
-            "__fs_dont_mention_me_view_api_enabled": False,
         }
-
-        self._nsfw_warning = True
-        self._syndication = self.extractor.syndication
-        self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
+        self.features = {
+            "responsive_web_twitter_blue_verified_badge_is_enabled": True,
+            "responsive_web_graphql_exclude_directive_enabled": True,
+            "verified_phone_label_enabled": False,
+            "responsive_web_graphql_skip_user_profile_"
+            "image_extensions_enabled": False,
+            "responsive_web_graphql_timeline_navigation_enabled": True,
+        }
+        self.features_pagination = {
+            "responsive_web_twitter_blue_verified_badge_is_enabled": True,
+            "responsive_web_graphql_exclude_directive_enabled": True,
+            "verified_phone_label_enabled": False,
+            "responsive_web_graphql_timeline_navigation_enabled": True,
+            "responsive_web_graphql_skip_user_profile_"
+            "image_extensions_enabled": False,
+            "tweetypie_unmention_optimization_enabled": True,
+            "vibe_api_enabled": True,
+            "responsive_web_edit_tweet_api_enabled": True,
+            "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
+            "view_counts_everywhere_api_enabled": True,
+            "longform_notetweets_consumption_enabled": True,
+            "tweet_awards_web_tipping_enabled": False,
+            "freedom_of_speech_not_reach_fetch_enabled": False,
+            "standardized_nudges_misinfo": True,
+            "tweet_with_visibility_results_prefer_gql_"
+            "limited_actions_policy_enabled": False,
+            "interactive_text_enabled": True,
+            "responsive_web_text_conversations_enabled": False,
+            "longform_notetweets_richtext_consumption_enabled": False,
+            "responsive_web_enhance_cards_enabled": False,
+        }
 
     def tweet_detail(self, tweet_id):
-        endpoint = "/graphql/ItejhtHVxU7ksltgMmyaLA/TweetDetail"
+        endpoint = "/graphql/zXaXQgfyR4GxE21uwYQSyA/TweetDetail"
         variables = {
             "focalTweetId": tweet_id,
+            "referrer": "profile",
             "with_rux_injections": False,
+            "includePromotedContent": True,
             "withCommunity": True,
             "withQuickPromoteEligibilityTweetFields": True,
             "withBirdwatchNotes": False,
+            "withSuperFollowsUserFields": True,
+            "withSuperFollowsTweetFields": True,
+            "withVoice": True,
+            "withV2Timeline": True,
         }
         return self._pagination_tweets(
-            endpoint, variables, ("threaded_conversation_with_injections",))
+            endpoint, variables, ("threaded_conversation_with_injections_v2",))
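Trimmed-down sketch of how these GraphQL endpoints are now queried: `variables` and `features` travel as two separately JSON-encoded query parameters, serialized without whitespace (the query ID below is a placeholder):

```python
import json

_json_dumps = json.JSONEncoder(separators=(",", ":")).encode

variables = {"focalTweetId": "1629193457112686592", "withV2Timeline": True}
features  = {"responsive_web_graphql_timeline_navigation_enabled": True}

params = {
    "variables": _json_dumps(variables),
    "features" : _json_dumps(features),
}
# session.get("https://api.twitter.com/graphql/<queryId>/TweetDetail",
#             params=params, headers=headers)
```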
"/graphql/WZT7sCTrLvSOaWOXLDsWbQ/UserTweets" + endpoint = "/graphql/9rys0A7w1EyqVd2ME0QCJg/UserTweets" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, + "includePromotedContent": True, "withQuickPromoteEligibilityTweetFields": True, + "withVoice": True, + "withV2Timeline": True, } return self._pagination_tweets(endpoint, variables) def user_tweets_and_replies(self, screen_name): - endpoint = "/graphql/t4wEKVulW4Mbv1P0kgxTEw/UserTweetsAndReplies" + endpoint = "/graphql/ehMCHF3Mkgjsfz_aImqOsg/UserTweetsAndReplies" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, + "includePromotedContent": True, "withCommunity": True, + "withVoice": True, + "withV2Timeline": True, } return self._pagination_tweets(endpoint, variables) def user_media(self, screen_name): - endpoint = "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia" + endpoint = "/graphql/MA_EP2a21zpzNWKRkaPBMg/UserMedia" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, + "includePromotedContent": False, + "withClientEventToken": False, + "withBirdwatchNotes": False, + "withVoice": True, + "withV2Timeline": True, } return self._pagination_tweets(endpoint, variables) + def user_media_legacy(self, screen_name): + endpoint = "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia" + variables = { + "userId": self._user_id_by_screen_name(screen_name), + "count": 100, + "includePromotedContent": False, + "withSuperFollowsUserFields": True, + "withBirdwatchPivots": False, + "withSuperFollowsTweetFields": True, + "withClientEventToken": False, + "withBirdwatchNotes": False, + "withVoice": True, + "withV2Timeline": False, + "__fs_interactive_text": False, + "__fs_dont_mention_me_view_api_enabled": False, + } + return self._pagination_tweets( + endpoint, variables, ("user", "result", "timeline", "timeline"), + features=False) + def user_likes(self, screen_name): - endpoint = "/graphql/9MSTt44HoGjVFSg_u3rHDw/Likes" + endpoint = "/graphql/XbHBYpgURwtklXj8NNxTDw/Likes" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, + "includePromotedContent": False, + "withClientEventToken": False, + "withBirdwatchNotes": False, + "withVoice": True, + "withV2Timeline": True, } return self._pagination_tweets(endpoint, variables) def user_bookmarks(self): - endpoint = "/graphql/uKP9v_I31k0_VSBmlpq2Xg/Bookmarks" + endpoint = "/graphql/Xq0wQSWHlcfnXARLJGqTxg/Bookmarks" variables = { "count": 100, } @@ -1093,7 +1199,7 @@ class TwitterAPI(): endpoint, variables, ("bookmark_timeline", "timeline"), False) def list_latest_tweets_timeline(self, list_id): - endpoint = "/graphql/z3l-EHlx-fyg8OvGO4JN8A/ListLatestTweetsTimeline" + endpoint = "/graphql/FDI9EiIp54KxEOWGiv3B4A/ListLatestTweetsTimeline" variables = { "listId": list_id, "count": 100, @@ -1128,18 +1234,21 @@ class TwitterAPI(): ["twitter_objects"]["live_events"][event_id]) def list_by_rest_id(self, list_id): - endpoint = "/graphql/BWEhzAk7k8TwbU4lKH2dpw/ListByRestId" - params = {"variables": self._json_dumps({ - "listId": list_id, - "withSuperFollowsUserFields": True, - })} + endpoint = "/graphql/KlGpwq5CAt9tCfHkV2mwYQ/ListByRestId" + params = { + "variables": self._json_dumps({ + "listId": list_id, + "withSuperFollowsUserFields": True, + }), + "features": self._json_dumps(self.features), + } try: return self._call(endpoint, params)["data"]["list"] except KeyError: raise exception.NotFoundError("list") def list_members(self, list_id): - endpoint = "/graphql/snESM0DPs3c7M1SBm4rvVw/ListMembers" + endpoint = 
"/graphql/XsAJX17RLgLYU8GALIWg2g/ListMembers" variables = { "listId": list_id, "count": 100, @@ -1149,29 +1258,34 @@ class TwitterAPI(): endpoint, variables, ("list", "members_timeline", "timeline")) def user_following(self, screen_name): - endpoint = "/graphql/mIwX8GogcobVlRwlgpHNYA/Following" + endpoint = "/graphql/vTZwBbd_gz6aI8v6Wze21A/Following" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, + "includePromotedContent": False, } return self._pagination_users(endpoint, variables) def user_by_rest_id(self, rest_id): - endpoint = "/graphql/I5nvpI91ljifos1Y3Lltyg/UserByRestId" - params = {"variables": self._json_dumps({ - "userId": rest_id, - "withSafetyModeUserFields": True, - "withSuperFollowsUserFields": True, - })} + endpoint = "/graphql/QPSxc9lxrmrwnBzYkJI8eA/UserByRestId" + params = { + "variables": self._json_dumps({ + "userId": rest_id, + "withSafetyModeUserFields": True, + }), + "features": self._json_dumps(self.features), + } return self._call(endpoint, params)["data"]["user"]["result"] def user_by_screen_name(self, screen_name): - endpoint = "/graphql/7mjxD3-C6BxitPMVQ6w0-Q/UserByScreenName" - params = {"variables": self._json_dumps({ - "screen_name": screen_name, - "withSafetyModeUserFields": True, - "withSuperFollowsUserFields": True, - })} + endpoint = "/graphql/nZjSkpOpSL5rWyIVdsKeLA/UserByScreenName" + params = { + "variables": self._json_dumps({ + "screen_name": screen_name, + "withSafetyModeUserFields": True, + }), + "features": self._json_dumps(self.features), + } return self._call(endpoint, params)["data"]["user"]["result"] def _user_id_by_screen_name(self, screen_name): @@ -1337,19 +1451,23 @@ class TwitterAPI(): params["cursor"] = cursor def _pagination_tweets(self, endpoint, variables, - path=None, stop_tweets=True): + path=None, stop_tweets=True, features=True): extr = self.extractor variables.update(self.variables) original_retweets = (extr.retweets == "original") pinned_tweet = extr.pinned + params = {"variables": None} + if features: + params["features"] = self._json_dumps(self.features_pagination) + while True: - params = {"variables": self._json_dumps(variables)} + params["variables"] = self._json_dumps(variables) data = self._call(endpoint, params)["data"] try: if path is None: - instructions = (data["user"]["result"]["timeline"] + instructions = (data["user"]["result"]["timeline_v2"] ["timeline"]["instructions"]) else: instructions = data @@ -1487,10 +1605,12 @@ class TwitterAPI(): def _pagination_users(self, endpoint, variables, path=None): variables.update(self.variables) + params = {"variables": None, + "features" : self._json_dumps(self.features_pagination)} while True: cursor = entry = stop = None - params = {"variables": self._json_dumps(variables)} + params["variables"] = self._json_dumps(variables) data = self._call(endpoint, params)["data"] try: diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 68bd136..388ee03 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -79,6 +79,18 @@ class WeiboExtractor(Extractor): def _extract_status(self, status, files): append = files.append + if "mix_media_info" in status: + for item in status["mix_media_info"]["items"]: + type = item.get("type") + if type == "video": + if self.videos: + append(self._extract_video(item["data"]["media_info"])) + elif type == "pic": + append(item["data"]["largest"].copy()) + else: + self.log.warning("Unknown media type '%s'", type) + return + pic_ids = status.get("pic_ids") if pic_ids: pics = 
status["pic_infos"] @@ -100,18 +112,20 @@ class WeiboExtractor(Extractor): else: append(pic["largest"].copy()) - if "page_info" in status and self.videos: - try: - media = max(status["page_info"]["media_info"]["playback_list"], - key=lambda m: m["meta"]["quality_index"]) - except KeyError: - pass - except ValueError: - info = status["page_info"]["media_info"] - append({"url": (info.get("stream_url_hd") or - info["stream_url"])}) - else: - append(media["play_info"].copy()) + if "page_info" in status: + info = status["page_info"] + if "media_info" in info and self.videos: + append(self._extract_video(info["media_info"])) + + def _extract_video(self, info): + try: + media = max(info["playback_list"], + key=lambda m: m["meta"]["quality_index"]) + except Exception: + return {"url": (info.get("stream_url_hd") or + info["stream_url"])} + else: + return media["play_info"].copy() def _status_by_id(self, status_id): url = "{}/ajax/statuses/show?id={}".format(self.root, status_id) @@ -380,7 +394,7 @@ class WeiboStatusExtractor(WeiboExtractor): }), # missing 'playback_list' (#2792) ("https://weibo.com/2909128931/4409545658754086", { - "count": 9, + "count": 10, }), # empty 'playback_list' (#3301) ("https://weibo.com/1501933722/4142890299009993", { @@ -389,6 +403,10 @@ class WeiboStatusExtractor(WeiboExtractor): r"=0&ps=1CwnkDw1GXwCQx.+&KID=unistore,video", "count": 1, }), + # mix_media_info (#3793) + ("https://weibo.com/2427303621/MxojLlLgQ", { + "count": 9, + }), ("https://m.weibo.cn/status/4339748116375525"), ("https://m.weibo.cn/5746766133/4339748116375525"), ) diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index 2c5bd11..fc36fa2 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -34,6 +34,8 @@ def parse(format_string, default=NONE, fmt=format): if kind == "T": cls = TemplateFormatter + elif kind == "TF": + cls = TemplateFStringFormatter elif kind == "E": cls = ExpressionFormatter elif kind == "M": @@ -197,15 +199,6 @@ class StringFormatter(): return lambda obj: fmt(conversion(obj)) -class TemplateFormatter(StringFormatter): - """Read format_string from file""" - - def __init__(self, path, default=NONE, fmt=format): - with open(util.expand_path(path)) as fp: - format_string = fp.read() - StringFormatter.__init__(self, format_string, default, fmt) - - class ExpressionFormatter(): """Generate text by evaluating a Python expression""" @@ -218,7 +211,7 @@ class ModuleFormatter(): def __init__(self, function_spec, default=NONE, fmt=None): module_name, _, function_name = function_spec.partition(":") - module = __import__(module_name) + module = util.import_file(module_name) self.format_map = getattr(module, function_name) @@ -229,6 +222,24 @@ class FStringFormatter(): self.format_map = util.compile_expression('f"""' + fstring + '"""') +class TemplateFormatter(StringFormatter): + """Read format_string from file""" + + def __init__(self, path, default=NONE, fmt=format): + with open(util.expand_path(path)) as fp: + format_string = fp.read() + StringFormatter.__init__(self, format_string, default, fmt) + + +class TemplateFStringFormatter(FStringFormatter): + """Read f-string from file""" + + def __init__(self, path, default=NONE, fmt=format): + with open(util.expand_path(path)) as fp: + format_string = fp.read() + FStringFormatter.__init__(self, format_string, default, fmt) + + def parse_field_name(field_name): first, rest = _string.formatter_field_name_split(field_name) funcs = [] @@ -245,6 +256,8 @@ def parse_field_name(field_name): try: if ":" in key: key = _slice(key) + 
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index a64c040..ca5785d 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -633,13 +633,13 @@ class KeywordJob(Job):
     def print_kwdict(self, kwdict, prefix="", markers=None):
         """Print key-value pairs in 'kwdict' with formatting"""
         write = sys.stdout.write
-        suffix = "]" if prefix else ""
+        suffix = "']" if prefix else ""
 
         markerid = id(kwdict)
         if markers is None:
             markers = {markerid}
         elif markerid in markers:
-            write("{}\n  <circular reference>\n".format(prefix[:-1]))
+            write("{}\n  <circular reference>\n".format(prefix[:-2]))
             return  # ignore circular reference
         else:
             markers.add(markerid)
@@ -650,13 +650,13 @@ class KeywordJob(Job):
             key = prefix + key + suffix
 
             if isinstance(value, dict):
-                self.print_kwdict(value, key + "[", markers)
+                self.print_kwdict(value, key + "['", markers)
 
             elif isinstance(value, list):
                 if not value:
                     pass
                 elif isinstance(value[0], dict):
-                    self.print_kwdict(value[0], key + "[N][", markers)
+                    self.print_kwdict(value[0], key + "[N]['", markers)
                 else:
                     fmt = ("  {:>%s} {}\n" % len(str(len(value)))).format
                     write(key + "[N]\n")
@@ -667,6 +667,8 @@ class KeywordJob(Job):
                 # string or number
                 write("{}\n  {}\n".format(key, value))
 
+        markers.remove(markerid)
+
 
 class UrlJob(Job):
     """Print download urls"""
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index 1d53851..4f2ee26 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -270,16 +270,15 @@ else:
 
 def configure_standard_streams():
     for name in ("stdout", "stderr", "stdin"):
-        options = config.get(("output",), name)
-        if not options:
-            continue
-
         stream = getattr(sys, name, None)
         if not stream:
             continue
 
-        if isinstance(options, str):
-            options = {"encoding": options, "errors": "replace"}
+        options = config.get(("output",), name)
+        if not options:
+            options = {"errors": "replace"}
+        elif isinstance(options, str):
+            options = {"errors": "replace", "encoding": options}
         elif not options.get("errors"):
             options["errors"] = "replace"
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 9667a41..714f4fe 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -87,6 +87,7 @@ class MetadataPP(PostProcessor):
         self.omode = options.get("open", omode)
         self.encoding = options.get("encoding", "utf-8")
         self.private = options.get("private", False)
+        self.skip = options.get("skip", False)
 
     def run(self, pathfmt):
         archive = self.archive
@@ -96,6 +97,9 @@ class MetadataPP(PostProcessor):
         directory = self._directory(pathfmt)
         path = directory + self._filename(pathfmt)
 
+        if self.skip and os.path.exists(path):
+            return
+
         try:
             with open(path, self.omode, encoding=self.encoding) as fp:
                 self.write(fp, pathfmt.kwdict)
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 494b7f5..93a9148 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.25.0"
+__version__ = "1.25.1"
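Taken together with the `output.py` hunk above, every standard stream now defaults to `errors="replace"` even when nothing is configured. On Python 3.7+ the effect is roughly this sketch (gallery-dl itself goes through its `config` module and `TextIOWrapper` handling):

```python
import sys

for name in ("stdout", "stderr", "stdin"):
    stream = getattr(sys, name, None)
    if stream:
        # unencodable characters become '?' instead of raising UnicodeError
        stream.reconfigure(errors="replace")
```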
