| author | 2023-03-31 07:24:57 -0400 |
|---|---|
| committer | 2023-03-31 07:24:57 -0400 |
| commit | 09e426350409d45e7f7a8ff369f8d8aa9eec0fe4 (patch) |
| tree | 8a8cd3e590675fe6ecb1e5c2b4ad9eecde3dde6d /gallery_dl |
| parent | 10987f08f8b6c510ba64f4b42d95ba67eec6e5b0 (diff) |
New upstream version 1.25.1 (tag: upstream/1.25.1)
Diffstat (limited to 'gallery_dl')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | gallery_dl/__init__.py | 2 |
| -rw-r--r-- | gallery_dl/extractor/gelbooru.py | 66 |
| -rw-r--r-- | gallery_dl/extractor/hiperdex.py | 50 |
| -rw-r--r-- | gallery_dl/extractor/naverwebtoon.py | 89 |
| -rw-r--r-- | gallery_dl/extractor/nitter.py | 27 |
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 214 |
| -rw-r--r-- | gallery_dl/extractor/weibo.py | 44 |
| -rw-r--r-- | gallery_dl/formatter.py | 33 |
| -rw-r--r-- | gallery_dl/job.py | 10 |
| -rw-r--r-- | gallery_dl/output.py | 11 |
| -rw-r--r-- | gallery_dl/postprocessor/metadata.py | 4 |
| -rw-r--r-- | gallery_dl/version.py | 2 |
12 files changed, 405 insertions(+), 147 deletions(-)
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 116ca5d..a430f13 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -120,7 +120,7 @@ def main():
         # eval globals
         path = config.get((), "globals")
         if path:
-            util.GLOBALS = util.import_file(path).__dict__
+            util.GLOBALS.update(util.import_file(path).__dict__)
 
         # loglevels
         output.configure_logging(args.loglevel)
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 80b0ae1..e2173de 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -21,18 +21,21 @@ class GelbooruBase():
     category = "gelbooru"
     basecategory = "booru"
     root = "https://gelbooru.com"
+    offset = 0
 
-    def _api_request(self, params):
+    def _api_request(self, params, key="post"):
+        if "s" not in params:
+            params["s"] = "post"
         params["api_key"] = self.api_key
         params["user_id"] = self.user_id
 
-        url = self.root + "/index.php?page=dapi&s=post&q=index&json=1"
+        url = self.root + "/index.php?page=dapi&q=index&json=1"
         data = self.request(url, params=params).json()
 
-        if "post" not in data:
+        if key not in data:
             return ()
 
-        posts = data["post"]
+        posts = data[key]
         if not isinstance(posts, list):
             return (posts,)
         return posts
@@ -57,7 +60,7 @@ class GelbooruBase():
 
     def _pagination_html(self, params):
         url = self.root + "/index.php"
-        params["pid"] = self.page_start * self.per_page
+        params["pid"] = self.offset
 
         data = {}
         while True:
@@ -103,6 +106,10 @@ class GelbooruBase():
                 "body"  : extr(note, 'data-body="', '"')[0],
             })
 
+    def _skip_offset(self, num):
+        self.offset += num
+        return num
+
 
 class GelbooruTagExtractor(GelbooruBase,
                            gelbooru_v02.GelbooruV02TagExtractor):
@@ -133,13 +140,14 @@ class GelbooruPoolExtractor(GelbooruBase,
         }),
     )
 
+    skip = GelbooruBase._skip_offset
+
     def metadata(self):
         url = self.root + "/index.php"
         self._params = {
             "page": "pool",
             "s"   : "show",
             "id"  : self.pool_id,
-            "pid" : self.page_start,
         }
 
         page = self.request(url, params=self._params).text
@@ -158,8 +166,52 @@ class GelbooruPoolExtractor(GelbooruBase,
 
 class GelbooruFavoriteExtractor(GelbooruBase,
                                 gelbooru_v02.GelbooruV02FavoriteExtractor):
+    """Extractor for gelbooru favorites"""
+    per_page = 100
     pattern = BASE_PATTERN + r"page=favorites&s=view&id=(\d+)"
-    test = ("https://gelbooru.com/index.php?page=favorites&s=view&id=12345",)
+    test = ("https://gelbooru.com/index.php?page=favorites&s=view&id=279415", {
+        "count": 3,
+    })
+
+    skip = GelbooruBase._skip_offset
+
+    def posts(self):
+        # get number of favorites
+        params = {
+            "s"    : "favorite",
+            "id"   : self.favorite_id,
+            "limit": "1",
+        }
+        count = self._api_request(params, "@attributes")[0]["count"]
+
+        if count <= self.offset:
+            return
+        pnum, last = divmod(count + 1, self.per_page)
+
+        if self.offset >= last:
+            self.offset -= last
+            diff, self.offset = divmod(self.offset, self.per_page)
+            pnum -= diff + 1
+        skip = self.offset
+
+        # paginate over them in reverse
+        params["pid"] = pnum
+        params["limit"] = self.per_page
+
+        while True:
+            favs = self._api_request(params, "favorite")
+
+            favs.reverse()
+            if skip:
+                favs = favs[skip:]
+                skip = 0
+
+            for fav in favs:
+                yield from self._api_request({"id": fav["favorite"]})
+
+            params["pid"] -= 1
+            if params["pid"] < 0:
+                return
 
 
 class GelbooruPostExtractor(GelbooruBase,
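The favorites extractor above turns an absolute `skip` offset into a starting API page (`pid`) plus a number of entries to drop from the first fetched page. A worked example of that divmod arithmetic, with illustrative numbers (not taken from the test suite):

```python
per_page = 100
count, offset = 250, 130      # 250 favorites total; skip() already consumed 130

pnum, last = divmod(count + 1, per_page)     # pnum=2, last=51 entries on the final page
if offset >= last:
    offset -= last                           # 130 - 51 = 79 entries still to skip
    diff, offset = divmod(offset, per_page)  # diff=0 whole pages, offset=79
    pnum -= diff + 1                         # start one page lower: pnum=1
skip = offset

print(pnum, skip)  # 1 79 -> begin at pid=1 and drop 79 entries after reversing
```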
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index d61c139..3aad88c 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -6,7 +6,7 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extractors for https://1sthiperdex.com/"""
+"""Extractors for https://hiperdex.com/"""
 
 from .common import ChapterExtractor, MangaExtractor
 from .. import text
@@ -20,7 +20,7 @@ BASE_PATTERN = (r"((?:https?://)?(?:www\.)?"
 class HiperdexBase():
     """Base class for hiperdex extractors"""
     category = "hiperdex"
-    root = "https://1sthiperdex.com"
+    root = "https://hiperdex.com"
 
     @memcache(keyarg=1)
     def manga_data(self, manga, page=None):
@@ -31,7 +31,9 @@ class HiperdexBase():
 
         return {
             "manga"  : text.unescape(extr(
-                "<title>", "<").rpartition("&")[0].strip()),
+                "<title>", "<").rpartition(" - ")[0].strip()),
+            "url"    : text.unescape(extr(
+                'property="og:url" content="', '"')),
             "score"  : text.parse_float(extr(
                 'id="averagerate">', '<')),
             "author" : text.remove_html(extr(
@@ -65,10 +67,10 @@ class HiperdexBase():
 
 
 class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
-    """Extractor for manga chapters from 1sthiperdex.com"""
+    """Extractor for manga chapters from hiperdex.com"""
    pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))"
     test = (
-        ("https://1sthiperdex.com/manga/domestic-na-kanojo/154-5/", {
+        ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", {
             "pattern": r"https://(1st)?hiperdex\d?.(com|net|info)"
                        r"/wp-content/uploads/WP-manga/data"
                        r"/manga_\w+/[0-9a-f]{32}/\d+\.webp",
@@ -86,7 +88,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
                 "type"   : "Manga",
             },
         }),
-        ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/"),
+        ("https://1sthiperdex.com/manga/domestic-na-kanojo/154-5/"),
         ("https://hiperdex2.com/manga/domestic-na-kanojo/154-5/"),
         ("https://hiperdex.net/manga/domestic-na-kanojo/154-5/"),
         ("https://hiperdex.info/manga/domestic-na-kanojo/154-5/"),
@@ -109,11 +111,11 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
 
 
 class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
-    """Extractor for manga from 1sthiperdex.com"""
+    """Extractor for manga from hiperdex.com"""
     chapterclass = HiperdexChapterExtractor
     pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$"
     test = (
-        ("https://1sthiperdex.com/manga/youre-not-that-special/", {
+        ("https://hiperdex.com/manga/1603231576-youre-not-that-special/", {
             "count": 51,
             "pattern": HiperdexChapterExtractor.pattern,
             "keyword": {
@@ -131,6 +133,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
             },
         }),
         ("https://hiperdex.com/manga/youre-not-that-special/"),
+        ("https://1sthiperdex.com/manga/youre-not-that-special/"),
         ("https://hiperdex2.com/manga/youre-not-that-special/"),
         ("https://hiperdex.net/manga/youre-not-that-special/"),
         ("https://hiperdex.info/manga/youre-not-that-special/"),
@@ -142,25 +145,24 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
         MangaExtractor.__init__(self, match, self.root + path + "/")
 
     def chapters(self, page):
-        self.manga_data(self.manga, page)
-        results = []
-
-        shortlink = text.extr(page, "rel='shortlink' href='", "'")
-        data = {
-            "action"   : "manga_get_reading_nav",
-            "manga"    : shortlink.rpartition("=")[2],
-            "chapter"  : "",
-            "volume_id": "",
-            "style"    : "list",
-            "type"     : "manga",
+        data = self.manga_data(self.manga, page)
+        self.manga_url = url = data["url"]
+
+        url = self.manga_url + "ajax/chapters/"
+        headers = {
+            "Accept": "*/*",
+            "X-Requested-With": "XMLHttpRequest",
+            "Origin": self.root,
+            "Referer": self.manga_url,
         }
-        url = self.root + "/wp-admin/admin-ajax.php"
-        page = self.request(url, method="POST", data=data).text
+        html = self.request(url, method="POST", headers=headers).text
 
-        for url in text.extract_iter(page, 'data-redirect="', '"'):
-            chapter = url.rpartition("/")[2]
+        results = []
+        for item in text.extract_iter(
+                html, '<li class="wp-manga-chapter', '</li>'):
+            url = text.extr(item, 'href="', '"')
+            chapter = url.rstrip("/").rpartition("/")[2]
             results.append((url, self.chapter_data(chapter)))
-
         return results
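For context: the removed code POSTed a `manga_get_reading_nav` action to WordPress's generic `/wp-admin/admin-ajax.php`, while the new code calls the Madara theme's chapter endpoint directly under the manga URL. A standalone sketch of the new request, assuming the `requests` library and an illustrative manga URL:

```python
import requests

# illustrative URL; the extractor takes the real value from the page's og:url
manga_url = "https://hiperdex.com/manga/youre-not-that-special/"

html = requests.post(
    manga_url + "ajax/chapters/",
    headers={
        "Accept": "*/*",
        "X-Requested-With": "XMLHttpRequest",   # marks the call as AJAX
        "Origin": "https://hiperdex.com",
        "Referer": manga_url,
    },
).text
# 'html' is a fragment of <li class="wp-manga-chapter"> items,
# one link per chapter
```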
diff --git a/gallery_dl/extractor/naverwebtoon.py b/gallery_dl/extractor/naverwebtoon.py
index fa91f76..d6292af 100644
--- a/gallery_dl/extractor/naverwebtoon.py
+++ b/gallery_dl/extractor/naverwebtoon.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 # Copyright 2021 Seonghyeon Cho
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -11,7 +11,6 @@
 
 from .common import GalleryExtractor, Extractor, Message
 from .. import text
-import re
 
 BASE_PATTERN = (r"(?:https?://)?comic\.naver\.com"
                 r"/(webtoon|challenge|bestChallenge)")
@@ -34,18 +33,44 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor):
                      "?titleId=26458&no=1&weekday=tue"), {
             "url": "47a956ba8c7a837213d5985f50c569fcff986f75",
             "content": "3806b6e8befbb1920048de9888dfce6220f69a60",
-            "count": 14
+            "count": 14,
+            "keyword": {
+                "author": ["김규삼"],
+                "artist": ["김규삼"],
+                "comic": "N의등대-눈의등대",
+                "count": 14,
+                "episode": "1",
+                "extension": "jpg",
+                "num": int,
+                "tags": ["스릴러", "완결무료", "완결스릴러"],
+                "title": "n의 등대 - 눈의 등대 1화",
+                "title_id": "26458",
+            },
         }),
         (("https://comic.naver.com/challenge/detail"
           "?titleId=765124&no=1"), {
-            "pattern": r"https://image-comic\.pstatic\.net/nas"
+            "pattern": r"https://image-comic\.pstatic\.net"
                        r"/user_contents_data/challenge_comic/2021/01/19"
                        r"/342586/upload_7149856273586337846\.jpeg",
             "count": 1,
+            "keyword": {
+                "author": ["kemi****"],
+                "artist": [],
+                "comic": "우니 모두의 이야기",
+                "count": 1,
+                "episode": "1",
+                "extension": "jpeg",
+                "filename": "upload_7149856273586337846",
+                "num": 1,
+                "tags": ["일상툰", "우니모두의이야기", "퇴사", "입사", "신입사원",
+                         "사회초년생", "회사원", "20대"],
+                "title": "퇴사하다",
+                "title_id": "765124",
+            },
         }),
         (("https://comic.naver.com/bestChallenge/detail.nhn"
           "?titleId=771467&no=3"), {
-            "pattern": r"https://image-comic\.pstatic\.net/nas"
+            "pattern": r"https://image-comic\.pstatic\.net"
                        r"/user_contents_data/challenge_comic/2021/04/28"
                        r"/345534/upload_3617293622396203109\.jpeg",
             "count": 1,
@@ -66,12 +91,14 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor):
         return {
             "title_id": self.title_id,
             "episode" : self.episode,
-            "title"   : extr('property="og:title" content="', '"'),
-            "comic"   : extr('<h2>', '<span'),
-            "authors" : extr('class="wrt_nm">', '</span>').strip().split("/"),
-            "description": extr('<p class="txt">', '</p>'),
-            "genre"   : extr('<span class="genre">', '</span>'),
-            "date"    : extr('<dd class="date">', '</dd>'),
+            "comic"   : extr("titleName: '", "'"),
+            "tags"    : [t.strip() for t in text.extract_iter(
+                extr("tagList: [", "}],"), '"tagName":"', '"')],
+            "title"   : extr('"subtitle":"', '"'),
+            "author"  : [a.strip() for a in text.extract_iter(
+                extr('"writers":[', ']'), '"name":"', '"')],
+            "artist"  : [a.strip() for a in text.extract_iter(
+                extr('"painters":[', ']'), '"name":"', '"')]
         }
 
     @staticmethod
@@ -87,7 +114,7 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor):
 class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor):
     subcategory = "comic"
     categorytransfer = True
-    pattern = (BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)")
+    pattern = BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)"
     test = (
         ("https://comic.naver.com/webtoon/list?titleId=22073", {
             "pattern": NaverwebtoonEpisodeExtractor.pattern,
@@ -109,28 +136,30 @@ class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor):
         query = text.parse_query(query)
         self.title_id = query.get("titleId")
         self.page_no = text.parse_int(query.get("page"), 1)
+        self.sort = query.get("sort", "ASC")
 
     def items(self):
-        url = "{}/{}/list".format(self.root, self.path)
-        params = {"titleId": self.title_id, "page": self.page_no}
-        data = {"_extractor": NaverwebtoonEpisodeExtractor}
+        base = "{}/{}/detail?titleId={}&no=".format(
+            self.root, self.path, self.title_id)
+
+        url = self.root + "/api/article/list"
+        headers = {
+            "Accept": "application/json, text/plain, */*",
+            "Referer": self.root + "/",
+        }
+        params = {
+            "titleId": self.title_id,
+            "page"   : self.page_no,
+            "sort"   : self.sort,
+        }
 
         while True:
-            page = self.request(url, params=params).text
-            data["page"] = self.page_no
+            data = self.request(url, headers=headers, params=params).json()
 
-            for episode_url in self.get_episode_urls(page):
-                yield Message.Queue, episode_url, data
+            for article in data["articleList"]:
+                article["_extractor"] = NaverwebtoonEpisodeExtractor
+                yield Message.Queue, base + str(article["no"]), article
 
-            if 'class="next"' not in page:
+            params["page"] = data["pageInfo"]["nextPage"]
+            if not params["page"]:
                 return
-            params["page"] += 1
-
-    def get_episode_urls(self, page):
-        """Extract and return all episode urls in page"""
-        return [
-            self.root + path
-            for path in re.findall(
-                r'<a href="(/(?:webtoon|challenge|bestChallenge)'
-                r'/detail\?[^"]+)', page)
-        ][::2]
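The comic extractor thus switches from scraping paginated HTML lists to Naver's JSON API. A self-contained sketch of the new pagination loop, assuming `requests`; the endpoint and field names (`articleList`, `pageInfo.nextPage`) are taken from the diff above, not verified independently:

```python
import requests

url = "https://comic.naver.com/api/article/list"
headers = {"Accept": "application/json, text/plain, */*",
           "Referer": "https://comic.naver.com/"}
params = {"titleId": "22073", "page": 1, "sort": "ASC"}

while True:
    data = requests.get(url, headers=headers, params=params).json()
    for article in data["articleList"]:
        print(article["no"], article.get("subtitle"))
    params["page"] = data["pageInfo"]["nextPage"]  # falsy on the last page
    if not params["page"]:
        break
```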
r"/list(?:\.nhn)?\?([^#]+)") + pattern = BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)" test = ( ("https://comic.naver.com/webtoon/list?titleId=22073", { "pattern": NaverwebtoonEpisodeExtractor.pattern, @@ -109,28 +136,30 @@ class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor): query = text.parse_query(query) self.title_id = query.get("titleId") self.page_no = text.parse_int(query.get("page"), 1) + self.sort = query.get("sort", "ASC") def items(self): - url = "{}/{}/list".format(self.root, self.path) - params = {"titleId": self.title_id, "page": self.page_no} - data = {"_extractor": NaverwebtoonEpisodeExtractor} + base = "{}/{}/detail?titleId={}&no=".format( + self.root, self.path, self.title_id) + + url = self.root + "/api/article/list" + headers = { + "Accept": "application/json, text/plain, */*", + "Referer": self.root + "/", + } + params = { + "titleId": self.title_id, + "page" : self.page_no, + "sort" : self.sort, + } while True: - page = self.request(url, params=params).text - data["page"] = self.page_no + data = self.request(url, headers=headers, params=params).json() - for episode_url in self.get_episode_urls(page): - yield Message.Queue, episode_url, data + for article in data["articleList"]: + article["_extractor"] = NaverwebtoonEpisodeExtractor + yield Message.Queue, base + str(article["no"]), article - if 'class="next"' not in page: + params["page"] = data["pageInfo"]["nextPage"] + if not params["page"]: return - params["page"] += 1 - - def get_episode_urls(self, page): - """Extract and return all episode urls in page""" - return [ - self.root + path - for path in re.findall( - r'<a href="(/(?:webtoon|challenge|bestChallenge)' - r'/detail\?[^"]+)', page) - ][::2] diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py index 9b69694..725788a 100644 --- a/gallery_dl/extractor/nitter.py +++ b/gallery_dl/extractor/nitter.py @@ -51,6 +51,11 @@ class NitterExtractor(BaseExtractor): for url in text.extract_iter( attachments, 'href="', '"'): + if "/i/broadcasts/" in url: + self.log.debug( + "Skipping unsupported broadcast '%s'", url) + continue + if "/enc/" in url: name = binascii.a2b_base64(url.rpartition( "/")[2]).decode().rpartition("/")[2] @@ -123,7 +128,7 @@ class NitterExtractor(BaseExtractor): "likes" : text.parse_int(extr( 'class="icon-heart', '</div>').rpartition(">")[2]), "retweet" : 'class="retweet-header' in html, - "quoted": False, + "quoted" : False, } def _tweet_from_quote(self, html): @@ -140,18 +145,24 @@ class NitterExtractor(BaseExtractor): "date" : text.parse_datetime( extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"), "tweet_id": link.rpartition("/")[2].partition("#")[0], - "content": extr('class="quote-text', "</div").partition(">")[2], + "content" : extr('class="quote-text', "</div").partition(">")[2], "_attach" : extr('class="attachments', ''' </div>'''), "retweet" : False, - "quoted": True, + "quoted" : True, } def _user_from_html(self, html): extr = text.extract_from(html, html.index('class="profile-tabs')) banner = extr('class="profile-banner"><a href="', '"') + + try: + uid = banner.split("%2F")[4] + except Exception: + uid = 0 + return { - "id" : banner.split("%2F")[4] if banner else None, + "id" : uid, "profile_banner" : self.root + banner if banner else "", "profile_image" : self.root + extr( 'class="profile-card-avatar" href="', '"'), @@ -229,6 +240,10 @@ BASE_PATTERN = NitterExtractor.update({ "root": "https://nitter.unixfox.eu", "pattern": r"nitter\.unixfox\.eu", }, + "nitter.it": { + "root": "https://nitter.it", + "pattern": 
r"nitter\.it", + }, }) USER_PATTERN = BASE_PATTERN + r"/(i(?:/user/|d:)(\d+)|[^/?#]+)" @@ -443,6 +458,10 @@ class NitterTweetExtractor(NitterExtractor): "keyword": {"date": "dt:2022-02-13 20:10:00"}, "count": 1, }), + # broadcast + ("https://nitter.it/POTUS/status/1639409307878928384", { + "count": 0, + }) ) def tweets(self): diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 29b4ac3..89d96d7 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -248,11 +248,15 @@ class TwitterExtractor(Extractor): author = tweet["user"] author = self._transform_user(author) + if "note_tweet" in tweet: + note = tweet["note_tweet"]["note_tweet_results"]["result"] + else: + note = None + if "legacy" in tweet: tweet = tweet["legacy"] tget = tweet.get - entities = tweet["entities"] tdata = { "tweet_id" : text.parse_int(tweet["id_str"]), "retweet_id" : text.parse_int( @@ -272,6 +276,8 @@ class TwitterExtractor(Extractor): "retweet_count" : tget("retweet_count"), } + entities = note["entity_set"] if note else tweet["entities"] + hashtags = entities.get("hashtags") if hashtags: tdata["hashtags"] = [t["text"] for t in hashtags] @@ -284,7 +290,8 @@ class TwitterExtractor(Extractor): "nick": u["name"], } for u in mentions] - content = text.unescape(tget("full_text") or tget("text") or "") + content = text.unescape( + note["text"] if note else tget("full_text") or tget("text") or "") urls = entities.get("urls") if urls: for url in urls: @@ -642,6 +649,21 @@ class TwitterSearchExtractor(TwitterExtractor): return self.api.search_adaptive(query) +class TwitterHashtagExtractor(TwitterExtractor): + """Extractor for Twitter hashtags""" + subcategory = "hashtag" + pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)" + test = ("https://twitter.com/hashtag/nature", { + "pattern": TwitterSearchExtractor.pattern, + "url": "3571c3a53b7647ea35517041fdc17f77ec5b2cb9", + }) + + def items(self): + url = "{}/search?q=%23{}".format(self.root, self.user) + data = {"_extractor": TwitterSearchExtractor} + yield Message.Queue, url, data + + class TwitterEventExtractor(TwitterExtractor): """Extractor for Tweets from a Twitter Event""" subcategory = "event" @@ -803,6 +825,23 @@ class TwitterTweetExtractor(TwitterExtractor): r"\?format=(jpg|png)&name=orig$", "range": "1-2", }), + # note tweet with long 'content' + ("https://twitter.com/i/web/status/1629193457112686592", { + "keyword": { + "content": """\ +BREAKING - DEADLY LIES: Independent researchers at Texas A&M University have \ +just contradicted federal government regulators, saying that toxic air \ +pollutants in East Palestine, Ohio, could pose long-term risks. \n\nThe \ +Washington Post writes, "Three weeks after the toxic train derailment in \ +Ohio, an analysis of Environmental Protection Agency data has found nine air \ +pollutants at levels that could raise long-term health concerns in and around \ +East Palestine, according to an independent analysis. \n\n\"The analysis by \ +Texas A&M University seems to contradict statements by state and federal \ +regulators that air near the crash site is completely safe, despite residents \ +complaining about rashes, breathing problems and other health effects." 
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py (continued)
@@ -951,6 +990,10 @@ class TwitterAPI():
         self.extractor = extractor
         self.root = "https://api.twitter.com"
 
+        self._nsfw_warning = True
+        self._syndication = self.extractor.syndication
+        self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
+
         cookies = extractor.session.cookies
         cookiedomain = extractor.cookiedomain
@@ -965,7 +1008,11 @@ class TwitterAPI():
 
         auth_token = cookies.get("auth_token", domain=cookiedomain)
 
+        if not auth_token:
+            self.user_media = self.user_media_legacy
+
         self.headers = {
+            "Accept": "*/*",
             "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
                              "COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
                              "4FA33AGWWjCpTnA",
@@ -1019,73 +1066,132 @@ class TwitterAPI():
             "collab_control,vibe",
         }
         self.variables = {
-            "includePromotedContent": False,
-            "withSuperFollowsUserFields": True,
-            "withBirdwatchPivots": False,
             "withDownvotePerspective": False,
             "withReactionsMetadata": False,
             "withReactionsPerspective": False,
-            "withSuperFollowsTweetFields": True,
-            "withClientEventToken": False,
-            "withBirdwatchNotes": False,
-            "withVoice": True,
-            "withV2Timeline": False,
-            "__fs_interactive_text": False,
-            "__fs_dont_mention_me_view_api_enabled": False,
         }
-
-        self._nsfw_warning = True
-        self._syndication = self.extractor.syndication
-        self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
+        self.features = {
+            "responsive_web_twitter_blue_verified_badge_is_enabled": True,
+            "responsive_web_graphql_exclude_directive_enabled": True,
+            "verified_phone_label_enabled": False,
+            "responsive_web_graphql_skip_user_profile_"
+            "image_extensions_enabled": False,
+            "responsive_web_graphql_timeline_navigation_enabled": True,
+        }
+        self.features_pagination = {
+            "responsive_web_twitter_blue_verified_badge_is_enabled": True,
+            "responsive_web_graphql_exclude_directive_enabled": True,
+            "verified_phone_label_enabled": False,
+            "responsive_web_graphql_timeline_navigation_enabled": True,
+            "responsive_web_graphql_skip_user_profile_"
+            "image_extensions_enabled": False,
+            "tweetypie_unmention_optimization_enabled": True,
+            "vibe_api_enabled": True,
+            "responsive_web_edit_tweet_api_enabled": True,
+            "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
+            "view_counts_everywhere_api_enabled": True,
+            "longform_notetweets_consumption_enabled": True,
+            "tweet_awards_web_tipping_enabled": False,
+            "freedom_of_speech_not_reach_fetch_enabled": False,
+            "standardized_nudges_misinfo": True,
+            "tweet_with_visibility_results_prefer_gql_"
+            "limited_actions_policy_enabled": False,
+            "interactive_text_enabled": True,
+            "responsive_web_text_conversations_enabled": False,
+            "longform_notetweets_richtext_consumption_enabled": False,
+            "responsive_web_enhance_cards_enabled": False,
+        }
 
     def tweet_detail(self, tweet_id):
-        endpoint = "/graphql/ItejhtHVxU7ksltgMmyaLA/TweetDetail"
+        endpoint = "/graphql/zXaXQgfyR4GxE21uwYQSyA/TweetDetail"
         variables = {
             "focalTweetId": tweet_id,
+            "referrer": "profile",
             "with_rux_injections": False,
+            "includePromotedContent": True,
             "withCommunity": True,
             "withQuickPromoteEligibilityTweetFields": True,
             "withBirdwatchNotes": False,
+            "withSuperFollowsUserFields": True,
+            "withSuperFollowsTweetFields": True,
+            "withVoice": True,
+            "withV2Timeline": True,
         }
         return self._pagination_tweets(
-            endpoint, variables, ("threaded_conversation_with_injections",))
+            endpoint, variables, ("threaded_conversation_with_injections_v2",))
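Trimmed-down sketch of how these GraphQL endpoints are now queried: `variables` and `features` travel as two separately JSON-encoded query parameters, serialized without whitespace (the query ID below is a placeholder):

```python
import json

_json_dumps = json.JSONEncoder(separators=(",", ":")).encode

variables = {"focalTweetId": "1629193457112686592", "withV2Timeline": True}
features  = {"responsive_web_graphql_timeline_navigation_enabled": True}

params = {
    "variables": _json_dumps(variables),
    "features" : _json_dumps(features),
}
# session.get("https://api.twitter.com/graphql/<queryId>/TweetDetail",
#             params=params, headers=headers)
```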
"/graphql/WZT7sCTrLvSOaWOXLDsWbQ/UserTweets" + endpoint = "/graphql/9rys0A7w1EyqVd2ME0QCJg/UserTweets" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, + "includePromotedContent": True, "withQuickPromoteEligibilityTweetFields": True, + "withVoice": True, + "withV2Timeline": True, } return self._pagination_tweets(endpoint, variables) def user_tweets_and_replies(self, screen_name): - endpoint = "/graphql/t4wEKVulW4Mbv1P0kgxTEw/UserTweetsAndReplies" + endpoint = "/graphql/ehMCHF3Mkgjsfz_aImqOsg/UserTweetsAndReplies" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, + "includePromotedContent": True, "withCommunity": True, + "withVoice": True, + "withV2Timeline": True, } return self._pagination_tweets(endpoint, variables) def user_media(self, screen_name): - endpoint = "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia" + endpoint = "/graphql/MA_EP2a21zpzNWKRkaPBMg/UserMedia" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, + "includePromotedContent": False, + "withClientEventToken": False, + "withBirdwatchNotes": False, + "withVoice": True, + "withV2Timeline": True, } return self._pagination_tweets(endpoint, variables) + def user_media_legacy(self, screen_name): + endpoint = "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia" + variables = { + "userId": self._user_id_by_screen_name(screen_name), + "count": 100, + "includePromotedContent": False, + "withSuperFollowsUserFields": True, + "withBirdwatchPivots": False, + "withSuperFollowsTweetFields": True, + "withClientEventToken": False, + "withBirdwatchNotes": False, + "withVoice": True, + "withV2Timeline": False, + "__fs_interactive_text": False, + "__fs_dont_mention_me_view_api_enabled": False, + } + return self._pagination_tweets( + endpoint, variables, ("user", "result", "timeline", "timeline"), + features=False) + def user_likes(self, screen_name): - endpoint = "/graphql/9MSTt44HoGjVFSg_u3rHDw/Likes" + endpoint = "/graphql/XbHBYpgURwtklXj8NNxTDw/Likes" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, + "includePromotedContent": False, + "withClientEventToken": False, + "withBirdwatchNotes": False, + "withVoice": True, + "withV2Timeline": True, } return self._pagination_tweets(endpoint, variables) def user_bookmarks(self): - endpoint = "/graphql/uKP9v_I31k0_VSBmlpq2Xg/Bookmarks" + endpoint = "/graphql/Xq0wQSWHlcfnXARLJGqTxg/Bookmarks" variables = { "count": 100, } @@ -1093,7 +1199,7 @@ class TwitterAPI(): endpoint, variables, ("bookmark_timeline", "timeline"), False) def list_latest_tweets_timeline(self, list_id): - endpoint = "/graphql/z3l-EHlx-fyg8OvGO4JN8A/ListLatestTweetsTimeline" + endpoint = "/graphql/FDI9EiIp54KxEOWGiv3B4A/ListLatestTweetsTimeline" variables = { "listId": list_id, "count": 100, @@ -1128,18 +1234,21 @@ class TwitterAPI(): ["twitter_objects"]["live_events"][event_id]) def list_by_rest_id(self, list_id): - endpoint = "/graphql/BWEhzAk7k8TwbU4lKH2dpw/ListByRestId" - params = {"variables": self._json_dumps({ - "listId": list_id, - "withSuperFollowsUserFields": True, - })} + endpoint = "/graphql/KlGpwq5CAt9tCfHkV2mwYQ/ListByRestId" + params = { + "variables": self._json_dumps({ + "listId": list_id, + "withSuperFollowsUserFields": True, + }), + "features": self._json_dumps(self.features), + } try: return self._call(endpoint, params)["data"]["list"] except KeyError: raise exception.NotFoundError("list") def list_members(self, list_id): - endpoint = "/graphql/snESM0DPs3c7M1SBm4rvVw/ListMembers" + endpoint = 
"/graphql/XsAJX17RLgLYU8GALIWg2g/ListMembers" variables = { "listId": list_id, "count": 100, @@ -1149,29 +1258,34 @@ class TwitterAPI(): endpoint, variables, ("list", "members_timeline", "timeline")) def user_following(self, screen_name): - endpoint = "/graphql/mIwX8GogcobVlRwlgpHNYA/Following" + endpoint = "/graphql/vTZwBbd_gz6aI8v6Wze21A/Following" variables = { "userId": self._user_id_by_screen_name(screen_name), "count": 100, + "includePromotedContent": False, } return self._pagination_users(endpoint, variables) def user_by_rest_id(self, rest_id): - endpoint = "/graphql/I5nvpI91ljifos1Y3Lltyg/UserByRestId" - params = {"variables": self._json_dumps({ - "userId": rest_id, - "withSafetyModeUserFields": True, - "withSuperFollowsUserFields": True, - })} + endpoint = "/graphql/QPSxc9lxrmrwnBzYkJI8eA/UserByRestId" + params = { + "variables": self._json_dumps({ + "userId": rest_id, + "withSafetyModeUserFields": True, + }), + "features": self._json_dumps(self.features), + } return self._call(endpoint, params)["data"]["user"]["result"] def user_by_screen_name(self, screen_name): - endpoint = "/graphql/7mjxD3-C6BxitPMVQ6w0-Q/UserByScreenName" - params = {"variables": self._json_dumps({ - "screen_name": screen_name, - "withSafetyModeUserFields": True, - "withSuperFollowsUserFields": True, - })} + endpoint = "/graphql/nZjSkpOpSL5rWyIVdsKeLA/UserByScreenName" + params = { + "variables": self._json_dumps({ + "screen_name": screen_name, + "withSafetyModeUserFields": True, + }), + "features": self._json_dumps(self.features), + } return self._call(endpoint, params)["data"]["user"]["result"] def _user_id_by_screen_name(self, screen_name): @@ -1337,19 +1451,23 @@ class TwitterAPI(): params["cursor"] = cursor def _pagination_tweets(self, endpoint, variables, - path=None, stop_tweets=True): + path=None, stop_tweets=True, features=True): extr = self.extractor variables.update(self.variables) original_retweets = (extr.retweets == "original") pinned_tweet = extr.pinned + params = {"variables": None} + if features: + params["features"] = self._json_dumps(self.features_pagination) + while True: - params = {"variables": self._json_dumps(variables)} + params["variables"] = self._json_dumps(variables) data = self._call(endpoint, params)["data"] try: if path is None: - instructions = (data["user"]["result"]["timeline"] + instructions = (data["user"]["result"]["timeline_v2"] ["timeline"]["instructions"]) else: instructions = data @@ -1487,10 +1605,12 @@ class TwitterAPI(): def _pagination_users(self, endpoint, variables, path=None): variables.update(self.variables) + params = {"variables": None, + "features" : self._json_dumps(self.features_pagination)} while True: cursor = entry = stop = None - params = {"variables": self._json_dumps(variables)} + params["variables"] = self._json_dumps(variables) data = self._call(endpoint, params)["data"] try: diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 68bd136..388ee03 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -79,6 +79,18 @@ class WeiboExtractor(Extractor): def _extract_status(self, status, files): append = files.append + if "mix_media_info" in status: + for item in status["mix_media_info"]["items"]: + type = item.get("type") + if type == "video": + if self.videos: + append(self._extract_video(item["data"]["media_info"])) + elif type == "pic": + append(item["data"]["largest"].copy()) + else: + self.log.warning("Unknown media type '%s'", type) + return + pic_ids = status.get("pic_ids") if pic_ids: pics = 
status["pic_infos"] @@ -100,18 +112,20 @@ class WeiboExtractor(Extractor): else: append(pic["largest"].copy()) - if "page_info" in status and self.videos: - try: - media = max(status["page_info"]["media_info"]["playback_list"], - key=lambda m: m["meta"]["quality_index"]) - except KeyError: - pass - except ValueError: - info = status["page_info"]["media_info"] - append({"url": (info.get("stream_url_hd") or - info["stream_url"])}) - else: - append(media["play_info"].copy()) + if "page_info" in status: + info = status["page_info"] + if "media_info" in info and self.videos: + append(self._extract_video(info["media_info"])) + + def _extract_video(self, info): + try: + media = max(info["playback_list"], + key=lambda m: m["meta"]["quality_index"]) + except Exception: + return {"url": (info.get("stream_url_hd") or + info["stream_url"])} + else: + return media["play_info"].copy() def _status_by_id(self, status_id): url = "{}/ajax/statuses/show?id={}".format(self.root, status_id) @@ -380,7 +394,7 @@ class WeiboStatusExtractor(WeiboExtractor): }), # missing 'playback_list' (#2792) ("https://weibo.com/2909128931/4409545658754086", { - "count": 9, + "count": 10, }), # empty 'playback_list' (#3301) ("https://weibo.com/1501933722/4142890299009993", { @@ -389,6 +403,10 @@ class WeiboStatusExtractor(WeiboExtractor): r"=0&ps=1CwnkDw1GXwCQx.+&KID=unistore,video", "count": 1, }), + # mix_media_info (#3793) + ("https://weibo.com/2427303621/MxojLlLgQ", { + "count": 9, + }), ("https://m.weibo.cn/status/4339748116375525"), ("https://m.weibo.cn/5746766133/4339748116375525"), ) diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index 2c5bd11..fc36fa2 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -34,6 +34,8 @@ def parse(format_string, default=NONE, fmt=format): if kind == "T": cls = TemplateFormatter + elif kind == "TF": + cls = TemplateFStringFormatter elif kind == "E": cls = ExpressionFormatter elif kind == "M": @@ -197,15 +199,6 @@ class StringFormatter(): return lambda obj: fmt(conversion(obj)) -class TemplateFormatter(StringFormatter): - """Read format_string from file""" - - def __init__(self, path, default=NONE, fmt=format): - with open(util.expand_path(path)) as fp: - format_string = fp.read() - StringFormatter.__init__(self, format_string, default, fmt) - - class ExpressionFormatter(): """Generate text by evaluating a Python expression""" @@ -218,7 +211,7 @@ class ModuleFormatter(): def __init__(self, function_spec, default=NONE, fmt=None): module_name, _, function_name = function_spec.partition(":") - module = __import__(module_name) + module = util.import_file(module_name) self.format_map = getattr(module, function_name) @@ -229,6 +222,24 @@ class FStringFormatter(): self.format_map = util.compile_expression('f"""' + fstring + '"""') +class TemplateFormatter(StringFormatter): + """Read format_string from file""" + + def __init__(self, path, default=NONE, fmt=format): + with open(util.expand_path(path)) as fp: + format_string = fp.read() + StringFormatter.__init__(self, format_string, default, fmt) + + +class TemplateFStringFormatter(FStringFormatter): + """Read f-string from file""" + + def __init__(self, path, default=NONE, fmt=format): + with open(util.expand_path(path)) as fp: + format_string = fp.read() + FStringFormatter.__init__(self, format_string, default, fmt) + + def parse_field_name(field_name): first, rest = _string.formatter_field_name_split(field_name) funcs = [] @@ -245,6 +256,8 @@ def parse_field_name(field_name): try: if ":" in key: key = _slice(key) + 
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index a64c040..ca5785d 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -633,13 +633,13 @@ class KeywordJob(Job):
     def print_kwdict(self, kwdict, prefix="", markers=None):
         """Print key-value pairs in 'kwdict' with formatting"""
         write = sys.stdout.write
-        suffix = "]" if prefix else ""
+        suffix = "']" if prefix else ""
 
         markerid = id(kwdict)
         if markers is None:
             markers = {markerid}
         elif markerid in markers:
-            write("{}\n  <circular reference>\n".format(prefix[:-1]))
+            write("{}\n  <circular reference>\n".format(prefix[:-2]))
             return  # ignore circular reference
         else:
             markers.add(markerid)
@@ -650,13 +650,13 @@ class KeywordJob(Job):
             key = prefix + key + suffix
 
             if isinstance(value, dict):
-                self.print_kwdict(value, key + "[", markers)
+                self.print_kwdict(value, key + "['", markers)
 
             elif isinstance(value, list):
                 if not value:
                     pass
                 elif isinstance(value[0], dict):
-                    self.print_kwdict(value[0], key + "[N][", markers)
+                    self.print_kwdict(value[0], key + "[N]['", markers)
                 else:
                     fmt = ("  {:>%s} {}\n" % len(str(len(value)))).format
                     write(key + "[N]\n")
@@ -667,6 +667,8 @@ class KeywordJob(Job):
                 # string or number
                 write("{}\n  {}\n".format(key, value))
 
+        markers.remove(markerid)
+
 
 class UrlJob(Job):
     """Print download urls"""
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index 1d53851..4f2ee26 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -270,16 +270,15 @@ else:
 
 def configure_standard_streams():
     for name in ("stdout", "stderr", "stdin"):
-        options = config.get(("output",), name)
-        if not options:
-            continue
-
         stream = getattr(sys, name, None)
         if not stream:
             continue
 
-        if isinstance(options, str):
-            options = {"encoding": options, "errors": "replace"}
+        options = config.get(("output",), name)
+        if not options:
+            options = {"errors": "replace"}
+        elif isinstance(options, str):
+            options = {"errors": "replace", "encoding": options}
         elif not options.get("errors"):
             options["errors"] = "replace"
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 9667a41..714f4fe 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -87,6 +87,7 @@ class MetadataPP(PostProcessor):
         self.omode = options.get("open", omode)
         self.encoding = options.get("encoding", "utf-8")
         self.private = options.get("private", False)
+        self.skip = options.get("skip", False)
 
     def run(self, pathfmt):
         archive = self.archive
@@ -96,6 +97,9 @@ class MetadataPP(PostProcessor):
         directory = self._directory(pathfmt)
         path = directory + self._filename(pathfmt)
 
+        if self.skip and os.path.exists(path):
+            return
+
         try:
             with open(path, self.omode, encoding=self.encoding) as fp:
                 self.write(fp, pathfmt.kwdict)
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 494b7f5..93a9148 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.25.0"
+__version__ = "1.25.1"
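Taken together with the `output.py` hunk above, every standard stream now defaults to `errors="replace"` even when nothing is configured. On Python 3.7+ the effect is roughly this sketch (gallery-dl itself goes through its `config` module and `TextIOWrapper` handling):

```python
import sys

for name in ("stdout", "stderr", "stdin"):
    stream = getattr(sys, name, None)
    if stream:
        # unencodable characters become '?' instead of raising UnicodeError
        stream.reconfigure(errors="replace")
```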
