diff options
Diffstat (limited to 'gallery_dl/extractor/weibo.py')
| -rw-r--r-- | gallery_dl/extractor/weibo.py | 69 |
1 file changed, 49 insertions, 20 deletions
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 07bed79..3c0f077 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -14,7 +14,7 @@ from ..cache import cache import random BASE_PATTERN = r"(?:https?://)?(?:www\.|m\.)?weibo\.c(?:om|n)" -USER_PATTERN = BASE_PATTERN + r"/(?:(u|n|p(?:rofile)?)/)?([^/?#]+)(?:/home)?" +USER_PATTERN = rf"{BASE_PATTERN}/(?:(u|n|p(?:rofile)?)/)?([^/?#]+)(?:/home)?" class WeiboExtractor(Extractor): @@ -22,6 +22,8 @@ class WeiboExtractor(Extractor): directory_fmt = ("{category}", "{user[screen_name]}") filename_fmt = "{status[id]}_{num:>02}.{extension}" archive_fmt = "{status[id]}_{num}" + cookies_domain = ".weibo.com" + cookies_names = ("SUB", "SUBP") root = "https://weibo.com" request_interval = (1.0, 2.0) @@ -38,8 +40,23 @@ class WeiboExtractor(Extractor): self.gifs_video = (self.gifs == "video") cookies = _cookie_cache() - if cookies is not None: - self.cookies.update(cookies) + if cookies is None: + self.logged_in = self.cookies_check( + self.cookies_names, self.cookies_domain) + return + + domain = self.cookies_domain + cookies = {c.name: c for c in cookies if c.domain == domain} + for cookie in self.cookies: + if cookie.domain == domain and cookie.name in cookies: + del cookies[cookie.name] + if not cookies: + self.logged_in = True + return + + self.logged_in = False + for cookie in cookies.values(): + self.cookies.set_cookie(cookie) def request(self, url, **kwargs): response = Extractor.request(self, url, **kwargs) @@ -148,6 +165,10 @@ class WeiboExtractor(Extractor): self.log.debug("%s: Ignoring 'movie' video", status["id"]) def _extract_video(self, info): + if info.get("live_status") == 1: + self.log.debug("Skipping ongoing live stream") + return {"url": ""} + try: media = max(info["playback_list"], key=lambda m: m["meta"]["quality_index"]) @@ -182,7 +203,7 @@ class WeiboExtractor(Extractor): return self.request_json(url)["data"]["user"]["idstr"] def _pagination(self, 
endpoint, params): - url = self.root + "/ajax" + endpoint + url = f"{self.root}/ajax{endpoint}" headers = { "X-Requested-With": "XMLHttpRequest", "X-XSRF-TOKEN": None, @@ -201,8 +222,12 @@ class WeiboExtractor(Extractor): raise exception.AbortExtraction( f'"{data.get("msg") or "unknown error"}"') - data = data["data"] - statuses = data["list"] + try: + data = data["data"] + statuses = data["list"] + except KeyError: + return + yield from statuses # videos, newvideo @@ -215,6 +240,8 @@ class WeiboExtractor(Extractor): # album if since_id := data.get("since_id"): params["sinceid"] = since_id + if "page" in params: + params["page"] += 1 continue # home, article @@ -263,7 +290,7 @@ class WeiboExtractor(Extractor): class WeiboUserExtractor(WeiboExtractor): """Extractor for weibo user profiles""" subcategory = "user" - pattern = USER_PATTERN + r"(?:$|#)" + pattern = rf"{USER_PATTERN}(?:$|#)" example = "https://weibo.com/USER" # do NOT override 'initialize()' @@ -274,18 +301,18 @@ class WeiboUserExtractor(WeiboExtractor): def items(self): base = f"{self.root}/u/{self._user_id()}?tabtype=" return Dispatch._dispatch_extractors(self, ( - (WeiboHomeExtractor , base + "home"), - (WeiboFeedExtractor , base + "feed"), - (WeiboVideosExtractor , base + "video"), - (WeiboNewvideoExtractor, base + "newVideo"), - (WeiboAlbumExtractor , base + "album"), + (WeiboHomeExtractor , f"{base}home"), + (WeiboFeedExtractor , f"{base}feed"), + (WeiboVideosExtractor , f"{base}video"), + (WeiboNewvideoExtractor, f"{base}newVideo"), + (WeiboAlbumExtractor , f"{base}album"), ), ("feed",)) class WeiboHomeExtractor(WeiboExtractor): """Extractor for weibo 'home' listings""" subcategory = "home" - pattern = USER_PATTERN + r"\?tabtype=home" + pattern = rf"{USER_PATTERN}\?tabtype=home" example = "https://weibo.com/USER?tabtype=home" def statuses(self): @@ -297,19 +324,21 @@ class WeiboHomeExtractor(WeiboExtractor): class WeiboFeedExtractor(WeiboExtractor): """Extractor for weibo user feeds""" subcategory 
= "feed" - pattern = USER_PATTERN + r"\?tabtype=feed" + pattern = rf"{USER_PATTERN}\?tabtype=feed" example = "https://weibo.com/USER?tabtype=feed" def statuses(self): endpoint = "/statuses/mymblog" params = {"uid": self._user_id(), "feature": "0"} + if self.logged_in: + params["page"] = 1 return self._pagination(endpoint, params) class WeiboVideosExtractor(WeiboExtractor): """Extractor for weibo 'video' listings""" subcategory = "videos" - pattern = USER_PATTERN + r"\?tabtype=video" + pattern = rf"{USER_PATTERN}\?tabtype=video" example = "https://weibo.com/USER?tabtype=video" def statuses(self): @@ -323,7 +352,7 @@ class WeiboVideosExtractor(WeiboExtractor): class WeiboNewvideoExtractor(WeiboExtractor): """Extractor for weibo 'newVideo' listings""" subcategory = "newvideo" - pattern = USER_PATTERN + r"\?tabtype=newVideo" + pattern = rf"{USER_PATTERN}\?tabtype=newVideo" example = "https://weibo.com/USER?tabtype=newVideo" def statuses(self): @@ -335,7 +364,7 @@ class WeiboNewvideoExtractor(WeiboExtractor): class WeiboArticleExtractor(WeiboExtractor): """Extractor for weibo 'article' listings""" subcategory = "article" - pattern = USER_PATTERN + r"\?tabtype=article" + pattern = rf"{USER_PATTERN}\?tabtype=article" example = "https://weibo.com/USER?tabtype=article" def statuses(self): @@ -347,7 +376,7 @@ class WeiboArticleExtractor(WeiboExtractor): class WeiboAlbumExtractor(WeiboExtractor): """Extractor for weibo 'album' listings""" subcategory = "album" - pattern = USER_PATTERN + r"\?tabtype=album" + pattern = rf"{USER_PATTERN}\?tabtype=album" example = "https://weibo.com/USER?tabtype=album" def statuses(self): @@ -367,9 +396,9 @@ class WeiboAlbumExtractor(WeiboExtractor): class WeiboStatusExtractor(WeiboExtractor): - """Extractor for images from a status on weibo.cn""" + """Extractor for a weibo status""" subcategory = "status" - pattern = BASE_PATTERN + r"/(detail|status|\d+)/(\w+)" + pattern = rf"{BASE_PATTERN}/(detail|status|\d+)/(\w+)" example = 
"https://weibo.com/detail/12345" def statuses(self): |
