diff options
Diffstat (limited to 'gallery_dl/extractor/weibo.py')
| -rw-r--r-- | gallery_dl/extractor/weibo.py | 96 |
1 files changed, 49 insertions, 47 deletions
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index aa9bdae..d1ad388 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -10,6 +10,7 @@ from .common import Extractor, Message from .. import text, exception +import itertools import json @@ -30,53 +31,53 @@ class WeiboExtractor(Extractor): for status in self.statuses(): - yield Message.Directory, status - obj = status - num = 1 - - while True: - - if "pics" in obj: - for image in obj["pics"]: - pid = image["pid"] - if "large" in image: - image = image["large"] - geo = image.get("geo") or {} - data = text.nameext_from_url(image["url"], { - "num" : num, - "pid" : pid, - "url" : image["url"], - "width" : text.parse_int(geo.get("width")), - "height": text.parse_int(geo.get("height")), - "status": status, - }) - yield Message.Url, image["url"], data - num += 1 - - if self.videos and "media_info" in obj.get("page_info", ()): - info = obj["page_info"]["media_info"] - url = info.get("stream_url_hd") or info.get("stream_url") - - if url: - data = text.nameext_from_url(url, { - "num" : num, - "pid" : 0, - "url" : url, - "width" : 0, - "height": 0, - "status": status, - }) - if data["extension"] == "m3u8": - url = "ytdl:" + url - data["extension"] = "mp4" - data["_ytdl_extra"] = {"protocol": "m3u8_native"} - yield Message.Url, url, data - num += 1 - - if self.retweets and "retweeted_status" in obj: - obj = obj["retweeted_status"] - else: - break + files = self._files_from_status(status) + if self.retweets and "retweeted_status" in status: + files = itertools.chain( + files, + self._files_from_status(status["retweeted_status"]), + ) + + for num, file in enumerate(files, 1): + if num == 1: + status["date"] = text.parse_datetime( + status["created_at"], "%a %b %d %H:%M:%S %z %Y") + yield Message.Directory, status + file["status"] = status + file["num"] = num + yield Message.Url, file["url"], file + + def _files_from_status(self, status): + images = status.pop("pics", ()) + page_info = status.pop("page_info", ()) + + for image in images: + pid = image["pid"] + if "large" in image: + image = image["large"] + geo = image.get("geo") or {} + yield text.nameext_from_url(image["url"], { + "url" : image["url"], + "pid" : pid, + "width" : text.parse_int(geo.get("width")), + "height": text.parse_int(geo.get("height")), + }) + + if self.videos and "media_info" in page_info: + info = page_info["media_info"] + url = info.get("stream_url_hd") or info.get("stream_url") + if url: + data = text.nameext_from_url(url, { + "url" : url, + "pid" : 0, + "width" : 0, + "height": 0, + }) + if data["extension"] == "m3u8": + data["extension"] = "mp4" + data["url"] = "ytdl:" + url + data["_ytdl_extra"] = {"protocol": "m3u8_native"} + yield data def statuses(self): """Returns an iterable containing all relevant 'status' objects""" @@ -124,6 +125,7 @@ class WeiboStatusExtractor(WeiboExtractor): test = ( ("https://m.weibo.cn/detail/4323047042991618", { "pattern": r"https?://wx\d+.sinaimg.cn/large/\w+.jpg", + "keyword": {"status": {"date": "dt:2018-12-30 13:56:36"}}, }), ("https://m.weibo.cn/detail/4339748116375525", { "pattern": r"https?://f.us.sinaimg.cn/\w+\.mp4\?label=mp4_hd", |
