summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/weibo.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/weibo.py')
-rw-r--r-- gallery_dl/extractor/weibo.py 96
1 files changed, 49 insertions, 47 deletions
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index aa9bdae..d1ad388 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -10,6 +10,7 @@
from .common import Extractor, Message
from .. import text, exception
+import itertools
import json
@@ -30,53 +31,53 @@ class WeiboExtractor(Extractor):
for status in self.statuses():
- yield Message.Directory, status
- obj = status
- num = 1
-
- while True:
-
- if "pics" in obj:
- for image in obj["pics"]:
- pid = image["pid"]
- if "large" in image:
- image = image["large"]
- geo = image.get("geo") or {}
- data = text.nameext_from_url(image["url"], {
- "num" : num,
- "pid" : pid,
- "url" : image["url"],
- "width" : text.parse_int(geo.get("width")),
- "height": text.parse_int(geo.get("height")),
- "status": status,
- })
- yield Message.Url, image["url"], data
- num += 1
-
- if self.videos and "media_info" in obj.get("page_info", ()):
- info = obj["page_info"]["media_info"]
- url = info.get("stream_url_hd") or info.get("stream_url")
-
- if url:
- data = text.nameext_from_url(url, {
- "num" : num,
- "pid" : 0,
- "url" : url,
- "width" : 0,
- "height": 0,
- "status": status,
- })
- if data["extension"] == "m3u8":
- url = "ytdl:" + url
- data["extension"] = "mp4"
- data["_ytdl_extra"] = {"protocol": "m3u8_native"}
- yield Message.Url, url, data
- num += 1
-
- if self.retweets and "retweeted_status" in obj:
- obj = obj["retweeted_status"]
- else:
- break
+ files = self._files_from_status(status)
+ if self.retweets and "retweeted_status" in status:
+ files = itertools.chain(
+ files,
+ self._files_from_status(status["retweeted_status"]),
+ )
+
+ for num, file in enumerate(files, 1):
+ if num == 1:
+ status["date"] = text.parse_datetime(
+ status["created_at"], "%a %b %d %H:%M:%S %z %Y")
+ yield Message.Directory, status
+ file["status"] = status
+ file["num"] = num
+ yield Message.Url, file["url"], file
+
+ def _files_from_status(self, status):
+ """Yield one metadata dict per downloadable file found in 'status'.
+
+ Images from status["pics"] are yielded first, followed by at most one
+ video taken from status["page_info"]["media_info"] (only if self.videos
+ is truthy). Each dict holds "url", "pid", "width" and "height" in
+ addition to the fields added by text.nameext_from_url().
+ """
+ # NOTE: pop() removes "pics" and "page_info" from the caller's dict,
+ # so 'status' no longer contains these keys after this generator has
+ # started running.
+ images = status.pop("pics", ())
+ page_info = status.pop("page_info", ())
+
+ for image in images:
+ # Read 'pid' from the outer entry before 'image' is possibly
+ # rebound to its nested "large" variant.
+ pid = image["pid"]
+ if "large" in image:
+ image = image["large"]
+ # "geo" may be missing or falsy; fall back to an empty dict so the
+ # .get() calls below are always valid.
+ geo = image.get("geo") or {}
+ yield text.nameext_from_url(image["url"], {
+ "url" : image["url"],
+ "pid" : pid,
+ "width" : text.parse_int(geo.get("width")),
+ "height": text.parse_int(geo.get("height")),
+ })
+
+ if self.videos and "media_info" in page_info:
+ info = page_info["media_info"]
+ # Prefer the HD stream URL; use the regular one if it is missing
+ # or empty.
+ url = info.get("stream_url_hd") or info.get("stream_url")
+ if url:
+ # Videos get constant 0 values for "pid", "width" and "height".
+ data = text.nameext_from_url(url, {
+ "url" : url,
+ "pid" : 0,
+ "width" : 0,
+ "height": 0,
+ })
+ # For .m3u8 URLs: report "mp4" as extension, prefix the URL with
+ # "ytdl:" and attach a protocol hint in "_ytdl_extra".
+ # NOTE(review): presumably this routes the download through the
+ # youtube-dl based downloader -- confirm against the downloader
+ # code.
+ if data["extension"] == "m3u8":
+ data["extension"] = "mp4"
+ data["url"] = "ytdl:" + url
+ data["_ytdl_extra"] = {"protocol": "m3u8_native"}
+ yield data
def statuses(self):
"""Returns an iterable containing all relevant 'status' objects"""
@@ -124,6 +125,7 @@ class WeiboStatusExtractor(WeiboExtractor):
test = (
("https://m.weibo.cn/detail/4323047042991618", {
"pattern": r"https?://wx\d+.sinaimg.cn/large/\w+.jpg",
+ "keyword": {"status": {"date": "dt:2018-12-30 13:56:36"}},
}),
("https://m.weibo.cn/detail/4339748116375525", {
"pattern": r"https?://f.us.sinaimg.cn/\w+\.mp4\?label=mp4_hd",