diff options
Diffstat (limited to 'gallery_dl/extractor/bcy.py')
| -rw-r--r-- | gallery_dl/extractor/bcy.py | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/gallery_dl/extractor/bcy.py b/gallery_dl/extractor/bcy.py new file mode 100644 index 0000000..c3049a4 --- /dev/null +++ b/gallery_dl/extractor/bcy.py @@ -0,0 +1,188 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://bcy.net/""" + +from .common import Extractor, Message +from .. import text +import json +import re + + +class BcyExtractor(Extractor): + """Base class for bcy extractors""" + category = "bcy" + directory_fmt = ("{category}", "{user[id]} {user[name]}") + filename_fmt = "{post[id]} {id}.{extension}" + archive_fmt = "{post[id]}_{id}" + root = "https://bcy.net" + + def __init__(self, match): + Extractor.__init__(self, match) + self.item_id = match.group(1) + + def items(self): + sub = re.compile(r"^https?://p\d+-bcy\.byteimg\.com/img/banciyuan").sub + iroot = "https://img-bcy-qn.pstatp.com" + noop = self.config("noop") + + for post in self.posts(): + if not post["image_list"]: + continue + + multi = None + tags = post.get("post_tags") or () + data = { + "user": { + "id" : post["uid"], + "name" : post["uname"], + "avatar" : sub(iroot, post["avatar"].partition("~")[0]), + }, + "post": { + "id" : text.parse_int(post["item_id"]), + "tags" : [t["tag_name"] for t in tags], + "date" : text.parse_timestamp(post["ctime"]), + "parody" : post["work"], + "content": post["plain"], + "likes" : post["like_count"], + "shares" : post["share_count"], + "replies": post["reply_count"], + }, + } + + yield Message.Directory, data + for data["num"], image in enumerate(post["image_list"], 1): + data["id"] = image["mid"] + data["width"] = image["w"] + data["height"] = image["h"] + + url = image["path"].partition("~")[0] + text.nameext_from_url(url, data) + + if data["extension"]: + if not url.startswith(iroot): + url = sub(iroot, url) + data["filter"] = "" + yield Message.Url, url, data + + else: + if not multi: + if len(post["multi"]) < len(post["image_list"]): + multi = self._data_from_post(post["item_id"]) + multi = multi["post_data"]["multi"] + else: + multi = post["multi"] + image = multi[data["num"] - 1] + + if image["origin"]: + data["filter"] = "watermark" + yield Message.Url, image["origin"], data + + if noop: + data["extension"] = "" + data["filter"] = "noop" + yield Message.Url, image["original_path"], data + + def posts(self): + """Returns an iterable with all relevant 'post' objects""" + + def _data_from_post(self, post_id): + url = "{}/item/detail/{}".format(self.root, post_id) + page = self.request(url).text + return json.loads( + text.extract(page, 'JSON.parse("', '");')[0] + .replace('\\\\u002F', '/') + .replace('\\"', '"') + )["detail"] + + +class BcyUserExtractor(BcyExtractor): + """Extractor for user timelines""" + subcategory = "user" + pattern = r"(?:https?://)?bcy\.net/u/(\d+)" + test = ( + ("https://bcy.net/u/1933712", { + "pattern": r"https://img-bcy-qn.pstatp.com/\w+/\d+/post/\w+/.+jpg", + "count": ">= 25", + }), + ("https://bcy.net/u/109282764041", { + "pattern": r"https://p\d-bcy.byteimg.com/img/banciyuan/[0-9a-f]+" + r"~tplv-banciyuan-logo-v3:.+\.image", + "range": "1-25", + "count": 25, + }), + ) + + def posts(self): + url = self.root + "/apiv3/user/selfPosts" + params = {"uid": self.item_id, "since": None} + + while True: + data = self.request(url, params=params).json() + + item = None + for item in data["data"]["items"]: + yield item["item_detail"] + + if not item: + return + params["since"] = item["since"] + + +class BcyPostExtractor(BcyExtractor): + """Extractor for individual posts""" + subcategory = "post" + pattern = r"(?:https?://)?bcy\.net/item/detail/(\d+)" + test = ( + ("https://bcy.net/item/detail/6355835481002893070", { + "url": "301202375e61fd6e0e2e35de6c3ac9f74885dec3", + "count": 1, + "keyword": { + "user": { + "id" : 1933712, + "name" : "wukloo", + "avatar" : "re:https://img-bcy-qn.pstatp.com/Public/", + }, + "post": { + "id" : 6355835481002893070, + "tags" : list, + "date" : "dt:2016-11-22 08:47:46", + "parody" : "东方PROJECT", + "content": "re:根据微博的建议稍微做了点修改", + "likes" : int, + "shares" : int, + "replies": int, + }, + "id": 8330182, + "num": 1, + "width" : 3000, + "height": 1687, + "filename": "712e0780b09011e696f973c3d1568337", + "extension": "jpg", + }, + }), + # only watermarked images available + ("https://bcy.net/item/detail/6780546160802143236", { + "pattern": r"https://p\d-bcy.byteimg.com/img/banciyuan/[0-9a-f]+" + r"~tplv-banciyuan-logo-v3:.+\.image", + "count": 8, + "keyword": {"filter": "watermark"} + }), + # only visible to logged in users + ("https://bcy.net/item/detail/6747523535150783495", { + "count": 0, + }), + ) + + def posts(self): + data = self._data_from_post(self.item_id) + post = data["post_data"] + post["image_list"] = post["multi"] + post["plain"] = text.parse_unicode_escapes(post["plain"]) + post.update(data["detail_user"]) + return (post,) |
