summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/bcy.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/bcy.py')
-rw-r--r-- gallery_dl/extractor/bcy.py 188
1 files changed, 188 insertions, 0 deletions
diff --git a/gallery_dl/extractor/bcy.py b/gallery_dl/extractor/bcy.py
new file mode 100644
index 0000000..c3049a4
--- /dev/null
+++ b/gallery_dl/extractor/bcy.py
@@ -0,0 +1,188 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://bcy.net/"""
+
+from .common import Extractor, Message
+from .. import text
+import json
+import re
+
+
class BcyExtractor(Extractor):
    """Common base for the bcy.net extractors

    Subclasses provide the posts to process by overriding posts();
    items() converts each of them into Directory and Url messages.
    """
    category = "bcy"
    directory_fmt = ("{category}", "{user[id]} {user[name]}")
    filename_fmt = "{post[id]} {id}.{extension}"
    archive_fmt = "{post[id]}_{id}"
    root = "https://bcy.net"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # first capture group of the subclass 'pattern'
        self.item_id = match.group(1)

    def items(self):
        """Yield one Directory message per post and Url messages per image

        Posts with an empty 'image_list' are skipped.  The same metadata
        dict is reused and updated in place for every image of a post.
        Its 'filter' field is set to "" for URLs taken from 'image_list',
        to "watermark" for 'origin' URLs and to "noop" for
        'original_path' URLs (the latter only when the "noop" config
        option is set).
        """
        byteimg = re.compile(
            r"^https?://p\d+-bcy\.byteimg\.com/img/banciyuan")
        image_root = "https://img-bcy-qn.pstatp.com"
        want_noop = self.config("noop")

        for post in self.posts():
            images = post["image_list"]
            if not images:
                continue

            tags = post.get("post_tags") or ()
            author = {
                "id"    : post["uid"],
                "name"  : post["uname"],
                # drop everything from the first "~" on, then swap the
                # byteimg prefix for 'image_root'
                "avatar": byteimg.sub(
                    image_root, post["avatar"].partition("~")[0]),
            }
            details = {
                "id"     : text.parse_int(post["item_id"]),
                "tags"   : [tag["tag_name"] for tag in tags],
                "date"   : text.parse_timestamp(post["ctime"]),
                "parody" : post["work"],
                "content": post["plain"],
                "likes"  : post["like_count"],
                "shares" : post["share_count"],
                "replies": post["reply_count"],
            }
            data = {"user": author, "post": details}

            # 'multi' entries for this post; resolved on first use
            fallback = None

            yield Message.Directory, data

            for num, image in enumerate(images, 1):
                data["num"] = num
                data["id"] = image["mid"]
                data["width"] = image["w"]
                data["height"] = image["h"]

                url = image["path"].partition("~")[0]
                text.nameext_from_url(url, data)

                if data["extension"]:
                    # URL carries a file extension: use it directly
                    if not url.startswith(image_root):
                        url = byteimg.sub(image_root, url)
                    data["filter"] = ""
                    yield Message.Url, url, data
                    continue

                # no extension: look at the matching 'multi' entry instead
                if not fallback:
                    fallback = post["multi"]
                    if len(fallback) < len(images):
                        # fewer 'multi' entries than images:
                        # load the list from the post's detail page
                        detail = self._data_from_post(post["item_id"])
                        fallback = detail["post_data"]["multi"]
                entry = fallback[num - 1]

                if entry["origin"]:
                    data["filter"] = "watermark"
                    yield Message.Url, entry["origin"], data

                # NOTE(review): nesting of this branch was reconstructed
                # from a whitespace-stripped diff view -- confirm upstream
                if want_noop:
                    data["extension"] = ""
                    data["filter"] = "noop"
                    yield Message.Url, entry["original_path"], data

    def posts(self):
        """Return an iterable of 'post' objects (overridden by subclasses)"""

    def _data_from_post(self, post_id):
        """Fetch the detail page of 'post_id' and return its "detail" data

        The JSON text is taken from between 'JSON.parse("' and '");' in
        the page source and un-escaped before being decoded.
        """
        page = self.request(
            "{}/item/detail/{}".format(self.root, post_id)).text

        payload = text.extract(page, 'JSON.parse("', '");')[0]
        payload = payload.replace('\\\\u002F', '/')
        payload = payload.replace('\\"', '"')

        return json.loads(payload)["detail"]
+
+
class BcyUserExtractor(BcyExtractor):
    """Extractor for the posts on a bcy.net user's timeline"""
    subcategory = "user"
    pattern = r"(?:https?://)?bcy\.net/u/(\d+)"
    test = (
        ("https://bcy.net/u/1933712", {
            "pattern": r"https://img-bcy-qn.pstatp.com/\w+/\d+/post/\w+/.+jpg",
            "count": ">= 25",
        }),
        ("https://bcy.net/u/109282764041", {
            "pattern": r"https://p\d-bcy.byteimg.com/img/banciyuan/[0-9a-f]+"
                       r"~tplv-banciyuan-logo-v3:.+\.image",
            "range": "1-25",
            "count": 25,
        }),
    )

    def posts(self):
        """Yield the 'item_detail' of every entry returned by selfPosts

        Requests are repeated with 'since' set to the value of the last
        entry received, until a response contains no entries.
        """
        endpoint = self.root + "/apiv3/user/selfPosts"
        query = {"uid": self.item_id, "since": None}

        while True:
            response = self.request(endpoint, params=query).json()

            # stays None when the response holds no entries
            last = None
            for last in response["data"]["items"]:
                yield last["item_detail"]

            if not last:
                return
            # continue after the last entry of this page
            query["since"] = last["since"]
+
+
class BcyPostExtractor(BcyExtractor):
    """Extractor for a single bcy.net post"""
    subcategory = "post"
    pattern = r"(?:https?://)?bcy\.net/item/detail/(\d+)"
    test = (
        ("https://bcy.net/item/detail/6355835481002893070", {
            "url": "301202375e61fd6e0e2e35de6c3ac9f74885dec3",
            "count": 1,
            "keyword": {
                "user": {
                    "id"    : 1933712,
                    "name"  : "wukloo",
                    "avatar": "re:https://img-bcy-qn.pstatp.com/Public/",
                },
                "post": {
                    "id"     : 6355835481002893070,
                    "tags"   : list,
                    "date"   : "dt:2016-11-22 08:47:46",
                    "parody" : "东方PROJECT",
                    "content": "re:根据微博的建议稍微做了点修改",
                    "likes"  : int,
                    "shares" : int,
                    "replies": int,
                },
                "id"       : 8330182,
                "num"      : 1,
                "width"    : 3000,
                "height"   : 1687,
                "filename" : "712e0780b09011e696f973c3d1568337",
                "extension": "jpg",
            },
        }),
        # only watermarked images available
        ("https://bcy.net/item/detail/6780546160802143236", {
            "pattern": r"https://p\d-bcy.byteimg.com/img/banciyuan/[0-9a-f]+"
                       r"~tplv-banciyuan-logo-v3:.+\.image",
            "count": 8,
            "keyword": {"filter": "watermark"}
        }),
        # only visible to logged in users
        ("https://bcy.net/item/detail/6747523535150783495", {
            "count": 0,
        }),
    )

    def posts(self):
        """Return a 1-tuple holding the post identified by 'item_id'

        The detail page's 'post_data' is adjusted to provide the keys
        that items() reads: 'image_list' is taken from 'multi', 'plain'
        has its unicode escapes parsed, and the 'detail_user' fields
        are merged in.
        """
        detail = self._data_from_post(self.item_id)

        post = detail["post_data"]
        post["image_list"] = post["multi"]
        post["plain"] = text.parse_unicode_escapes(post["plain"])
        # merged last, so 'detail_user' values win on key collisions
        post.update(detail["detail_user"])

        return (post,)