diff options
Diffstat (limited to 'gallery_dl/extractor/kabeuchi.py')
| -rw-r--r-- | gallery_dl/extractor/kabeuchi.py | 92 |
1 files changed, 92 insertions, 0 deletions
# -*- coding: utf-8 -*-

# Copyright 2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://kabe-uchiroom.com/"""

from .common import Extractor, Message
from .. import text, exception


class KabeuchiUserExtractor(Extractor):
    """Extractor for all posts of a user on kabe-uchiroom.com"""
    category = "kabeuchi"
    subcategory = "user"
    directory_fmt = ("{category}", "{twitter_user_id} {twitter_id}")
    filename_fmt = "{id}_{num:>02}{title:?_//}.{extension}"
    archive_fmt = "{id}_{num}"
    root = "https://kabe-uchiroom.com"
    pattern = r"(?:https?://)?kabe-uchiroom\.com/mypage/?\?id=(\d+)"
    test = (
        ("https://kabe-uchiroom.com/mypage/?id=919865303848255493", {
            "pattern": (r"https://kabe-uchiroom\.com/accounts/upfile/3/"
                        r"919865303848255493/\w+\.jpe?g"),
            "count": ">= 24",
        }),
        ("https://kabe-uchiroom.com/mypage/?id=123456789", {
            "exception": exception.NotFoundError,
        }),
    )

    # Post fields that hold the image file names, in the order they are
    # numbered ("num" 1 through 6).
    _image_keys = ("image1", "image2", "image3", "image4", "image5", "image6")

    def __init__(self, match):
        """Store the numeric user ID captured by 'pattern'."""
        Extractor.__init__(self, match)
        self.user_id = match.group(1)

    def items(self):
        """Yield a Directory message per post, then one Url message per image.

        Posts flagged with "is_ad" and posts without a first image are
        skipped. Within a post, images are emitted in order and the first
        empty image field ends that post.
        """
        user_id = self.user_id
        # Upload directory: <root>/accounts/upfile/<last ID digit>/<ID>/
        prefix = "{}/accounts/upfile/{}/{}/".format(
            self.root, user_id[-1], user_id)

        for post in self.posts():
            wanted = not post.get("is_ad") and post["image1"]
            if not wanted:
                continue

            post["date"] = text.parse_datetime(
                post["created_at"], "%Y-%m-%d %H:%M:%S")
            yield Message.Directory, post

            for num, field in enumerate(self._image_keys, 1):
                filename = post[field]
                if not filename:
                    break
                image_url = prefix + filename
                post["num"] = num
                metadata = text.nameext_from_url(filename, post)
                yield Message.Url, image_url, metadata

    def posts(self):
        """Return an iterator over the user's posts.

        Raises exception.NotFoundError when the request for the user's
        page was redirected to the site's root URL.
        """
        page_url = "{}/mypage/?id={}".format(self.root, self.user_id)
        response = self.request(page_url)

        landed_on_root = response.url == self.root + "/"
        if response.history and landed_on_root:
            raise exception.NotFoundError("user")

        # The ID used by the posts endpoint is taken from the page source.
        target_id = text.extract(
            response.text, 'user_friend_id = "', '"')[0]
        return self._pagination(target_id)

    def _pagination(self, target_id):
        """Yield every entry of "datas" from successive get_posts.php pages.

        Stops when a response's "datas" is empty or not a list, or when
        the ID of the last yielded entry equals the response's "last_data".
        """
        endpoint = "{}/get_posts.php".format(self.root)
        form = {
            "user_id"    : "0",
            "target_id"  : target_id,
            "type"       : "uploads",
            "sort_type"  : "0",
            "category_id": "all",
            "latest_post": "",
            "page_num"   : 0,
        }

        while True:
            result = self.request(endpoint, method="POST", data=form).json()
            batch = result["datas"]

            if not (batch and isinstance(batch, list)):
                break
            yield from batch

            newest_id = batch[-1]["id"]
            if newest_id == result["last_data"]:
                break

            # Advance the cursor for the next request.
            form["latest_post"] = newest_id
            form["page_num"] = form["page_num"] + 1
