diff options
Diffstat (limited to 'gallery_dl/extractor/zerochan.py')
| -rw-r--r-- | gallery_dl/extractor/zerochan.py | 156 |
1 files changed, 156 insertions, 0 deletions
# -*- coding: utf-8 -*-

# Copyright 2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.zerochan.net/"""

from .booru import BooruExtractor
from ..cache import cache
from .. import text, exception

# Optional scheme and optional "www." prefix, followed by the site's domain.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"


class ZerochanExtractor(BooruExtractor):
    """Base class for zerochan extractors"""
    category = "zerochan"
    root = "https://www.zerochan.net"
    filename_fmt = "{id}.{extension}"
    archive_fmt = "{id}"
    cookiedomain = ".zerochan.net"
    # Cookies whose presence is checked before attempting a fresh login.
    cookienames = ("z_id", "z_hash")

    def login(self):
        """Log in if needed, then pin the session to the legacy layout.

        A login request is only made when the cookies named in
        'cookienames' are not already available and a username has been
        configured.  The 'v3' cookie is set in every case.
        """
        if not self._check_cookies(self.cookienames):
            username, password = self._get_auth_info()
            if username:
                self._update_cookies(self._login_impl(username, password))
        # force legacy layout
        self.session.cookies.set("v3", "0", domain=self.cookiedomain)

    # maxage is 90 days expressed in seconds; keyarg=1 presumably keys the
    # cached result on 'username' -- confirm against the cache module.
    @cache(maxage=90*86400, keyarg=1)
    def _login_impl(self, username, password):
        """Submit the login form and return the cookies of the response.

        Raises exception.AuthenticationError when the response has no
        redirect history.
        """
        self.log.info("Logging in as %s", username)

        url = self.root + "/login"
        headers = {
            "Origin"  : self.root,
            "Referer" : url,
        }
        data = {
            "ref"     : "/",
            "name"    : username,
            "password": password,
            "login"   : "Login",
        }

        response = self.request(url, method="POST", headers=headers, data=data)
        # An empty history means the POST was not redirected, which this
        # code treats as a rejected login.
        if not response.history:
            raise exception.AuthenticationError()

        return response.cookies

    def _parse_entry_page(self, entry_id):
        """Fetch the page of a single entry and return its metadata dict."""
        url = "{}/{}".format(self.root, entry_id)
        extr = text.extract_from(self.request(url).text)

        # NOTE(review): 'extr' presumably scans forward through the page on
        # each call, so the order of the entries below would have to follow
        # the order of the markers in the markup -- confirm before
        # reordering.
        return {
            "id"    : entry_id,
            "author": extr('"author": "', '"'),
            "file_url": extr('"contentUrl": "', '"'),
            "date"  : text.parse_datetime(extr(
                '"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y"),
            "width" : extr('"width": "', ' '),
            "height": extr('"height": "', ' '),
            # Everything up to the trailing 'B', e.g. "1929k".
            "size"  : extr('"contentSize": "', 'B'),
            "path"  : text.split_html(extr(
                'class="breadcrumbs', '</p>'))[3::2],
            "tags"  : extr('alt="Tags: ', '"').split(", ")
        }


class ZerochanTagExtractor(ZerochanExtractor):
    """Extractor for the posts listed on a zerochan tag page"""
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    # The negative lookahead rejects purely numeric paths, which are
    # matched by the image extractor's pattern instead.
    pattern = BASE_PATTERN + r"/(?!\d+$)([^/?#]+)/?(?:\?([^#]+))?"
    # NOTE(review): the image extractor's test uses the key "keyword";
    # "keywords" here may be a typo that leaves these checks unused --
    # confirm against the test runner before renaming.
    test = ("https://www.zerochan.net/Perth+%28Kantai+Collection%29", {
        "pattern": r"https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
        "count": "> 24",
        "keywords": {
            "extension": r"re:jpg|png",
            "file_url": "",
            "filename": r"re:Perth.\(Kantai.Collection\).full.\d+",
            "height": r"re:^\d+$",
            "id": r"re:^\d+$",
            "name": "Perth (Kantai Collection)",
            "search_tags": "Perth (Kantai Collection)",
            "size": r"re:^\d+k$",
            "width": r"re:^\d+$",
        },
    })

    def __init__(self, match):
        ZerochanExtractor.__init__(self, match)
        # Group 1 is the tag path segment, group 2 the optional query string.
        self.search_tag, self.query = match.groups()

    def metadata(self):
        """Return the searched tag in readable form as 'search_tags'."""
        return {"search_tags": text.unquote(
            self.search_tag.replace("+", " "))}

    def posts(self):
        """Yield one metadata dict per thumbnail, following pagination.

        Starts at the page given by the 'p' query parameter (default 1)
        and stops when a page has no thumbnail list or no 'rel="next"'
        link.
        """
        url = self.root + "/" + self.search_tag
        params = text.parse_query(self.query)
        params["p"] = text.parse_int(params.get("p"), 1)

        while True:
            page = self.request(url, params=params).text
            thumbs = text.extract(page, '<ul id="thumbs', '</ul>')[0]
            if not thumbs:
                # No thumbnail list on this page.  text.extract is expected
                # to return None in that case (confirm against the text
                # module); stop here instead of failing in extract_from.
                break
            extr = text.extract_from(thumbs)

            while True:
                post = extr('<li class="', '>')
                if not post:
                    break
                yield {
                    "id"    : extr('href="/', '"'),
                    "name"  : extr('alt="', '"'),
                    # The title attribute is read piecewise: text up to
                    # 'x', then up to a space, then up to 'B'.
                    "width" : extr('title="', 'x'),
                    "height": extr('', ' '),
                    "size"  : extr('', 'B'),
                    "file_url": "https://static." + extr(
                        '<a href="https://static.', '"'),
                }

            if 'rel="next"' not in page:
                break
            params["p"] += 1


class ZerochanImageExtractor(ZerochanExtractor):
    """Extractor for a single zerochan entry addressed by its numeric ID"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"/(\d+)"
    test = ("https://www.zerochan.net/2920445", {
        "pattern": r"https://static\.zerochan\.net/"
                   r"Perth\.%28Kantai\.Collection%29\.full.2920445\.jpg",
        "keyword": {
            "author": "YukinoTokisaki",
            "date": "dt:2020-04-24 21:33:44",
            "file_url": str,
            "filename": "Perth.(Kantai.Collection).full.2920445",
            "height": "1366",
            "id": "2920445",
            "size": "1929k",
            "width": "1920",
        },
    })

    def __init__(self, match):
        ZerochanExtractor.__init__(self, match)
        self.image_id = match.group(1)

    def posts(self):
        """Return a one-element tuple with the metadata of the entry."""
        return (self._parse_entry_page(self.image_id),)
