summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/zerochan.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/zerochan.py')
-rw-r--r-- gallery_dl/extractor/zerochan.py 156
1 files changed, 156 insertions, 0 deletions
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
new file mode 100644
index 0000000..2b5acd8
--- /dev/null
+++ b/gallery_dl/extractor/zerochan.py
@@ -0,0 +1,156 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.zerochan.net/"""
+
+from .booru import BooruExtractor
+from ..cache import cache
+from .. import text, exception
+
+# Regex prefix shared by the extractor patterns in this module:
+# an optional "http://" or "https://" scheme and an optional "www."
+# in front of the literal host name "zerochan.net".
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
+
+
+class ZerochanExtractor(BooruExtractor):
+    """Common base class of the zerochan.net extractors
+
+    Holds the site-wide settings (category, root URL, filename and archive
+    formats, cookie domain and cookie names) and provides the login
+    procedure plus a parser for single entry pages.
+    """
+    category = "zerochan"
+    root = "https://www.zerochan.net"
+    filename_fmt = "{id}.{extension}"
+    archive_fmt = "{id}"
+    cookiedomain = ".zerochan.net"
+    cookienames = ("z_id", "z_hash")
+
+    def login(self):
+        """Log in if necessary and set the 'v3' layout cookie
+
+        Credentials are only looked up when the _check_cookies() test on
+        'cookienames' fails, and a login is only attempted when a username
+        is available.
+        """
+        cookies_missing = not self._check_cookies(self.cookienames)
+        if cookies_missing:
+            username, password = self._get_auth_info()
+            if username:
+                cookies = self._login_impl(username, password)
+                self._update_cookies(cookies)
+
+        # force legacy layout
+        self.session.cookies.set("v3", "0", domain=self.cookiedomain)
+
+    @cache(maxage=90*86400, keyarg=1)
+    def _login_impl(self, username, password):
+        """Submit the login form and return the cookies of the response
+
+        Raises exception.AuthenticationError when the response has an
+        empty redirect history.
+        """
+        self.log.info("Logging in as %s", username)
+
+        login_url = self.root + "/login"
+        response = self.request(
+            login_url,
+            method="POST",
+            headers={
+                "Origin" : self.root,
+                "Referer": login_url,
+            },
+            data={
+                "ref"     : "/",
+                "name"    : username,
+                "password": password,
+                "login"   : "Login",
+            },
+        )
+
+        if response.history:
+            return response.cookies
+        raise exception.AuthenticationError()
+
+    def _parse_entry_page(self, entry_id):
+        """Request the page of 'entry_id' and return its metadata as dict"""
+        page = self.request("{}/{}".format(self.root, entry_id)).text
+        extr = text.extract_from(page)
+
+        # Keep the extr() calls in exactly this order
+        # (presumably each call continues after the previous match).
+        author = extr('"author": "', '"')
+        file_url = extr('"contentUrl": "', '"')
+        published = extr('"datePublished": "', '"')
+        width = extr('"width": "', ' ')
+        height = extr('"height": "', ' ')
+        size = extr('"contentSize": "', 'B')
+        breadcrumbs = extr('class="breadcrumbs', '</p>')
+        tags = extr('alt="Tags: ', '"')
+
+        return {
+            "id"      : entry_id,
+            "author"  : author,
+            "file_url": file_url,
+            "date"    : text.parse_datetime(
+                published, "%a %b %d %H:%M:%S %Y"),
+            "width"   : width,
+            "height"  : height,
+            "size"    : size,
+            "path"    : text.split_html(breadcrumbs)[3::2],
+            "tags"    : tags.split(", "),
+        }
+
+
+class ZerochanTagExtractor(ZerochanExtractor):
+    """Extractor for the posts listed on a zerochan.net tag page"""
+    subcategory = "tag"
+    directory_fmt = ("{category}", "{search_tags}")
+    # group 1: tag path segment (anything but a purely numeric ID)
+    # group 2: optional query string
+    pattern = BASE_PATTERN + r"/(?!\d+$)([^/?#]+)/?(?:\?([^#]+))?"
+    test = ("https://www.zerochan.net/Perth+%28Kantai+Collection%29", {
+        "pattern": r"https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
+        "count": "> 24",
+        "keyword": {
+            "extension": r"re:jpg|png",
+            "file_url": r"re:https://static\.zerochan\.net"
+                        r"/.+\.full\.\d+\.(jpg|png)",
+            "filename": r"re:Perth.\(Kantai.Collection\).full.\d+",
+            "height": r"re:^\d+$",
+            "id": r"re:^\d+$",
+            "name": "Perth (Kantai Collection)",
+            "search_tags": "Perth (Kantai Collection)",
+            "size": r"re:^\d+k$",
+            "width": r"re:^\d+$",
+        },
+    })
+
+    def __init__(self, match):
+        """Store the tag path segment and the query string of the URL"""
+        ZerochanExtractor.__init__(self, match)
+        self.search_tag, self.query = match.groups()
+
+    def metadata(self):
+        """Return the searched tag in readable form as 'search_tags'"""
+        return {"search_tags": text.unquote(
+            self.search_tag.replace("+", " "))}
+
+    def posts(self):
+        """Yield one metadata dict per thumbnail, following result pages
+
+        Paging starts at the 'p' query parameter (default 1) and continues
+        for as long as the current page contains a 'rel="next"' marker.
+        """
+        url = self.root + "/" + self.search_tag
+        params = text.parse_query(self.query)
+        params["p"] = text.parse_int(params.get("p"), 1)
+
+        while True:
+            page = self.request(url, params=params).text
+            thumbs = text.extract(page, '<ul id="thumbs', '</ul>')[0]
+            if thumbs is None:
+                # No thumbnail list on this page (e.g. no results):
+                # stop here instead of handing None to extract_from().
+                return
+            extr = text.extract_from(thumbs)
+
+            while True:
+                post = extr('<li class="', '>')
+                if not post:
+                    break
+                # The 'title' attribute is read as three consecutive
+                # fields - presumably "<width>x<height> <size>B".
+                yield {
+                    "id"    : extr('href="/', '"'),
+                    "name"  : extr('alt="', '"'),
+                    "width" : extr('title="', 'x'),
+                    "height": extr('', ' '),
+                    "size"  : extr('', 'B'),
+                    "file_url": "https://static." + extr(
+                        '<a href="https://static.', '"'),
+                }
+
+            if 'rel="next"' not in page:
+                break
+            params["p"] += 1
+
+
+class ZerochanImageExtractor(ZerochanExtractor):
+    """Extractor for a single zerochan.net entry given by its numeric ID"""
+    subcategory = "image"
+    pattern = BASE_PATTERN + r"/(\d+)"
+    test = ("https://www.zerochan.net/2920445", {
+        "pattern": r"https://static\.zerochan\.net/"
+                   r"Perth\.%28Kantai\.Collection%29\.full\.2920445\.jpg",
+        "keyword": {
+            "author": "YukinoTokisaki",
+            "date": "dt:2020-04-24 21:33:44",
+            "file_url": str,
+            "filename": "Perth.(Kantai.Collection).full.2920445",
+            "height": "1366",
+            "id": "2920445",
+            "size": "1929k",
+            "width": "1920",
+        },
+    })
+
+    def __init__(self, match):
+        """Store the numeric entry ID captured from the URL"""
+        ZerochanExtractor.__init__(self, match)
+        self.image_id = match.group(1)
+
+    def posts(self):
+        """Return a 1-tuple holding the parsed metadata of the entry page"""
+        return (self._parse_entry_page(self.image_id),)