summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/toyhouse.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/toyhouse.py')
-rw-r--r--gallery_dl/extractor/toyhouse.py173
1 files changed, 173 insertions, 0 deletions
diff --git a/gallery_dl/extractor/toyhouse.py b/gallery_dl/extractor/toyhouse.py
new file mode 100644
index 0000000..c6be38d
--- /dev/null
+++ b/gallery_dl/extractor/toyhouse.py
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://toyhou.se/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?toyhou\.se"
+
+
+class ToyhouseExtractor(Extractor):
+ """Base class for toyhouse extractors"""
+ category = "toyhouse"
+ root = "https://toyhou.se"
+ directory_fmt = ("{category}", "{user|artists!S}")
+ archive_fmt = "{id}"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user = match.group(1)
+ self.offset = 0
+
+ def items(self):
+ metadata = self.metadata()
+
+ for post in util.advance(self.posts(), self.offset):
+ if metadata:
+ post.update(metadata)
+ text.nameext_from_url(post["url"], post)
+ post["id"], _, post["hash"] = post["filename"].partition("_")
+ yield Message.Directory, post
+ yield Message.Url, post["url"], post
+
+ def posts(self):
+ return ()
+
+ def metadata(self):
+ return None
+
+ def skip(self, num):
+ self.offset += num
+ return num
+
+ def _parse_post(self, post, needle='<a href="'):
+ extr = text.extract_from(post)
+ return {
+ "url": extr(needle, '"'),
+ "date": text.parse_datetime(extr(
+ 'Credits\n</h2>\n<div class="mb-1">', '<'),
+ "%d %b %Y, %I:%M:%S %p"),
+ "artists": [
+ text.remove_html(artist)
+ for artist in extr(
+ '<div class="artist-credit">', '</div>\n</div>').split(
+ '<div class="artist-credit">')
+ ],
+ "characters": text.split_html(extr(
+ '<div class="image-characters', '</div>\n</div>'))[2:],
+ }
+
+ def _pagination(self, path):
+ url = self.root + path
+ params = {"page": 1}
+
+ while True:
+ page = self.request(url, params=params).text
+
+ cnt = 0
+ for post in text.extract_iter(
+ page, '<li class="gallery-item">', '</li>'):
+ cnt += 1
+ yield self._parse_post(post)
+
+ if cnt == 0 and params["page"] == 1:
+ token, pos = text.extract(
+ page, '<input name="_token" type="hidden" value="', '"')
+ if not token:
+ return
+ data = {
+ "_token": token,
+ "user" : text.extract(page, 'value="', '"', pos)[0],
+ }
+ self.request(self.root + "/~account/warnings/accept",
+ method="POST", data=data, allow_redirects=False)
+ continue
+
+ if cnt < 18:
+ return
+ params["page"] += 1
+
+
+class ToyhouseArtExtractor(ToyhouseExtractor):
+ """Extractor for artworks of a toyhouse user"""
+ subcategory = "art"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/art"
+
+ test = (
+ ("https://www.toyhou.se/d-floe/art", {
+ "range": "1-30",
+ "count": 30,
+ "pattern": r"https://f\d+\.toyhou\.se/file/f\d+-toyhou-se"
+ r"/images/\d+_\w+\.\w+$",
+ "keyword": {
+ "artists": list,
+ "characters": list,
+ "date": "type:datetime",
+ "hash": r"re:\w+",
+ "id": r"re:\d+",
+ "url": str,
+ "user": "d-floe",
+ },
+ }),
+ # protected by Content Warning
+ ("https://www.toyhou.se/kroksoc/art", {
+ "count": ">= 19",
+ }),
+ )
+
+ def posts(self):
+ return self._pagination("/{}/art".format(self.user))
+
+ def metadata(self):
+ return {"user": self.user}
+
+
+class ToyhouseImageExtractor(ToyhouseExtractor):
+ """Extractor for individual toyhouse images"""
+ subcategory = "image"
+ pattern = (r"(?:https?://)?(?:"
+ r"(?:www\.)?toyhou\.se/~images|"
+ r"f\d+\.toyhou\.se/file/[^/?#]+/(?:image|watermark)s"
+ r")/(\d+)")
+ test = (
+ ("https://toyhou.se/~images/40587320", {
+ "content": "058ec8427977ab432c4cc5be5a6dd39ce18713ef",
+ "keyword": {
+ "artists": ["d-floe"],
+ "characters": ["Sumi"],
+ "date": "dt:2021-10-08 01:32:47",
+ "extension": "png",
+ "filename": "40587320_TT1NaBUr3FLkS1p",
+ "hash": "TT1NaBUr3FLkS1p",
+ "id": "40587320",
+ "url": "https://f2.toyhou.se/file/f2-toyhou-se/images"
+ "/40587320_TT1NaBUr3FLkS1p.png",
+ },
+ }),
+ # direct link, multiple artists
+ (("https://f2.toyhou.se/file/f2-toyhou-se"
+ "/watermarks/36817425_bqhGcwcnU.png?1625561467"), {
+ "keyword": {
+ "artists": [
+ "http://aminoapps.com/p/92sf3z",
+ "kroksoc (Color)"],
+ "characters": ["❀Reiichi❀"],
+ "date": "dt:2021-07-03 20:02:02",
+ "hash": "bqhGcwcnU",
+ "id": "36817425",
+ },
+ }),
+ ("https://f2.toyhou.se/file/f2-toyhou-se"
+ "/images/40587320_TT1NaBUr3FLkS1p.png"),
+ )
+
+ def posts(self):
+ url = "{}/~images/{}".format(self.root, self.user)
+ return (self._parse_post(self.request(url).text, '<img src="'),)