# -*- coding: utf-8 -*-

# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://sankaku.app/"""

from .booru import BooruExtractor
from .common import Message
from .. import text, util, exception
from ..cache import cache
import collections
import re

# Matches an optional scheme, one of the known Sankaku hosts, and an
# optional two-letter path segment (e.g. "/en").
BASE_PATTERN = r"(?:https?://)?" \
    r"(?:(?:chan|www|beta|black|white)\.sankakucomplex\.com|sankaku\.app)" \
    r"(?:/[a-z]{2})?"


class SankakuExtractor(BooruExtractor):
    """Base class for sankaku channel extractors"""
    basecategory = "booru"
    category = "sankaku"
    root = "https://sankaku.app"
    filename_fmt = "{category}_{id}_{md5}.{extension}"
    cookies_domain = None
    # Class-level flag: cleared on the class by _file_url() after the
    # "Login required" warning has been logged, so it is emitted only once.
    _warning = True

    # Maps the numeric tag type reported by the API to the suffix used for
    # the per-category metadata keys written by _tags()
    # ("tags_<suffix>" and "tag_string_<suffix>").
    # Types 6 and 7 map to an empty suffix, i.e. the keys "tags_" and
    # "tag_string_".
    TAG_TYPES = {
        0: "general",
        1: "artist",
        2: "studio",
        3: "copyright",
        4: "character",
        5: "genre",
        6: "",
        7: "",
        8: "medium",
        9: "meta",
    }

    def skip(self, num):
        """Ignore 'num' and always return 0"""
        # NOTE(review): presumably tells the caller that no posts were
        # skipped up front; confirm against the BooruExtractor contract.
        return 0

    def _init(self):
        """Create the API client and select the tag extraction strategy"""
        self.api = SankakuAPI(self)
        if self.config("tags") == "extended":
            # Rebind _tags on this instance so callers transparently get
            # the HTML-based implementation instead of the API-based one.
            self._tags = self._tags_extended
            # Captures (tag type, tag name) pairs from "tag-type-<type>"
            # class names and "?tags=<name>" query strings.
            self._tags_findall = re.compile(
                r"tag-type-([^\"' ]+).*?\?tags=([^\"'&]+)").findall

    def _file_url(self, post):
        """Return the download URL of 'post'

        Returns post["file_url"] unchanged when it is empty (after logging
        a warning), otherwise the URL with its host possibly rewritten.
        """
        url = post["file_url"]
        if not url:
            if post["status"] != "active":
                self.log.warning(
                    "Unable to download post %s (%s)",
                    post["id"], post["status"])
            elif self._warning:
                self.log.warning(
                    "Login required to download 'contentious_content' posts")
                # Set on the class, not the instance, so the warning is
                # suppressed for every extractor from here on.
                SankakuExtractor._warning = False
        elif url[8] == "v":
            # Index 8 is the first character after an "https://" prefix
            # (assumes that scheme - TODO confirm). Hosts starting with
            # "v" are replaced by "s.sankakucomplex.com"; everything from
            # the first "/" after the host is kept.
            url = "https://s.sankakucomplex.com" + url[url.index("/", 8):]
        return url

    def _prepare(self, post):
        """Normalize metadata fields of 'post' in place"""
        # "created_at" arrives as a mapping; keep only its "s" entry,
        # which is then parsed as a timestamp.
        post["created_at"] = post["created_at"]["s"]
        post["date"] = text.parse_timestamp(post["created_at"])
        # Rename "tag_names" to "tags"; default to an empty tuple.
        post["tags"] = post.pop("tag_names", ())
        post["tag_string"] = " ".join(post["tags"])
        post["_http_validate"] = self._check_expired

    def _check_expired(self, response):
        """Return False if 'response' ended up at an 'expired.png' URL

        True is returned when the response has no history or when its
        final URL does not contain '.com/expired.png'.
        """
        return not response.history or '.com/expired.png' not in response.url

    def _tags(self, post, page):
        """Add per-category tag lists and strings to 'post' via the API

        'page' is not used by this implementation.
        """
        tags = collections.defaultdict(list)
        for tag in self.api.tags(post["id"]):
            name = tag["name"]
            # Entries with an empty/missing name are skipped.
            if name:
                tags[tag["type"]].append(name.lower().replace(" ", "_"))
        types = self.TAG_TYPES
        for type, values in tags.items():
            # A tag type missing from TAG_TYPES raises KeyError here.
            name = types[type]
            post["tags_" + name] = values
            post["tag_string_" + name] = " ".join(values)

    def _tags_extended(self, post, page):
        """Extract tag categories from the post's HTML page"""
        try:
            url = "https://chan.sankakucomplex.com/posts/" + post["id"]
            page = self.request(url).text
        except Exception as exc:
            # Best effort: any failure is logged and extraction is
            # abandoned without raising.
            return self.log.warning(
                "%s: Failed to extract extended tag categories (%s: %s)",
                post["id"], exc.__class__.__name__, exc)

        tags = collections.defaultdict(list)
        tag_sidebar = text.extr(page, '