# -*- coding: utf-8 -*-

# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://sizebooru.com/"""

from .booru import BooruExtractor
from .. import text

BASE_PATTERN = r"(?:https?://)?(?:www\.)?sizebooru\.com"


class SizebooruExtractor(BooruExtractor):
    """Base class for sizebooru extractors"""
    category = "sizebooru"
    root = "https://sizebooru.com"
    filename_fmt = "{id}.{extension}"
    archive_fmt = "{id}"
    page_start = 1
    request_interval = (0.5, 1.5)

    def _init(self):
        """Use '_prepare_metadata' as '_prepare' when 'metadata' is enabled"""
        if self.config("metadata", False):
            self._prepare = self._prepare_metadata

    def _file_url(self, post):
        """Store the /Picture/ URL of 'post' as 'file_url' and return it"""
        post["file_url"] = url = f"{self.root}/Picture/{post['id']}"
        return url

    def _prepare(self, post):
        """Normalize 'id', 'filename', and 'extension' without requests"""
        post_id = post["id"]
        post["id"] = text.parse_int(post_id)
        # the ID as originally given doubles as filename
        post["filename"] = post_id
        if not post["extension"]:
            post["extension"] = "jpg"

    def _prepare_metadata(self, post):
        """Update 'post' with metadata from its /Details/ page

        Performs one additional HTTP request per post and returns the
        updated 'post' dict.
        """
        post_id = post["id"]
        url = f"{self.root}/Details/{post_id}"
        # presumably 'extr' walks the page front to back, in which case
        # the calls below must keep the order of the fields in the HTML
        extr = text.extract_from(self.request(url).text)

        # NOTE(review): the markup inside the marker strings below was
        # stripped from the copy of this file under review. All '<b>',
        # '</b>', '<h6>', '</h6>', '</ul>' and '</' parts, the key names
        # 'uploader', 'artist', 'views' and 'tags', the functions applied
        # to their values, and the ' href="' arguments for 'source' are
        # reconstructed from the surviving labels -- confirm every one of
        # them against the upstream file and the live page markup.
        post.update({
            "id"       : text.parse_int(post_id),
            "date"     : self.parse_datetime(
                extr("<b>Posted Date:</b> ", "<"), "%m/%d/%Y"),
            "date_approved": self.parse_datetime(
                extr("<b>Approved Date:</b> ", "<"), "%m/%d/%Y"),
            "approver" : text.remove_html(
                extr("<b>Approved By:</b>", "</")),
            "uploader" : text.remove_html(
                extr("<b>Posted By:</b>", "</")),
            "artist"   : extr("<b>Artist:</b> ", "</"),
            "views"    : text.parse_int(extr("<b>Views:</b>", "<")),
            "source"   : text.extr(extr(
                "Source Link:</b>", "</"), ' href="', '"'),
            "tags"     : text.split_html(extr(
                "<h6>Related Tags</h6>", "</ul>")),
            "favorite" : text.split_html(extr(
                "<h6>Favorited By</h6>", "</ul>")),
        })

        # the text after '" alt="' supplies the filename and, if no
        # extension is known yet, the (lowercased) file extension
        post["filename"], _, ext = extr('" alt="', '"').rpartition(".")
        if not post["extension"]:
            post["extension"] = ext.lower()

        return post

    def _pagination(self, url, callback=None):
        """Yield post stubs from all result pages of 'url'

        'callback', if given, gets called once with the HTML of the
        first page before any post is yielded.
        """
        params = {
            "pageNo"  : self.page_start,
            "pageSize": self.per_page,
        }

        page = self.request(url, params=params).text
        if callback is not None:
            callback(page)

        while True:
            thumb = None
            # NOTE(review): both markers and the complete loop body were
            # lost from the copy of this file under review; this is a
            # reconstruction that assumes each thumbnail is a link to
            # '/Details/<id>' wrapping a 'data:image/<ext>;base64' image
            # -- confirm against the upstream file.
            for thumb in text.extract_iter(
                    page, '<a href="/Details/', ';base64'):
                yield {
                    "id"       : thumb[:thumb.find('"')],
                    "extension": thumb[thumb.rfind("/")+1:],
                }

            # NOTE(review): the original condition was
            # '<lost expression> or thumb is None'; the first operand
            # could not be recovered and must be restored from the
            # upstream file. Until then, paging only stops at the first
            # page without any thumbnail.
            if thumb is None:
                return
            params["pageNo"] += 1
            page = self.request(url, params=params).text


class SizebooruPostExtractor(SizebooruExtractor):
    """Extractor for sizebooru posts"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/Details/(\d+)"
    example = "https://sizebooru.com/Details/12345"

    def posts(self):
        """Return a single post stub for the ID from the URL"""
        return ({"id": self.groups[0], "extension": None},)


class SizebooruTagExtractor(SizebooruExtractor):
    """Extractor for sizebooru tag searches"""
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    pattern = BASE_PATTERN + r"/Search/([^/?#]+)"
    example = "https://sizebooru.com/Search/TAG"

    def posts(self):
        """Return the posts of a tag search and record 'search_tags'"""
        tag = self.groups[0]
        self.kwdict["search_tags"] = text.unquote(tag)
        return self._pagination(f"{self.root}/Search/{tag}")


class SizebooruGalleryExtractor(SizebooruExtractor):
    """Extractor for sizebooru galleries"""
    subcategory = "gallery"
    directory_fmt = ("{category}", "{gallery_name} ({gallery_id})")
    pattern = BASE_PATTERN + r"/Galleries/List/(\d+)"
    example = "https://sizebooru.com/Galleries/List/123"

    def posts(self):
        """Return the posts of a gallery and record 'gallery_id'"""
        gid = self.groups[0]
        self.kwdict["gallery_id"] = text.parse_int(gid)
        return self._pagination(
            f"{self.root}/Galleries/List/{gid}", self._extract_name)

    def _extract_name(self, page):
        """Record 'gallery_name' from the first result page's HTML"""
        self.kwdict["gallery_name"] = text.unescape(text.extr(
            page, "Gallery: ", " - Size Booru<"))


class SizebooruUserExtractor(SizebooruExtractor):
    """Extractor for a sizebooru user's uploads"""
    subcategory = "user"
    directory_fmt = ("{category}", "Uploads {user}")
    pattern = BASE_PATTERN + r"/Profile/Uploads/([^/?#]+)"
    example = "https://sizebooru.com/Profile/Uploads/USER"

    def posts(self):
        """Return the uploads of a user and record 'user'"""
        user = self.groups[0]
        self.kwdict["user"] = text.unquote(user)
        return self._pagination(f"{self.root}/Profile/Uploads/{user}")


class SizebooruFavoriteExtractor(SizebooruExtractor):
    """Extractor for a sizebooru user's favorites"""
    subcategory = "favorite"
    directory_fmt = ("{category}", "Favorites {user}")
    pattern = BASE_PATTERN + r"/Profile/Favorites/([^/?#]+)"
    example = "https://sizebooru.com/Profile/Favorites/USER"

    def posts(self):
        """Return the favorites of a user and record 'user'"""
        user = self.groups[0]
        self.kwdict["user"] = text.unquote(user)
        return self._pagination(f"{self.root}/Profile/Favorites/{user}")