# -*- coding: utf-8 -*-

# Copyright 2020-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.furaffinity.net/"""

from .common import Extractor, Message, Dispatch
from .. import text, util

BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?(?:f[ux]|f?xfu)raffinity\.net"


class FuraffinityExtractor(Extractor):
    """Base class for furaffinity extractors"""
    category = "furaffinity"
    directory_fmt = ("{category}", "{user!l}")
    filename_fmt = "{id}{title:? //}.{extension}"
    archive_fmt = "{id}"
    cookies_domain = ".furaffinity.net"
    cookies_names = ("a", "b")
    root = "https://www.furaffinity.net"
    request_interval = 1.0
    # class-level flag so the missing-cookie warning is emitted only once
    # across all instances
    _warning = True

    def __init__(self, match):
        Extractor.__init__(self, match)
        self.user = match[1]
        self.offset = 0

    def _init(self):
        """Evaluate per-extractor configuration options"""
        self.external = self.config("external", False)
        if self.config("descriptions") == "html":
            # keep descriptions as raw HTML instead of stripping markup
            self._process_description = str.strip

        layout = self.config("layout")
        if layout and layout != "auto":
            self._new_layout = False if layout == "old" else True
        else:
            # None -> auto-detect old/new site layout from the first response
            self._new_layout = None

        if self._warning:
            if not self.cookies_check(self.cookies_names):
                self.log.warning("no 'a' and 'b' session cookies set")
            FuraffinityExtractor._warning = False

    def items(self):
        metadata = self.metadata()
        for post_id in util.advance(self.posts(), self.offset):
            if post := self._parse_post(post_id):
                if metadata:
                    post.update(metadata)
                yield Message.Directory, "", post
                yield Message.Url, post["url"], post

                if self.external:
                    # queue external links found in the description text
                    for url in text.extract_iter(
                            post["_description"], 'href="http', '"'):
                        yield Message.Queue, "http" + url, post

    def metadata(self):
        """Extra metadata merged into every post; overridden by subclasses"""
        return None

    def skip(self, num):
        self.offset += num
        return num

    def _parse_post(self, post_id):
        """Fetch a submission page and parse it into a metadata dict.

        Returns None (after logging a warning) when the submission
        cannot be downloaded (disabled account, removed post, ...).

        NOTE(review): many HTML extract markers in this method were lost
        in this copy of the file (anything tag-shaped was stripped from
        the string literals). The markers below were restored from the
        surrounding code and page structure — verify each against a live
        submission page / upstream before relying on them.
        """
        url = f"{self.root}/view/{post_id}/"
        extr = text.extract_from(self.request(url).text)

        if self._new_layout is None:
            # old-layout pages carry an 'http-equiv' <meta> attribute;
            # markers restored - verify
            self._new_layout = ("http-equiv=" not in extr("<meta ", ">"))

        path = extr('href="//d', '"')
        if not path:
            # no download link -> surface the site's 'System Message'
            # text in the warning (end markers restored - verify)
            msg = text.remove_html(
                extr('System Message', '</section>') or
                extr('System Message', '</table>')
            ).partition(" . Continue ")[0]
            return self.log.warning(
                "Unable to download post %s (\"%s\")", post_id, msg)

        pi = text.parse_int
        rh = text.remove_html

        data = text.nameext_from_url(path, {
            "id" : pi(post_id),
            "url": "https://d" + path,
        })

        if self._new_layout:
            # marker originally spanned the 'Keywords' section header
            data["tags"] = text.split_html(extr(
                ">Keywords<", "</section>"))
            data["scraps"] = (extr(' submissions">', "<") == "Scraps")
            data["title"] = text.unescape(extr("<h2><p>", "</p></h2>"))
            data["artist_url"] = extr('title="', '"').strip()
            data["artist"] = extr(">", "<")
            data["_description"] = extr(
                'class="submission-description user-submitted-links">',
                '</div>')
            data["views"] = pi(rh(extr('class="views">', '</span>')))
            data["favorites"] = pi(rh(extr('class="favorites">', '</span>')))
            data["comments"] = pi(rh(extr('class="comments">', '</span>')))
            data["rating"] = rh(extr('class="rating">', '</span>'))
            data["fa_category"] = rh(extr('>Category</strong>', '</span>'))
            data["theme"] = rh(extr('>', '<'))
            data["species"] = rh(extr('>Species</strong>', '</div>'))
            data["gender"] = rh(extr('>Gender</strong>', '</div>'))
            data["width"] = pi(extr("<span>", "x"))
            data["height"] = pi(extr("", "p"))

            # marker originally spanned the 'Listed in Folders' header
            data["folders"] = folders = []
            for folder in extr(
                    ">Listed in Folders<", "</section>").split("</a>"):
                if folder := rh(folder):
                    folders.append(folder)

        else:  # old site layout
            data["scraps"] = (
                "/scraps/" in extr('class="minigallery-title', "</a>"))
            data["title"] = text.unescape(extr("<h2>", "</h2>"))
            data["artist_url"] = extr('title="', '"').strip()
            data["artist"] = extr(">", "<")
            data["fa_category"] = extr("Category:", "<").strip()
            data["theme"] = extr("Theme:", "<").strip()
            data["species"] = extr("Species:", "<").strip()
            data["gender"] = extr("Gender:", "<").strip()
            data["favorites"] = pi(extr("Favorites:", "<"))
            data["comments"] = pi(extr("Comments:", "<"))
            data["views"] = pi(extr("Views:", "<"))
            data["width"] = pi(extr("Resolution:", "x"))
            data["height"] = pi(extr("", "<"))
            data["tags"] = text.split_html(extr(
                'id="keywords">', '</div>'))[::2]
            data["rating"] = extr('<img alt="', ' ')
            data["_description"] = extr(
                'class="alt1" width="70%">', '</td>')
            data["folders"] = ()  # folders not present in old layout

        data["user"] = self.user or data["artist_url"]
        # submission files are named '<unix timestamp>.<artist>_<title>.<ext>'
        data["date"] = self.parse_timestamp(data["filename"].partition(".")[0])
        data["description"] = self._process_description(data["_description"])
        data["thumbnail"] = (f"https://t.furaffinity.net/{post_id}@600-"
                             f"{path.rsplit('/', 2)[1]}.jpg")
        return data

    def _process_description(self, description):
        # default description processing: strip HTML and unescape entities;
        # replaced by str.strip in _init() when 'descriptions' == "html"
        return text.unescape(text.remove_html(description, "", ""))

    def _pagination(self, path, folder=None):
        """Yield post IDs from numbered gallery/scraps/folder pages"""
        num = 1
        folder = "" if folder is None else f"/folder/{folder}/a"

        while True:
            url = f"{self.root}/{path}/{self.user}{folder}/{num}/"
            page = self.request(url).text

            post_id = None
            for post_id in text.extract_iter(page, 'id="sid-', '"'):
                yield post_id
            if not post_id:
                # a page without any submission IDs marks the end
                return
            num += 1

    def _pagination_favorites(self):
        """Yield post IDs from a user's favorites, following 'Next' links"""
        path = f"/favorites/{self.user}/"
        while path:
            page = self.request(self.root + path).text
            extr = text.extract_from(page)
            while True:
                post_id = extr('id="sid-', '"')
                if not post_id:
                    break
                # remember the favorite ID; presumably consumed by a
                # favorites subclass for metadata - not visible here
                self._favorite_id = text.parse_int(
                    extr('data-fav-id="', '"'))
                yield post_id

            pos = page.find('type="submit">Next')
            if pos >= 0:
                # NOTE(review): rextr markers lost in this copy; 'action="'
                # (the form wrapping the Next button) restored - verify
                path = text.rextr(page, 'action="', '"', pos)
            else:
                # NOTE(review): fallback lost in this copy; without it the
                # 'while path' loop would never terminate - verify upstream
                path = None
Next 48")) < 0 and \ (pos := page.find(">>>> Next 48 >>")) < 0: return path = text.rextr(page, 'href="', '"', pos) url = self.root + text.unescape(path)