# -*- coding: utf-8 -*-

# Copyright 2020-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.furaffinity.net/"""

from .common import Extractor, Message, Dispatch
from .. import text, util

BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?(?:f[ux]|f?xfu)raffinity\.net"


class FuraffinityExtractor(Extractor):
    """Base class for furaffinity extractors"""
    category = "furaffinity"
    directory_fmt = ("{category}", "{user!l}")
    filename_fmt = "{id}{title:? //}.{extension}"
    archive_fmt = "{id}"
    cookies_domain = ".furaffinity.net"
    cookies_names = ("a", "b")
    root = "https://www.furaffinity.net"
    request_interval = 1.0
    # Class-wide flag: cleared by the first _init() that reaches the
    # cookie check, so that check runs only once.
    _warning = True

    def __init__(self, match):
        """Store the first match group as 'user' and reset the offset"""
        Extractor.__init__(self, match)
        self.user = match[1]
        self.offset = 0

    def _init(self):
        """Read the 'external', 'descriptions' and 'layout' options"""
        option = self.config

        self.external = option("external", False)

        if option("descriptions") == "html":
            # use plain str.strip as the description processor
            self._process_description = str.strip

        layout = option("layout")
        if not layout or layout == "auto":
            # None: layout is not fixed by configuration
            self._new_layout = None
        else:
            # any explicit value other than "old" selects the new layout
            self._new_layout = (layout != "old")

        if not self._warning:
            return
        if not self.cookies_check(self.cookies_names):
            self.log.warning("no 'a' and 'b' session cookies set")
        FuraffinityExtractor._warning = False

    def items(self):
        """Yield Directory, Url and (optionally) Queue messages per post"""
        extra = self.metadata()
        # NOTE(review): posts() is not defined in the visible part of this
        # class - presumably supplied by subclasses; confirm.
        for post_id in util.advance(self.posts(), self.offset):
            post = self._parse_post(post_id)
            if not post:
                # a falsy result from _parse_post() means: nothing to emit
                continue

            if extra:
                post.update(extra)
            yield Message.Directory, "", post
            yield Message.Url, post["url"], post

            if not self.external:
                continue
            # queue every 'href="http...' target found in the description
            links = text.extract_iter(
                post["_description"], 'href="http', '"')
            for link in links:
                yield Message.Queue, "http" + link, post

    def metadata(self):
        """Return extra metadata merged into every post by items()"""
        return None

    def skip(self, num):
        """Add 'num' to the post offset and return 'num'"""
        self.offset += num
        return num

    # NOTE(review): '_parse_post' opens at this point, but its body runs on
    # past this span, so its opening is preserved below verbatim and
    # unmodified. It must be rejoined with the continuation that follows
    # on the next source line.
    # def _parse_post(self, post_id): url = f"{self.root}/view/{post_id}/" extr = text.extract_from(self.request(url).text) if self._new_layout is None: self._new_layout = ("http-equiv=" not in
extr("")) path = extr('href="//d', '"') if not path: msg = text.remove_html( extr('System Message', '') or extr('System Message', '') ).partition(" . Continue ")[0] return self.log.warning( "Unable to download post %s (\"%s\")", post_id, msg) pi = text.parse_int rh = text.remove_html data = text.nameext_from_url(path, { "id" : pi(post_id), "url": "https://d" + path, }) if self._new_layout: data["tags"] = text.split_html(extr( "
", "
")) data["artist_url"] = extr('title="', '"').strip() data["artist"] = extr(">", "<") data["_description"] = extr( 'class="submission-description user-submitted-links">', ' ') data["views"] = pi(rh(extr('class="views">', ''))) data["favorites"] = pi(rh(extr('class="favorites">', ''))) data["comments"] = pi(rh(extr('class="comments">', ''))) data["rating"] = rh(extr('class="rating">', '')) data["fa_category"] = rh(extr('>Category', '')) data["theme"] = rh(extr('>', '<')) data["species"] = rh(extr('>Species', '')) data["gender"] = rh(extr('>Gender', '')) data["width"] = pi(extr("", "x")) data["height"] = pi(extr("", "p")) data["folders"] = folders = [] for folder in extr( "