summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/furaffinity.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@ubuntu.com> 2020-03-16 23:20:15 -0400
committer: Unit 193 <unit193@ubuntu.com> 2020-03-16 23:20:15 -0400
commit: e8cc000750de972384f2f34d02d42222b4018ae9 (patch)
tree: 26eb0bacedff7480d29bafcf184ca529cf9f1d9f /gallery_dl/extractor/furaffinity.py
parent: 4366125d2580982abb57bc65a26fc1fb8ef2a5df (diff)
New upstream version 1.13.2 (tag: upstream/1.13.2)
Diffstat (limited to 'gallery_dl/extractor/furaffinity.py')
-rw-r--r-- gallery_dl/extractor/furaffinity.py 235
1 files changed, 235 insertions, 0 deletions
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
new file mode 100644
index 0000000..ba60e19
--- /dev/null
+++ b/gallery_dl/extractor/furaffinity.py
@@ -0,0 +1,235 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.furaffinity.net/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+
# Regex prefix shared by every extractor pattern in this module:
# an optional "http://" or "https://" scheme, an optional "www." or
# "sfw." subdomain, and the literal domain "furaffinity.net".
BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?furaffinity\.net"
+
+
class FuraffinityExtractor(Extractor):
    """Base class for furaffinity extractors

    The first capture group of a subclass' 'pattern' is stored in
    'self.user'.  'items()' iterates over the post IDs returned by
    'posts()', loads every post page and yields its metadata together
    with the URL of the submitted file.
    """
    category = "furaffinity"
    directory_fmt = ("{category}", "{user!l}")
    filename_fmt = "{id} {title}.{extension}"
    archive_fmt = "{id}"
    cookiedomain = ".furaffinity.net"
    root = "https://www.furaffinity.net"

    def __init__(self, match):
        Extractor.__init__(self, match)
        self.user = match.group(1)
        # number of leading posts to skip; increased by skip()
        self.offset = 0

    def items(self):
        """Yield a Directory and a Url message for every available post"""
        # NOTE(review): util.advance() presumably drops the first
        # 'self.offset' IDs from the iterable -- confirm in util.
        for post_id in util.advance(self.posts(), self.offset):
            post = self._parse_post(post_id)
            # _parse_post() returns None for posts without a file link
            if post:
                yield Message.Directory, post
                yield Message.Url, post["url"], post

    def posts(self):
        """Return an iterable of post IDs; overridden by some subclasses"""
        return self._pagination()

    def skip(self, num):
        """Register 'num' additional posts to be skipped by items()

        Returns 'num' unchanged.
        """
        self.offset += num
        return num

    def _parse_post(self, post_id):
        """Collect metadata from the /view/ page of 'post_id'

        Returns a dict of metadata including the file "url", or None
        (after logging a warning) if the page has no d.facdn.net link.
        """
        url = "{}/view/{}/".format(self.root, post_id)
        # NOTE(review): 'extr' appears to search forward from the end of
        # its previous match (see the empty start markers used for
        # "height" below), so the calls must stay in page order.
        extr = text.extract_from(self.request(url).text)
        # og:title has the form "<title> by <artist>"; rpartition splits
        # on the last " by " so titles containing " by " stay intact
        title, _, artist = text.unescape(extr(
            'property="og:title" content="', '"')).rpartition(" by ")
        path = extr('href="//d.facdn.net/', '"')

        if not path:
            # no file link: report the site's "System Message" text,
            # trying the '</section>' terminator before '</table>'
            self.log.warning(
                "Unable to download post %s (\"%s\")",
                post_id, text.remove_html(
                    extr('System Message', '</section>') or
                    extr('System Message', '</table>')
                )
            )
            return None

        pi = text.parse_int
        rh = text.remove_html

        # nameext_from_url() is expected to add "filename" (read below
        # for "date") in addition to the keys listed here
        data = text.nameext_from_url(path, {
            "id"    : pi(post_id),
            "title" : title,
            "artist": artist,
            # self.user is None for single posts -> fall back to artist
            "user"  : self.user or artist,
            "url"   : "https://d.facdn.net/" + path
        })

        # a 'tags-row' section only exists in the new site layout
        tags = extr('class="tags-row">', '</section>')
        if tags:
            # new site layout
            data["tags"] = text.split_html(tags)
            data["description"] = text.unescape(rh(extr(
                'class="section-body">', '</div>'), "", ""))
            data["views"] = pi(rh(extr('class="views">', '</span>')))
            data["favorites"] = pi(rh(extr('class="favorites">', '</span>')))
            data["comments"] = pi(rh(extr('class="comments">', '</span>')))
            data["rating"] = rh(extr('class="rating">', '</span>'))
            data["fa_category"] = rh(extr('>Category</strong>', '</span>'))
            # theme: text of the next '>...<' span after the category
            data["theme"] = rh(extr('>', '<'))
            data["species"] = rh(extr('>Species</strong>', '</div>'))
            data["gender"] = rh(extr('>Gender</strong>', '</div>'))
            # resolution is taken as "<width>x<height>p..."; the empty
            # start marker continues right after the "x"
            data["width"] = pi(extr("<span>", "x"))
            data["height"] = pi(extr("", "p"))
        else:
            # old site layout
            data["fa_category"] = extr("<b>Category:</b>", "<").strip()
            data["theme"] = extr("<b>Theme:</b>", "<").strip()
            data["species"] = extr("<b>Species:</b>", "<").strip()
            data["gender"] = extr("<b>Gender:</b>", "<").strip()
            data["favorites"] = pi(extr("<b>Favorites:</b>", "<"))
            data["comments"] = pi(extr("<b>Comments:</b>", "<"))
            data["views"] = pi(extr("<b>Views:</b>", "<"))
            data["width"] = pi(extr("<b>Resolution:</b>", "x"))
            data["height"] = pi(extr("", "<"))
            # keep every second element of the split keyword markup
            # NOTE(review): presumably drops separators -- confirm
            data["tags"] = text.split_html(extr(
                'id="keywords">', '</div>'))[::2]
            data["rating"] = extr('<img alt="', ' ')
            data["description"] = text.unescape(text.remove_html(extr(
                "</table>", "</table>"), "", ""))
        # the part of the filename before the first "." is parsed as a
        # timestamp (both layouts)
        data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])

        return data

    def _pagination(self):
        """Yield post IDs from /<subcategory>/<user>/<num>/ pages

        Page numbers start at 1 and increase until a page contains no
        'id="sid-' entries.
        """
        num = 1

        while True:
            url = "{}/{}/{}/{}/".format(
                self.root, self.subcategory, self.user, num)
            page = self.request(url).text
            # stays None if the loop below finds nothing on this page
            post_id = None

            for post_id in text.extract_iter(page, 'id="sid-', '"'):
                yield post_id

            if not post_id:
                return
            num += 1

    def _pagination_favorites(self):
        """Yield post IDs from a user's favorites

        Instead of counting page numbers, this follows the link found
        after 'right" href="' on each page until there is none.
        """
        path = "/favorites/{}/".format(self.user)

        while path:
            page = self.request(self.root + path).text
            yield from text.extract_iter(page, 'id="sid-', '"')
            # NOTE(review): text.extract() presumably yields a falsy
            # first element when the marker is absent, ending the loop
            path = text.extract(page, 'right" href="', '"')[0]
+
+
class FuraffinityGalleryExtractor(FuraffinityExtractor):
    """Extractor for a furaffinity user's gallery"""
    subcategory = "gallery"
    # group 1: user name (everything up to the next '/', '?', '&' or '#')
    pattern = BASE_PATTERN + r"/gallery/([^/?&#]+)"
    test = ("https://www.furaffinity.net/gallery/mirlinthloth/", {
        "pattern": r"https://d.facdn.net/art/mirlinthloth/\d+/\d+.\w+\.\w+",
        "range": "45-50",
        "count": 6,
    })
+
+
class FuraffinityScrapsExtractor(FuraffinityExtractor):
    """Extractor for a furaffinity user's scraps"""
    subcategory = "scraps"
    # scraps are stored in a "Scraps" subdirectory of the user directory
    directory_fmt = ("{category}", "{user!l}", "Scraps")
    # group 1: user name (everything up to the next '/', '?', '&' or '#')
    pattern = BASE_PATTERN + r"/scraps/([^/?&#]+)"
    test = ("https://www.furaffinity.net/scraps/mirlinthloth/", {
        "pattern": r"https://d.facdn.net/art/[^/]+(/stories)?/\d+/\d+.\w+.\w+",
        "count": ">= 3",
    })
+
+
class FuraffinityFavoriteExtractor(FuraffinityExtractor):
    """Extractor for a furaffinity user's favorites"""
    subcategory = "favorite"
    # favorites are stored in a "Favorites" subdirectory of the user directory
    directory_fmt = ("{category}", "{user!l}", "Favorites")
    # group 1: user name (everything up to the next '/', '?', '&' or '#')
    pattern = BASE_PATTERN + r"/favorites/([^/?&#]+)"
    test = ("https://www.furaffinity.net/favorites/mirlinthloth/", {
        "pattern": r"https://d.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+",
        "range": "45-50",
        "count": 6,
    })

    def posts(self):
        """Return post IDs by following the favorites' 'next' links

        The inherited _pagination() builds its URLs from 'subcategory'
        ("favorite") and a page number, which does not produce a
        "/favorites/" path, so the link-following variant is used.
        """
        return self._pagination_favorites()
+
+
class FuraffinityPostExtractor(FuraffinityExtractor):
    """Extractor for individual posts on furaffinity"""
    subcategory = "post"
    # group 1: numeric post ID from a /view/ or /full/ URL
    pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)"
    test = (
        ("https://www.furaffinity.net/view/21835115/", {
            "url": "eae4ef93d99365c69b31a37561bd800c03d336ad",
            "keyword": {
                "artist"     : "mirlinthloth",
                "date"       : "dt:2016-11-27 17:24:06",
                "description": "A Song made playing the game Cosmic DJ.",
                "extension"  : "mp3",
                "filename"   : r"re:\d+\.\w+_dj_fennmink_-_bude_s_4_ever",
                "id"         : 21835115,
                "tags"       : list,
                "title"      : "Bude's 4 Ever",
                "url"        : "re:https://d.facdn.net/art/mirlinthloth/music",
                "user"       : "mirlinthloth",
                "views"      : int,
                "favorites"  : int,
                "comments"   : int,
                "rating"     : "General",
                "fa_category": "Music",
                "theme"      : "All",
                "species"    : "Unspecified / Any",
                "gender"     : "Any",
                "width"      : 120,
                "height"     : 120,
            },
        }),
        ("https://furaffinity.net/view/21835115/"),
        ("https://sfw.furaffinity.net/view/21835115/"),
        ("https://www.furaffinity.net/full/21835115/"),
    )

    def posts(self):
        """Return a 1-tuple holding the post ID from the URL

        The base class stores the first pattern group in 'self.user';
        for this extractor that group is the post ID.  'self.user' is
        reset to None so the "user" metadata field falls back to the
        post's artist.
        """
        post_id = self.user
        self.user = None
        return (post_id,)
+
+
class FuraffinityUserExtractor(FuraffinityExtractor):
    """Extractor for furaffinity user profiles"""
    subcategory = "user"
    cookiedomain = None
    # group 1: user name (everything up to the next '/', '?', '&' or '#')
    pattern = BASE_PATTERN + r"/user/([^/?&#]+)"
    test = (
        ("https://www.furaffinity.net/user/mirlinthloth/", {
            "pattern": r"/gallery/mirlinthloth/$",
        }),
        ("https://www.furaffinity.net/user/mirlinthloth/", {
            "options": (("include", "all"),),
            "pattern": r"/(gallery|scraps|favorites)/mirlinthloth/$",
            "count": 3,
        }),
    )

    def items(self):
        """Hand the user's gallery, scraps and favorites URLs to
        their respective extractors
        """
        # "{{}}" survives the first format() as "{}" and is filled with
        # the section name below
        base = "{}/{{}}/{}/".format(self.root, self.user)
        # NOTE(review): the trailing ("gallery",) is presumably the
        # default selection when no "include" option is set -- confirm
        # against Extractor._dispatch_extractors.
        return self._dispatch_extractors((
            (FuraffinityGalleryExtractor , base.format("gallery")),
            (FuraffinityScrapsExtractor  , base.format("scraps")),
            (FuraffinityFavoriteExtractor, base.format("favorites")),
        ), ("gallery",))