diff options
| author | 2020-03-16 23:20:15 -0400 | |
|---|---|---|
| committer | 2020-03-16 23:20:15 -0400 | |
| commit | e8cc000750de972384f2f34d02d42222b4018ae9 (patch) | |
| tree | 26eb0bacedff7480d29bafcf184ca529cf9f1d9f /gallery_dl/extractor/furaffinity.py | |
| parent | 4366125d2580982abb57bc65a26fc1fb8ef2a5df (diff) | |
New upstream version 1.13.2 (upstream/1.13.2)
Diffstat (limited to 'gallery_dl/extractor/furaffinity.py')
| -rw-r--r-- | gallery_dl/extractor/furaffinity.py | 235 |
1 files changed, 235 insertions, 0 deletions
# -*- coding: utf-8 -*-

# Copyright 2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.furaffinity.net/"""

from .common import Extractor, Message
from .. import text, util


# Optional scheme and optional "www."/"sfw." subdomain; every URL pattern
# below is this prefix followed by a path.
BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?furaffinity\.net"


class FuraffinityExtractor(Extractor):
    """Base class for furaffinity extractors"""
    category = "furaffinity"
    directory_fmt = ("{category}", "{user!l}")
    filename_fmt = "{id} {title}.{extension}"
    archive_fmt = "{id}"
    cookiedomain = ".furaffinity.net"
    root = "https://www.furaffinity.net"

    def __init__(self, match):
        """Store the first pattern group as 'user' and reset the offset"""
        Extractor.__init__(self, match)
        self.user = match.group(1)
        self.offset = 0

    def items(self):
        """Yield a Directory and a Url message for each parsable post"""
        for pid in util.advance(self.posts(), self.offset):
            metadata = self._parse_post(pid)
            if not metadata:
                # _parse_post() found no download link for this post
                continue
            yield Message.Directory, metadata
            yield Message.Url, metadata["url"], metadata

    def posts(self):
        """Return an iterable of post IDs; subclasses may override this"""
        return self._pagination()

    def skip(self, num):
        """Add 'num' to the offset applied in items() and return 'num'"""
        self.offset = self.offset + num
        return num

    def _parse_post(self, post_id):
        """Request the view page of 'post_id' and collect its metadata

        Returns the metadata dict, or None (after logging a warning)
        when no 'd.facdn.net' link is found on the page.
        """
        page = self.request(
            "{}/view/{}/".format(self.root, post_id)).text
        # NOTE: the calls to 'extr' below must stay in this exact order;
        # some of them use an empty start marker and so appear to continue
        # from wherever the previous call stopped.
        extr = text.extract_from(page)

        og_title = text.unescape(
            extr('property="og:title" content="', '"'))
        title, _, artist = og_title.rpartition(" by ")
        path = extr('href="//d.facdn.net/', '"')

        if not path:
            # try the '</section>' terminator first, then '</table>'
            message = extr('System Message', '</section>')
            if not message:
                message = extr('System Message', '</table>')
            self.log.warning(
                "Unable to download post %s (\"%s\")",
                post_id, text.remove_html(message))
            return None

        parse_int = text.parse_int
        strip_html = text.remove_html

        metadata = {
            "id"    : parse_int(post_id),
            "title" : title,
            "artist": artist,
            # self.user is None for single posts -> use the artist name
            "user"  : self.user or artist,
            "url"   : "https://d.facdn.net/" + path
        }
        data = text.nameext_from_url(path, metadata)

        tags = extr('class="tags-row">', '</section>')
        if tags:
            # new site layout
            data["tags"] = text.split_html(tags)
            description = extr('class="section-body">', '</div>')
            data["description"] = text.unescape(
                strip_html(description, "", ""))
            for key, begin in (
                ("views"    , 'class="views">'),
                ("favorites", 'class="favorites">'),
                ("comments" , 'class="comments">'),
            ):
                data[key] = parse_int(strip_html(extr(begin, '</span>')))
            data["rating"] = strip_html(extr('class="rating">', '</span>'))
            data["fa_category"] = strip_html(
                extr('>Category</strong>', '</span>'))
            data["theme"] = strip_html(extr('>', '<'))
            for key, begin in (
                ("species", '>Species</strong>'),
                ("gender" , '>Gender</strong>'),
            ):
                data[key] = strip_html(extr(begin, '</div>'))
            data["width"] = parse_int(extr("<span>", "x"))
            data["height"] = parse_int(extr("", "p"))
        else:
            # old site layout
            for key, begin in (
                ("fa_category", "<b>Category:</b>"),
                ("theme"      , "<b>Theme:</b>"),
                ("species"    , "<b>Species:</b>"),
                ("gender"     , "<b>Gender:</b>"),
            ):
                data[key] = extr(begin, "<").strip()
            for key, begin in (
                ("favorites", "<b>Favorites:</b>"),
                ("comments" , "<b>Comments:</b>"),
                ("views"    , "<b>Views:</b>"),
            ):
                data[key] = parse_int(extr(begin, "<"))
            data["width"] = parse_int(extr("<b>Resolution:</b>", "x"))
            data["height"] = parse_int(extr("", "<"))
            keywords = text.split_html(extr('id="keywords">', '</div>'))
            # keep every second entry of the split keyword markup
            data["tags"] = keywords[::2]
            data["rating"] = extr('<img alt="', ' ')
            description = extr("</table>", "</table>")
            data["description"] = text.unescape(
                text.remove_html(description, "", ""))

        # the part of the filename before its first '.' is parsed
        # as a timestamp
        timestamp = data["filename"].partition(".")[0]
        data["date"] = text.parse_timestamp(timestamp)

        return data

    def _pagination(self):
        """Yield post IDs from numbered '/<subcategory>/<user>/<n>/' pages

        Stops after the first page whose last extracted ID is falsy,
        which includes a page without any IDs.
        """
        page_num = 1

        while True:
            url = "{}/{}/{}/{}/".format(
                self.root, self.subcategory, self.user, page_num)
            html = self.request(url).text

            last_id = None
            for last_id in text.extract_iter(html, 'id="sid-', '"'):
                yield last_id

            if not last_id:
                break
            page_num += 1

    def _pagination_favorites(self):
        """Yield post IDs from favorites pages, following 'next' links"""
        next_path = "/favorites/{}/".format(self.user)

        while next_path:
            html = self.request(self.root + next_path).text
            for post_id in text.extract_iter(html, 'id="sid-', '"'):
                yield post_id
            # a falsy result here ends the loop
            next_path = text.extract(html, 'right" href="', '"')[0]


class FuraffinityGalleryExtractor(FuraffinityExtractor):
    """Extractor for a furaffinity user's gallery"""
    subcategory = "gallery"
    pattern = BASE_PATTERN + r"/gallery/([^/?&#]+)"
    test = (
        "https://www.furaffinity.net/gallery/mirlinthloth/",
        {
            "pattern": (r"https://d.facdn.net/art/mirlinthloth"
                        r"/\d+/\d+.\w+\.\w+"),
            "range": "45-50",
            "count": 6,
        },
    )


class FuraffinityScrapsExtractor(FuraffinityExtractor):
    """Extractor for a furaffinity user's scraps"""
    subcategory = "scraps"
    directory_fmt = ("{category}", "{user!l}", "Scraps")
    pattern = BASE_PATTERN + r"/scraps/([^/?&#]+)"
    test = (
        "https://www.furaffinity.net/scraps/mirlinthloth/",
        {
            "pattern": (r"https://d.facdn.net/art/[^/]+(/stories)?"
                        r"/\d+/\d+.\w+.\w+"),
            "count": ">= 3",
        },
    )


class FuraffinityFavoriteExtractor(FuraffinityExtractor):
    """Extractor for a furaffinity user's favorites"""
    subcategory = "favorite"
    directory_fmt = ("{category}", "{user!l}", "Favorites")
    pattern = BASE_PATTERN + r"/favorites/([^/?&#]+)"
    test = (
        "https://www.furaffinity.net/favorites/mirlinthloth/",
        {
            "pattern": r"https://d.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+",
            "range": "45-50",
            "count": 6,
        },
    )

    def posts(self):
        """Return post IDs from the favorites-specific pagination"""
        return self._pagination_favorites()


class FuraffinityPostExtractor(FuraffinityExtractor):
    """Extractor for individual posts on furaffinity"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)"
    test = (
        ("https://www.furaffinity.net/view/21835115/", {
            "url": "eae4ef93d99365c69b31a37561bd800c03d336ad",
            "keyword": {
                "artist"     : "mirlinthloth",
                "date"       : "dt:2016-11-27 17:24:06",
                "description": "A Song made playing the game Cosmic DJ.",
                "extension"  : "mp3",
                "filename"   : r"re:\d+\.\w+_dj_fennmink_-_bude_s_4_ever",
                "id"         : 21835115,
                "tags"       : list,
                "title"      : "Bude's 4 Ever",
                "url"        : "re:https://d.facdn.net/art/mirlinthloth/music",
                "user"       : "mirlinthloth",
                "views"      : int,
                "favorites"  : int,
                "comments"   : int,
                "rating"     : "General",
                "fa_category": "Music",
                "theme"      : "All",
                "species"    : "Unspecified / Any",
                "gender"     : "Any",
                "width"      : 120,
                "height"     : 120,
            },
        }),
        "https://furaffinity.net/view/21835115/",
        "https://sfw.furaffinity.net/view/21835115/",
        "https://www.furaffinity.net/full/21835115/",
    )

    def posts(self):
        """Return a 1-tuple with the post ID taken from the URL

        For this extractor the first pattern group is the numeric post
        ID, which the base class stored in 'self.user'.  'self.user' is
        reset to None here so that _parse_post() uses the artist name
        for the "user" field.
        """
        post_id, self.user = self.user, None
        return (post_id,)


class FuraffinityUserExtractor(FuraffinityExtractor):
    """Extractor for furaffinity user profiles"""
    subcategory = "user"
    cookiedomain = None
    pattern = BASE_PATTERN + r"/user/([^/?&#]+)"
    test = (
        ("https://www.furaffinity.net/user/mirlinthloth/", {
            "pattern": r"/gallery/mirlinthloth/$",
        }),
        ("https://www.furaffinity.net/user/mirlinthloth/", {
            "options": (("include", "all"),),
            "pattern": r"/(gallery|scraps|favorites)/mirlinthloth/$",
            "count": 3,
        }),
    )

    def items(self):
        """Hand the user's gallery/scraps/favorites URLs to sub-extractors

        ("gallery",) is passed as the second argument of
        _dispatch_extractors() -- presumably the default selection.
        """
        # '{{}}' survives the first format() as a '{}' placeholder
        template = "{}/{{}}/{}/".format(self.root, self.user)
        extractors = (
            (FuraffinityGalleryExtractor , template.format("gallery")),
            (FuraffinityScrapsExtractor  , template.format("scraps")),
            (FuraffinityFavoriteExtractor, template.format("favorites")),
        )
        default = ("gallery",)
        return self._dispatch_extractors(extractors, default)
