New upstream version 1.13.2 (tag: upstream/1.13.2)

author: Unit 193 <unit193@ubuntu.com> 2020-03-16 23:20:15 -0400
committer: Unit 193 <unit193@ubuntu.com> 2020-03-16 23:20:15 -0400
commit: e8cc000750de972384f2f34d02d42222b4018ae9 (patch)
tree: 26eb0bacedff7480d29bafcf184ca529cf9f1d9f /gallery_dl/extractor/furaffinity.py
parent: 4366125d2580982abb57bc65a26fc1fb8ef2a5df (diff)
1 files changed, 235 insertions, 0 deletions
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
new file mode 100644
index 0000000..ba60e19
--- /dev/null
+++ b/gallery_dl/extractor/furaffinity.py
@@ -0,0 +1,235 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.furaffinity.net/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+# Optional scheme and optional "www."/"sfw." subdomain, then the site domain
+BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?furaffinity\.net"
+
+
+class FuraffinityExtractor(Extractor):
+    """Base class for furaffinity extractors: paging and post-page parsing"""
+    category = "furaffinity"
+    directory_fmt = ("{category}", "{user!l}")
+    filename_fmt = "{id} {title}.{extension}"
+    archive_fmt = "{id}"
+    cookiedomain = ".furaffinity.net"
+    root = "https://www.furaffinity.net"  # prefix of every request URL
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.user = match.group(1)  # first capture group of `pattern`
+        self.offset = 0  # raised by skip(); passed to util.advance()
+
+    def items(self):
+        for post_id in util.advance(self.posts(), self.offset):
+            post = self._parse_post(post_id)
+            if post:  # None when the post has no file link
+                yield Message.Directory, post
+                yield Message.Url, post["url"], post
+
+    def posts(self):
+        return self._pagination()  # subclasses may override posts()
+
+    def skip(self, num):
+        self.offset += num  # consumed later in items()
+        return num
+
+    def _parse_post(self, post_id):
+        url = "{}/view/{}/".format(self.root, post_id)
+        extr = text.extract_from(self.request(url).text)
+        title, _, artist = text.unescape(extr(  # "<title> by <artist>"
+            'property="og:title" content="', '"')).rpartition(" by ")
+        path = extr('href="//d.facdn.net/', '"')  # path on d.facdn.net
+
+        if not path:  # no file link: log the site's message and give up
+            self.log.warning(
+                "Unable to download post %s (\"%s\")",
+                post_id, text.remove_html(
+                    extr('System Message', '</section>') or
+                    extr('System Message', '</table>')
+                )
+            )
+            return None
+
+        pi = text.parse_int  # short aliases for the lookups below
+        rh = text.remove_html
+
+        data = text.nameext_from_url(path, {
+            "id"    : pi(post_id),
+            "title" : title,
+            "artist": artist,
+            "user"  : self.user or artist,  # artist if no user is set
+            "url"   : "https://d.facdn.net/" + path
+        })
+
+        tags = extr('class="tags-row">', '</section>')
+        if tags:
+            # new site layout; extr() calls presumably depend on their order
+            data["tags"] = text.split_html(tags)
+            data["description"] = text.unescape(rh(extr(
+                'class="section-body">', '</div>'), "", ""))
+            data["views"] = pi(rh(extr('class="views">', '</span>')))
+            data["favorites"] = pi(rh(extr('class="favorites">', '</span>')))
+            data["comments"] = pi(rh(extr('class="comments">', '</span>')))
+            data["rating"] = rh(extr('class="rating">', '</span>'))
+            data["fa_category"] = rh(extr('>Category</strong>', '</span>'))
+            data["theme"] = rh(extr('>', '<'))  # between next ">" and "<"
+            data["species"] = rh(extr('>Species</strong>', '</div>'))
+            data["gender"] = rh(extr('>Gender</strong>', '</div>'))
+            data["width"] = pi(extr("<span>", "x"))
+            data["height"] = pi(extr("", "p"))  # presumably digits after "x"
+        else:
+            # old site layout; extr() calls presumably depend on their order
+            data["fa_category"] = extr("<b>Category:</b>", "<").strip()
+            data["theme"] = extr("<b>Theme:</b>", "<").strip()
+            data["species"] = extr("<b>Species:</b>", "<").strip()
+            data["gender"] = extr("<b>Gender:</b>", "<").strip()
+            data["favorites"] = pi(extr("<b>Favorites:</b>", "<"))
+            data["comments"] = pi(extr("<b>Comments:</b>", "<"))
+            data["views"] = pi(extr("<b>Views:</b>", "<"))
+            data["width"] = pi(extr("<b>Resolution:</b>", "x"))
+            data["height"] = pi(extr("", "<"))  # presumably digits after "x"
+            data["tags"] = text.split_html(extr(
+                'id="keywords">', '</div>'))[::2]  # keep every other item
+            data["rating"] = extr('<img alt="', ' ')  # first word of alt
+            data["description"] = text.unescape(text.remove_html(extr(
+                "</table>", "</table>"), "", ""))
+        data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
+
+        return data
+
+    def _pagination(self):
+        num = 1  # page number in the listing URL
+
+        while True:
+            url = "{}/{}/{}/{}/".format(
+                self.root, self.subcategory, self.user, num)
+            page = self.request(url).text
+            post_id = None  # still None after the loop on an empty page
+
+            for post_id in text.extract_iter(page, 'id="sid-', '"'):
+                yield post_id
+
+            if not post_id:  # page listed no posts: stop
+                return
+            num += 1
+
+    def _pagination_favorites(self):
+        path = "/favorites/{}/".format(self.user)
+
+        while path:
+            page = self.request(self.root + path).text
+            yield from text.extract_iter(page, 'id="sid-', '"')
+            path = text.extract(page, 'right" href="', '"')[0]  # next page
+
+
+class FuraffinityGalleryExtractor(FuraffinityExtractor):
+    """Extractor for the posts in a furaffinity user's gallery"""
+    subcategory = "gallery"  # also the URL path segment in _pagination()
+    pattern = BASE_PATTERN + r"/gallery/([^/?&#]+)"  # group 1: user name
+    test = ("https://www.furaffinity.net/gallery/mirlinthloth/", {
+        "pattern": r"https://d.facdn.net/art/mirlinthloth/\d+/\d+.\w+\.\w+",
+        "range": "45-50",
+        "count": 6,
+    })
+
+
+class FuraffinityScrapsExtractor(FuraffinityExtractor):
+    """Extractor for the posts in a furaffinity user's scraps"""
+    subcategory = "scraps"  # also the URL path segment in _pagination()
+    directory_fmt = ("{category}", "{user!l}", "Scraps")
+    pattern = BASE_PATTERN + r"/scraps/([^/?&#]+)"  # group 1: user name
+    test = ("https://www.furaffinity.net/scraps/mirlinthloth/", {
+        "pattern": r"https://d.facdn.net/art/[^/]+(/stories)?/\d+/\d+.\w+.\w+",
+        "count": ">= 3",
+    })
+
+
+class FuraffinityFavoriteExtractor(FuraffinityExtractor):
+    """Extractor for the posts a furaffinity user has favorited"""
+    subcategory = "favorite"
+    directory_fmt = ("{category}", "{user!l}", "Favorites")
+    pattern = BASE_PATTERN + r"/favorites/([^/?&#]+)"  # group 1: user name
+    test = ("https://www.furaffinity.net/favorites/mirlinthloth/", {
+        "pattern": r"https://d.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+",
+        "range": "45-50",
+        "count": 6,
+    })
+
+    def posts(self):
+        return self._pagination_favorites()  # pages via links, not numbers
+
+
+class FuraffinityPostExtractor(FuraffinityExtractor):
+    """Extractor for a single furaffinity post addressed by its ID"""
+    subcategory = "post"
+    pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)"  # group 1: post ID
+    test = (
+        ("https://www.furaffinity.net/view/21835115/", {
+            "url": "eae4ef93d99365c69b31a37561bd800c03d336ad",
+            "keyword": {
+                "artist"     : "mirlinthloth",
+                "date"       : "dt:2016-11-27 17:24:06",
+                "description": "A Song made playing the game Cosmic DJ.",
+                "extension"  : "mp3",
+                "filename"   : r"re:\d+\.\w+_dj_fennmink_-_bude_s_4_ever",
+                "id"         : 21835115,
+                "tags"       : list,
+                "title"      : "Bude's 4 Ever",
+                "url"        : "re:https://d.facdn.net/art/mirlinthloth/music",
+                "user"       : "mirlinthloth",
+                "views"      : int,
+                "favorites"  : int,
+                "comments"   : int,
+                "rating"     : "General",
+                "fa_category": "Music",
+                "theme"      : "All",
+                "species"    : "Unspecified / Any",
+                "gender"     : "Any",
+                "width"      : 120,
+                "height"     : 120,
+            },
+        }),
+        ("https://furaffinity.net/view/21835115/"),
+        ("https://sfw.furaffinity.net/view/21835115/"),
+        ("https://www.furaffinity.net/full/21835115/"),
+    )
+
+    def posts(self):
+        # the captured group is a post ID here, so clear `user` after use
+        post_id, self.user = self.user, None
+        return (post_id,)
+
+
+class FuraffinityUserExtractor(FuraffinityExtractor):
+    """Extractor for furaffinity user profiles (gallery/scraps/favorites)"""
+    subcategory = "user"
+    cookiedomain = None
+    pattern = BASE_PATTERN + r"/user/([^/?&#]+)"  # group 1: user name
+    test = (
+        ("https://www.furaffinity.net/user/mirlinthloth/", {
+            "pattern": r"/gallery/mirlinthloth/$",
+        }),
+        ("https://www.furaffinity.net/user/mirlinthloth/", {
+            "options": (("include", "all"),),
+            "pattern": r"/(gallery|scraps|favorites)/mirlinthloth/$",
+            "count": 3,
+        }),
+    )
+
+    def items(self):
+        section_url = "{}/{{}}/{}/".format(self.root, self.user).format
+        return self._dispatch_extractors((
+            (FuraffinityGalleryExtractor, section_url("gallery")),
+            (FuraffinityScrapsExtractor, section_url("scraps")),
+            (FuraffinityFavoriteExtractor, section_url("favorites")),
+        ), ("gallery",))  # presumably the default when "include" is unset
author	Unit 193 <unit193@ubuntu.com>	2020-03-16 23:20:15 -0400
committer	Unit 193 <unit193@ubuntu.com>	2020-03-16 23:20:15 -0400
commit	e8cc000750de972384f2f34d02d42222b4018ae9 (patch)
tree	26eb0bacedff7480d29bafcf184ca529cf9f1d9f /gallery_dl/extractor/furaffinity.py
parent	4366125d2580982abb57bc65a26fc1fb8ef2a5df (diff)