diff options
Diffstat (limited to 'gallery_dl/extractor/patreon.py')
| -rw-r--r-- | gallery_dl/extractor/patreon.py | 86 |
1 files changed, 54 insertions, 32 deletions
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 9b13391..1e52559 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -9,7 +9,7 @@ """Extractors for https://www.patreon.com/""" from .common import Extractor, Message -from .. import text +from .. import text, exception from ..cache import memcache import collections import json @@ -33,13 +33,15 @@ class PatreonExtractor(Extractor): PatreonExtractor._warning = False for post in self.posts(): - yield Message.Directory, post - ids = set() post["num"] = 0 content = post.get("content") postfile = post.get("post_file") + yield Message.Directory, post + yield Message.Metadata, text.nameext_from_url( + post["creator"].get("image_url", ""), post) + for image in post["images"]: url = image.get("download_url") if not url: @@ -97,8 +99,10 @@ class PatreonExtractor(Extractor): attr["attachments"] = self._files(post, included, "attachments") attr["date"] = text.parse_datetime( attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z") - attr["creator"] = self._user( - post["relationships"]["user"]["links"]["related"]) + user = post["relationships"]["user"] + attr["creator"] = ( + self._user(user["links"]["related"]) or + included["user"][user["data"]["id"]]) return attr @staticmethod @@ -123,7 +127,10 @@ class PatreonExtractor(Extractor): @memcache(keyarg=1) def _user(self, url): """Fetch user information""" - user = self.request(url).json()["data"] + response = self.request(url, fatal=False) + if response.status_code >= 400: + return None + user = response.json()["data"] attr = user["attributes"] attr["id"] = user["id"] attr["date"] = text.parse_datetime( @@ -168,23 +175,28 @@ class PatreonCreatorExtractor(PatreonExtractor): pattern = (r"(?:https?://)?(?:www\.)?patreon\.com" r"/(?!(?:home|join|posts|login|signup)(?:$|[/?&#]))" r"([^/?&#]+)/?") - test = ("https://www.patreon.com/koveliana", { - "range": "1-25", - "count": ">= 25", - "keyword": { - "attachments": list, - "comment_count": int, - "content": str, - "creator": dict, - "date": "type:datetime", - "id": int, - "images": list, - "like_count": int, - "post_type": str, - "published_at": str, - "title": str, - }, - }) + test = ( + ("https://www.patreon.com/koveliana", { + "range": "1-25", + "count": ">= 25", + "keyword": { + "attachments" : list, + "comment_count": int, + "content" : str, + "creator" : dict, + "date" : "type:datetime", + "id" : int, + "images" : list, + "like_count" : int, + "post_type" : str, + "published_at" : str, + "title" : str, + }, + }), + ("https://www.patreon.com/kovelianot", { + "exception": exception.NotFoundError, + }), + ) def __init__(self, match): PatreonExtractor.__init__(self, match) @@ -192,9 +204,12 @@ class PatreonCreatorExtractor(PatreonExtractor): def posts(self): url = "{}/{}".format(self.root, self.creator) - page = self.request(url).text + page = self.request(url, notfound="creator").text campaign_id = text.extract(page, "/campaign/", "/")[0] + if not campaign_id: + raise exception.NotFoundError("creator") + url = self._build_url("posts", ( "&sort=-published_at" "&filter[is_draft]=false" @@ -221,19 +236,26 @@ class PatreonUserExtractor(PatreonExtractor): class PatreonPostExtractor(PatreonExtractor): """Extractor for media from a single post""" subcategory = "post" - pattern = (r"(?:https?://)?(?:www\.)?patreon\.com" - r"/posts/[^/?&#]*?(\d+)") - test = ("https://www.patreon.com/posts/precious-metal-23563293", { - "count": 4, - }) + pattern = r"(?:https?://)?(?:www\.)?patreon\.com/posts/([^/?&#]+)" + test = ( + ("https://www.patreon.com/posts/precious-metal-23563293", { + "count": 4, + }), + ("https://www.patreon.com/posts/er1-28201153", { + "count": 1, + }), + ("https://www.patreon.com/posts/not-found-123", { + "exception": exception.NotFoundError, + }), + ) def __init__(self, match): PatreonExtractor.__init__(self, match) - self.post_id = match.group(1) + self.slug = match.group(1) def posts(self): - url = "{}/posts/{}".format(self.root, self.post_id) - page = self.request(url).text + url = "{}/posts/{}".format(self.root, self.slug) + page = self.request(url, notfound="post").text data = text.extract(page, "window.patreon.bootstrap,", "\n});")[0] post = json.loads(data + "}")["post"] |
