summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/patreon.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/patreon.py')
-rw-r--r-- gallery_dl/extractor/patreon.py 86
1 files changed, 54 insertions, 32 deletions
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 9b13391..1e52559 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.patreon.com/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
from ..cache import memcache
import collections
import json
@@ -33,13 +33,15 @@ class PatreonExtractor(Extractor):
PatreonExtractor._warning = False
for post in self.posts():
- yield Message.Directory, post
-
ids = set()
post["num"] = 0
content = post.get("content")
postfile = post.get("post_file")
+ yield Message.Directory, post
+ yield Message.Metadata, text.nameext_from_url(
+ post["creator"].get("image_url", ""), post)
+
for image in post["images"]:
url = image.get("download_url")
if not url:
@@ -97,8 +99,10 @@ class PatreonExtractor(Extractor):
attr["attachments"] = self._files(post, included, "attachments")
attr["date"] = text.parse_datetime(
attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
- attr["creator"] = self._user(
- post["relationships"]["user"]["links"]["related"])
+ user = post["relationships"]["user"]
+ attr["creator"] = (
+ self._user(user["links"]["related"]) or
+ included["user"][user["data"]["id"]])
return attr
@staticmethod
@@ -123,7 +127,10 @@ class PatreonExtractor(Extractor):
@memcache(keyarg=1)
def _user(self, url):
"""Fetch user information"""
- user = self.request(url).json()["data"]
+ response = self.request(url, fatal=False)
+ if response.status_code >= 400:
+ return None
+ user = response.json()["data"]
attr = user["attributes"]
attr["id"] = user["id"]
attr["date"] = text.parse_datetime(
@@ -168,23 +175,28 @@ class PatreonCreatorExtractor(PatreonExtractor):
pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
r"/(?!(?:home|join|posts|login|signup)(?:$|[/?&#]))"
r"([^/?&#]+)/?")
- test = ("https://www.patreon.com/koveliana", {
- "range": "1-25",
- "count": ">= 25",
- "keyword": {
- "attachments": list,
- "comment_count": int,
- "content": str,
- "creator": dict,
- "date": "type:datetime",
- "id": int,
- "images": list,
- "like_count": int,
- "post_type": str,
- "published_at": str,
- "title": str,
- },
- })
+ test = (
+ ("https://www.patreon.com/koveliana", {
+ "range": "1-25",
+ "count": ">= 25",
+ "keyword": {
+ "attachments" : list,
+ "comment_count": int,
+ "content" : str,
+ "creator" : dict,
+ "date" : "type:datetime",
+ "id" : int,
+ "images" : list,
+ "like_count" : int,
+ "post_type" : str,
+ "published_at" : str,
+ "title" : str,
+ },
+ }),
+ ("https://www.patreon.com/kovelianot", {
+ "exception": exception.NotFoundError,
+ }),
+ )
def __init__(self, match):
PatreonExtractor.__init__(self, match)
@@ -192,9 +204,12 @@ class PatreonCreatorExtractor(PatreonExtractor):
def posts(self):
url = "{}/{}".format(self.root, self.creator)
- page = self.request(url).text
+ page = self.request(url, notfound="creator").text
campaign_id = text.extract(page, "/campaign/", "/")[0]
+ if not campaign_id:
+ raise exception.NotFoundError("creator")
+
url = self._build_url("posts", (
"&sort=-published_at"
"&filter[is_draft]=false"
@@ -221,19 +236,26 @@ class PatreonUserExtractor(PatreonExtractor):
class PatreonPostExtractor(PatreonExtractor):
"""Extractor for media from a single post"""
subcategory = "post"
- pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
- r"/posts/[^/?&#]*?(\d+)")
- test = ("https://www.patreon.com/posts/precious-metal-23563293", {
- "count": 4,
- })
+ pattern = r"(?:https?://)?(?:www\.)?patreon\.com/posts/([^/?&#]+)"
+ test = (
+ ("https://www.patreon.com/posts/precious-metal-23563293", {
+ "count": 4,
+ }),
+ ("https://www.patreon.com/posts/er1-28201153", {
+ "count": 1,
+ }),
+ ("https://www.patreon.com/posts/not-found-123", {
+ "exception": exception.NotFoundError,
+ }),
+ )
def __init__(self, match):
PatreonExtractor.__init__(self, match)
- self.post_id = match.group(1)
+ self.slug = match.group(1)
def posts(self):
- url = "{}/posts/{}".format(self.root, self.post_id)
- page = self.request(url).text
+ url = "{}/posts/{}".format(self.root, self.slug)
+ page = self.request(url, notfound="post").text
data = text.extract(page, "window.patreon.bootstrap,", "\n});")[0]
post = json.loads(data + "}")["post"]