1 files changed, 93 insertions, 37 deletions
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 4884497..ab5932d 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -11,6 +11,8 @@
 from .common import Extractor, Message
 from .. import text
 from ..cache import memcache
+import collections
+import json
 
 
 class PatreonExtractor(Extractor):
@@ -33,70 +35,92 @@ class PatreonExtractor(Extractor):
         for post in self.posts():
             yield Message.Directory, post
 
+            ids = set()
             post["num"] = 0
             content = post.get("content")
             postfile = post.get("post_file")
 
-            for url in text.extract_iter(content or "", 'src="', '"'):
+            for image in post["images"]:
+                url = image.get("download_url")
+                if not url:
+                    continue
+                ids.add(url.split("/")[-2])
+                name = image.get("file_name") or self._filename(url) or url
+
                 post["num"] += 1
-                yield Message.Url, url, text.nameext_from_url(url, post)
+                post["type"] = "image"
+                yield Message.Url, url, text.nameext_from_url(name, post)
 
-            if postfile:
+            if postfile and postfile["url"].split("/")[-2] not in ids:
                 post["num"] += 1
+                post["type"] = "postfile"
                 text.nameext_from_url(postfile["name"], post)
                 yield Message.Url, postfile["url"], post
 
             for attachment in post["attachments"]:
                 post["num"] += 1
+                post["type"] = "attachment"
                 text.nameext_from_url(attachment["name"], post)
                 yield Message.Url, attachment["url"], post
 
+            if content:
+                for url in text.extract_iter(content, 'src="', '"'):
+                    post["num"] += 1
+                    post["type"] = "content"
+                    yield Message.Url, url, text.nameext_from_url(url, post)
+
     def posts(self):
         """Return all relevant post objects"""
 
     def _pagination(self, url):
         headers = {"Referer": self.root}
-        empty = []
 
         while url:
             posts = self.request(url, headers=headers).json()
 
-            if "included" not in posts:
-                return
-
-            # collect attachments
-            attachments = {}
-            for inc in posts["included"]:
-                if inc["type"] == "attachment":
-                    attachments[inc["id"]] = inc["attributes"]
-
-            # update posts
-            for post in posts["data"]:
-                attr = post["attributes"]
-                attr["id"] = text.parse_int(post["id"])
-                attr["date"] = text.parse_datetime(
-                    attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
-                attr["creator"] = self._user(
-                    post["relationships"]["user"]["links"]["related"])
-
-                # add attachments to post attributes
-                files = post["relationships"].get("attachments")
-                if files:
-                    attr["attachments"] = [
-                        attachments[f["id"]]
-                        for f in files["data"]
-                    ]
-                else:
-                    attr["attachments"] = empty
-
-                yield attr
+            if "included" in posts:
+                included = self._transform(posts["included"])
+                for post in posts["data"]:
+                    yield self._process(post, included)
 
             if "links" not in posts:
                 return
             url = posts["links"].get("next")
 
+    def _process(self, post, included):
+        """Process and extend a 'post' object"""
+        attr = post["attributes"]
+        attr["id"] = text.parse_int(post["id"])
+        attr["images"] = self._files(post, included, "images")
+        attr["attachments"] = self._files(post, included, "attachments")
+        attr["date"] = text.parse_datetime(
+            attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+        attr["creator"] = self._user(
+            post["relationships"]["user"]["links"]["related"])
+        return attr
+
+    @staticmethod
+    def _transform(included):
+        """Transform 'included' into an easier to handle format"""
+        result = collections.defaultdict(dict)
+        for inc in included:
+            result[inc["type"]][inc["id"]] = inc["attributes"]
+        return result
+
+    @staticmethod
+    def _files(post, included, key):
+        """Build a list of files"""
+        files = post["relationships"].get(key)
+        if files and files.get("data"):
+            return [
+                included[file["type"]][file["id"]]
+                for file in files["data"]
+            ]
+        return []
+
     @memcache(keyarg=1)
     def _user(self, url):
+        """Fetch user information"""
         user = self.request(url).json()["data"]
         attr = user["attributes"]
         attr["id"] = user["id"]
@@ -104,14 +128,21 @@ class PatreonExtractor(Extractor):
             attr["created"], "%Y-%m-%dT%H:%M:%S.%f%z")
         return attr
 
+    def _filename(self, url):
+        """Fetch filename from its Content-Disposition header"""
+        response = self.request(url, method="HEAD", fatal=False)
+        cd = response.headers.get("Content-Disposition")
+        return text.extract(cd, 'filename="', '"')[0]
+
     @staticmethod
     def _build_url(endpoint, query):
         return (
             "https://www.patreon.com/api/" + endpoint +
 
-            "?include=user,attachments,user_defined_tags,campaign,poll.choices"
-            ",poll.current_user_responses.user,poll.current_user_responses.cho"
-            "ice,poll.current_user_responses.poll,access_rules.tier.null"
+            "?include=user,images,attachments,user_defined_tags,campaign,poll."
+            "choices,poll.current_user_responses.user,poll.current_user_respon"
+            "ses.choice,poll.current_user_responses.poll,access_rules.tier.nul"
+            "l"
 
             "&fields[post]=change_visibility_at,comment_count,content,current_"
             "user_can_delete,current_user_can_view,current_user_has_liked,embe"
@@ -133,7 +164,8 @@ class PatreonCreatorExtractor(PatreonExtractor):
     """Extractor for a creator's works"""
     subcategory = "creator"
     pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
-               r"/(?!(?:home|join|login|signup)(?:$|[/?&#]))([^/?&#]+)/?")
+               r"/(?!(?:home|join|posts|login|signup)(?:$|[/?&#]))"
+               r"([^/?&#]+)/?")
     test = ("https://www.patreon.com/koveliana", {
         "range": "1-25",
         "count": ">= 25",
@@ -144,6 +176,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
             "creator": dict,
             "date": "type:datetime",
             "id": int,
+            "images": list,
             "like_count": int,
             "post_type": str,
             "published_at": str,
@@ -181,3 +214,26 @@ class PatreonUserExtractor(PatreonExtractor):
             "&filter[is_following]=true"
         ))
         return self._pagination(url)
+
+
+class PatreonPostExtractor(PatreonExtractor):
+    """Extractor for media from a single post"""
+    subcategory = "post"
+    pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
+               r"/posts/[^/?&#]*?(\d+)")
+    test = ("https://www.patreon.com/posts/precious-metal-23563293", {
+        "count": 4,
+    })
+
+    def __init__(self, match):
+        PatreonExtractor.__init__(self, match)
+        self.post_id = match.group(1)
+
+    def posts(self):
+        url = "{}/posts/{}".format(self.root, self.post_id)
+        page = self.request(url).text
+        data = text.extract(page, "window.patreon.bootstrap,", "\n});")[0]
+        post = json.loads(data + "}")["post"]
+
+        included = self._transform(post["included"])
+        return (self._process(post["data"], included),)