1 files changed, 114 insertions, 92 deletions
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 2b6742e..fb2f32c 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2019-2023 Mike Fährmann
+# Copyright 2019-2025 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -35,20 +35,21 @@ class PatreonExtractor(Extractor):
                 self.session.headers["User-Agent"] = \
                     "Patreon/7.6.28 (Android; Android 11; Scale/2.10)"
 
-        format_images = self.config("format-images")
-        if format_images:
+        if format_images := self.config("format-images"):
             self._images_fmt = format_images
             self._images_url = self._images_url_fmt
 
+        self._cursor = None
+
     def items(self):
         generators = self._build_file_generators(self.config("files"))
 
         for post in self.posts():
 
+            yield Message.Directory, post
             if not post.get("current_user_can_view", True):
                 self.log.warning("Not allowed to view post %s", post["id"])
                 continue
-            yield Message.Directory, post
 
             post["num"] = 0
             hashes = set()
@@ -63,18 +64,24 @@ class PatreonExtractor(Extractor):
                     text.nameext_from_url(name, post)
                     if text.ext_from_url(url) == "m3u8":
                         url = "ytdl:" + url
+                        headers = {"referer": self.root + "/"}
                         post["_ytdl_manifest"] = "hls"
+                        post["_ytdl_manifest_headers"] = headers
+                        post["_ytdl_extra"] = {"http_headers": headers}
                         post["extension"] = "mp4"
                     yield Message.Url, url, post
                 else:
                     self.log.debug("skipping %s (%s %s)", url, fhash, kind)
 
+    def finalize(self):
+        if self._cursor:
+            self.log.info("Use '-o cursor=%s' to continue downloading "
+                          "from the current position", self._cursor)
+
     def _postfile(self, post):
-        postfile = post.get("post_file")
-        if postfile:
+        if postfile := post.get("post_file"):
             url = postfile["url"]
-            name = postfile.get("name")
-            if not name:
+            if not (name := postfile.get("name")):
                 if url.startswith("https://stream.mux.com/"):
                     name = url
                 else:
@@ -83,11 +90,11 @@ class PatreonExtractor(Extractor):
         return ()
 
     def _images(self, post):
-        for image in post.get("images") or ():
-            url = self._images_url(image)
-            if url:
-                name = image.get("file_name") or self._filename(url) or url
-                yield "image", url, name
+        if images := post.get("images"):
+            for image in images:
+                if url := self._images_url(image):
+                    name = image.get("file_name") or self._filename(url) or url
+                    yield "image", url, name
 
     def _images_url(self, image):
         return image.get("download_url")
@@ -99,32 +106,26 @@ class PatreonExtractor(Extractor):
             return image.get("download_url")
 
     def _image_large(self, post):
-        image = post.get("image")
-        if image:
-            url = image.get("large_url")
-            if url:
+        if image := post.get("image"):
+            if url := image.get("large_url"):
                 name = image.get("file_name") or self._filename(url) or url
                 return (("image_large", url, name),)
         return ()
 
     def _attachments(self, post):
         for attachment in post.get("attachments") or ():
-            url = self.request_location(attachment["url"], fatal=False)
-            if url:
+            if url := self.request_location(attachment["url"], fatal=False):
                 yield "attachment", url, attachment["name"]
 
         for attachment in post.get("attachments_media") or ():
-            url = attachment.get("download_url")
-            if url:
+            if url := attachment.get("download_url"):
                 yield "attachment", url, attachment["file_name"]
 
     def _content(self, post):
-        content = post.get("content")
-        if content:
+        if content := post.get("content"):
             for img in text.extract_iter(
                     content, '<img data-media-id="', '>'):
-                url = text.extr(img, 'src="', '"')
-                if url:
+                if url := text.extr(img, 'src="', '"'):
                     yield "content", url, self._filename(url) or url
 
     def posts(self):
@@ -136,8 +137,9 @@ class PatreonExtractor(Extractor):
         }
 
         while url:
+            self._update_cursor(url)
             url = text.ensure_http_scheme(url)
-            posts = self.request(url, headers=headers).json()
+            posts = self.request_json(url, headers=headers)
 
             if "included" in posts:
                 included = self._transform(posts["included"])
@@ -145,56 +147,67 @@ class PatreonExtractor(Extractor):
                     yield self._process(post, included)
 
             if "links" not in posts:
-                return
+                break
             url = posts["links"].get("next")
 
+        self._update_cursor("")
+
+    def _init_cursor(self):
+        if cursor := self.config("cursor", True):
+            return "" if cursor is True else cursor
+        self._update_cursor = util.identity
+        return ""
+
+    def _update_cursor(self, url):
+        params = text.parse_query(url.partition("?")[2])
+        self._cursor = cursor = params.get("page[cursor]")
+        if cursor:
+            self.log.debug("Cursor: %s", cursor)
+        return cursor
+
     def _process(self, post, included):
         """Process and extend a 'post' object"""
         attr = post["attributes"]
         attr["id"] = text.parse_int(post["id"])
 
-        if attr.get("current_user_can_view", True):
-
-            relationships = post["relationships"]
-            attr["images"] = self._files(
-                post, included, "images")
-            attr["attachments"] = self._files(
-                post, included, "attachments")
-            attr["attachments_media"] = self._files(
-                post, included, "attachments_media")
-            attr["date"] = text.parse_datetime(
-                attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+        relationships = post["relationships"]
+        attr["images"] = self._files(
+            post, included, "images")
+        attr["attachments"] = self._files(
+            post, included, "attachments")
+        attr["attachments_media"] = self._files(
+            post, included, "attachments_media")
+        attr["date"] = text.parse_datetime(
+            attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
 
-            try:
-                attr["campaign"] = (included["campaign"][
-                                    relationships["campaign"]["data"]["id"]])
-            except Exception:
-                attr["campaign"] = None
+        try:
+            attr["campaign"] = (included["campaign"][
+                                relationships["campaign"]["data"]["id"]])
+        except Exception:
+            attr["campaign"] = None
 
-            tags = relationships.get("user_defined_tags")
-            attr["tags"] = [
-                tag["id"].replace("user_defined;", "")
-                for tag in tags["data"]
-                if tag["type"] == "post_tag"
-            ] if tags else []
+        tags = relationships.get("user_defined_tags")
+        attr["tags"] = [
+            tag["id"].replace("user_defined;", "")
+            for tag in tags["data"]
+            if tag["type"] == "post_tag"
+        ] if tags else []
 
-            user = relationships["user"]
-            attr["creator"] = (
-                self._user(user["links"]["related"]) or
-                included["user"][user["data"]["id"]])
+        user = relationships["user"]
+        attr["creator"] = (
+            self._user(user["links"]["related"]) or
+            included["user"][user["data"]["id"]])
 
         return attr
 
-    @staticmethod
-    def _transform(included):
+    def _transform(self, included):
         """Transform 'included' into an easier to handle format"""
         result = collections.defaultdict(dict)
         for inc in included:
             result[inc["type"]][inc["id"]] = inc["attributes"]
         return result
 
-    @staticmethod
-    def _files(post, included, key):
+    def _files(self, post, included, key):
         """Build a list of files"""
         files = post["relationships"].get(key)
         if files and files.get("data"):
@@ -223,8 +236,7 @@ class PatreonExtractor(Extractor):
         cd = response.headers.get("Content-Disposition")
         return text.extr(cd, 'filename="', '"')
 
-    @staticmethod
-    def _filehash(url):
+    def _filehash(self, url):
         """Extract MD5 hash from a download URL"""
         parts = url.partition("?")[0].split("/")
         parts.reverse()
@@ -234,10 +246,9 @@ class PatreonExtractor(Extractor):
                 return part
         return ""
 
-    @staticmethod
-    def _build_url(endpoint, query):
+    def _build_url(self, endpoint, query):
         return (
-            "https://www.patreon.com/api/" + endpoint +
+            f"https://www.patreon.com/api/{endpoint}"
 
             "?include=campaign,access_rules,attachments,attachments_media,"
             "audio,images,media,native_video_insights,poll.choices,"
@@ -267,7 +278,10 @@ class PatreonExtractor(Extractor):
             "&fields[media]=id,image_urls,download_url,metadata,file_name"
             "&fields[native_video_insights]=average_view_duration,"
             "average_view_pct,has_preview,id,last_updated_at,num_views,"
-            "preview_views,video_duration" + query +
+            "preview_views,video_duration"
+
+            f"&page[cursor]={self._init_cursor()}"
+            f"{query}"
 
             "&json-api-version=1.0"
         )
@@ -307,18 +321,16 @@ class PatreonExtractor(Extractor):
         if bootstrap:
             return util.json_loads(bootstrap + "}")
 
-        bootstrap = text.extr(page, "window.patreon.bootstrap,", "});")
-        if bootstrap:
+        if bootstrap := text.extr(page, "window.patreon.bootstrap,", "});"):
             return util.json_loads(bootstrap + "}")
 
-        data = text.extr(page, "window.patreon = {", "};\n")
-        if data:
+        if data := text.extr(page, "window.patreon = {", "};\n"):
             try:
-                return util.json_loads("{" + data + "}")["bootstrap"]
+                return util.json_loads(f"{{{data}}}")["bootstrap"]
             except Exception:
                 pass
 
-        raise exception.StopExtraction("Unable to extract bootstrap data")
+        raise exception.AbortExtraction("Unable to extract bootstrap data")
 
 
 class PatreonCreatorExtractor(PatreonExtractor):
@@ -327,57 +339,63 @@ class PatreonCreatorExtractor(PatreonExtractor):
     pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
                r"/(?!(?:home|create|login|signup|search|posts|messages)"
                r"(?:$|[/?#]))"
-               r"(?:profile/creators|(?:c/)?([^/?#]+)(?:/posts)?)"
+               r"(?:profile/creators|(?:cw?/)?([^/?#]+)(?:/posts)?)"
                r"/?(?:\?([^#]+))?")
     example = "https://www.patreon.com/c/USER"
 
     def posts(self):
         creator, query = self.groups
 
-        query = text.parse_query(query)
-        campaign_id = self._get_campaign_id(creator, query)
-        filters = self._get_filters(query)
-
+        params = text.parse_query(query)
+        campaign_id = self._get_campaign_id(creator, params)
         self.log.debug("campaign_id: %s", campaign_id)
 
         url = self._build_url("posts", (
-            "&filter[campaign_id]=" + campaign_id +
+            f"&filter[campaign_id]={campaign_id}"
             "&filter[contains_exclusive_posts]=true"
-            "&filter[is_draft]=false" + filters +
-            "&sort=" + query.get("sort", "-published_at")
+            "&filter[is_draft]=false"
+            f"{self._get_filters(params)}"
+            f"&sort={params.get('sort', '-published_at')}"
         ))
         return self._pagination(url)
 
-    def _get_campaign_id(self, creator, query):
+    def _get_campaign_id(self, creator, params):
         if creator and creator.startswith("id:"):
             return creator[3:]
 
-        campaign_id = query.get("c") or query.get("campaign_id")
-        if campaign_id:
+        if campaign_id := params.get("c") or params.get("campaign_id"):
             return campaign_id
 
-        user_id = query.get("u")
-        if user_id:
-            url = "{}/user?u={}".format(self.root, user_id)
+        if user_id := params.get("u"):
+            url = f"{self.root}/user?u={user_id}"
         else:
-            url = "{}/{}".format(self.root, creator)
+            url = f"{self.root}/{creator}"
         page = self.request(url, notfound="creator").text
 
         try:
             data = None
             data = self._extract_bootstrap(page)
             return data["campaign"]["data"]["id"]
+        except exception.ControlException:
+            pass
         except Exception as exc:
             if data:
                 self.log.debug(data)
-            raise exception.StopExtraction(
-                "Unable to extract campaign ID (%s: %s)",
-                exc.__class__.__name__, exc)
+            raise exception.AbortExtraction(
+                f"Unable to extract campaign ID "
+                f"({exc.__class__.__name__}: {exc})")
+
+        # Next.js 13
+        if cid := text.extr(
+                page, r'{\"value\":{\"campaign\":{\"data\":{\"id\":\"', '\\"'):
+            return cid
 
-    def _get_filters(self, query):
+        raise exception.AbortExtraction("Failed to extract campaign ID")
+
+    def _get_filters(self, params):
         return "".join(
-            "&filter[{}={}".format(key[8:], text.escape(value))
-            for key, value in query.items()
+            f"&filter[{key[8:]}={text.escape(value)}"
+            for key, value in params.items()
             if key.startswith("filters[")
         )
 
@@ -389,8 +407,12 @@ class PatreonUserExtractor(PatreonExtractor):
     example = "https://www.patreon.com/home"
 
     def posts(self):
+        if date_max := self._get_date_min_max(None, None)[1]:
+            self._cursor = cursor = \
+                util.datetime_from_timestamp(date_max).isoformat()
+            self._init_cursor = lambda: cursor
+
         url = self._build_url("stream", (
-            "&page[cursor]=null"
             "&filter[is_following]=true"
             "&json-api-use-default-includes=false"
         ))
@@ -404,7 +426,7 @@ class PatreonPostExtractor(PatreonExtractor):
     example = "https://www.patreon.com/posts/TITLE-12345"
 
     def posts(self):
-        url = "{}/posts/{}".format(self.root, self.groups[0])
+        url = f"{self.root}/posts/{self.groups[0]}"
         page = self.request(url, notfound="post").text
         bootstrap = self._extract_bootstrap(page)