summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/patreon.py
diff options
context:
space:
mode:
author Libravatar Unit 193 <unit193@gmail.com> 2020-05-03 00:06:40 -0400
committer Libravatar Unit 193 <unit193@gmail.com> 2020-05-03 00:06:40 -0400
commit 90e50db2e3c38f523bb5195d295290b06e5cedb0 (patch)
tree 4759dc0faea79f83fa5074e2d0bd82b18a9caaea /gallery_dl/extractor/patreon.py
parent d5b96ce44b7809f5ae01e3e9d70a1d58fe21ccf5 (diff)
New upstream version 1.13.6 [upstream/1.13.6]
Diffstat (limited to 'gallery_dl/extractor/patreon.py')
-rw-r--r-- gallery_dl/extractor/patreon.py 46
1 files changed, 34 insertions, 12 deletions
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 18c10a6..570bd72 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -47,8 +47,8 @@ class PatreonExtractor(Extractor):
self._attachments(post),
self._content(post),
):
- fhash = url.split("/")[9].partition("?")[0]
- if fhash not in hashes:
+ fhash = self._filehash(url)
+ if fhash not in hashes or not fhash:
hashes.add(fhash)
post["hash"] = fhash
post["type"] = kind
@@ -158,12 +158,23 @@ class PatreonExtractor(Extractor):
return attr
def _filename(self, url):
- """Fetch filename from its Content-Disposition header"""
+ """Fetch filename from an URL's Content-Disposition header"""
response = self.request(url, method="HEAD", fatal=False)
cd = response.headers.get("Content-Disposition")
return text.extract(cd, 'filename="', '"')[0]
@staticmethod
+ def _filehash(url):
+ """Extract MD5 hash from a download URL"""
+ parts = url.partition("?")[0].split("/")
+ parts.reverse()
+
+ for part in parts:
+ if len(part) == 32:
+ return part
+ return ""
+
+ @staticmethod
def _build_url(endpoint, query):
return (
"https://www.patreon.com/api/" + endpoint +
@@ -194,7 +205,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
subcategory = "creator"
pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
r"/(?!(?:home|join|posts|login|signup)(?:$|[/?&#]))"
- r"(?:user(?:/posts)?/?\?([^#]+)|([^/?&#]+)/?)")
+ r"([^/?&#]+)(?:/posts)?/?(?:\?([^#]+))?")
test = (
("https://www.patreon.com/koveliana", {
"range": "1-25",
@@ -213,6 +224,10 @@ class PatreonCreatorExtractor(PatreonExtractor):
"title" : str,
},
}),
+ ("https://www.patreon.com/koveliana/posts?filters[month]=2020-3", {
+ "count": 1,
+ "keyword": {"date": "dt:2020-03-30 21:21:44"},
+ }),
("https://www.patreon.com/kovelianot", {
"exception": exception.NotFoundError,
}),
@@ -222,26 +237,33 @@ class PatreonCreatorExtractor(PatreonExtractor):
def __init__(self, match):
PatreonExtractor.__init__(self, match)
- self.query, self.creator = match.groups()
+ self.creator, self.query = match.groups()
def posts(self):
- if self.creator:
- url = "{}/{}".format(self.root, self.creator.lower())
+ query = text.parse_query(self.query)
+
+ creator_id = query.get("u")
+ if creator_id:
+ url = "{}/user?u={}".format(self.root, creator_id)
else:
- query = text.parse_query(self.query)
- url = "{}/user?u={}".format(self.root, query.get("u"))
+ url = "{}/{}".format(self.root, self.creator.lower())
page = self.request(url, notfound="creator").text
campaign_id = text.extract(page, "/campaign/", "/")[0]
-
if not campaign_id:
raise exception.NotFoundError("creator")
+ filters = "".join(
+ "&filter[{}={}".format(key[8:], text.escape(value))
+ for key, value in query.items()
+ if key.startswith("filters[")
+ )
+
url = self._build_url("posts", (
- "&sort=-published_at"
+ "&sort=" + query.get("sort", "-published_at") +
"&filter[is_draft]=false"
"&filter[contains_exclusive_posts]=true"
- "&filter[campaign_id]=" + campaign_id
+ "&filter[campaign_id]=" + campaign_id + filters
))
return self._pagination(url)