diff options
| author | 2020-03-16 23:20:15 -0400 | |
|---|---|---|
| committer | 2020-03-16 23:20:15 -0400 | |
| commit | e8cc000750de972384f2f34d02d42222b4018ae9 (patch) | |
| tree | 26eb0bacedff7480d29bafcf184ca529cf9f1d9f /gallery_dl/extractor/patreon.py | |
| parent | 4366125d2580982abb57bc65a26fc1fb8ef2a5df (diff) | |
New upstream version 1.13.2 (upstream/1.13.2)
Diffstat (limited to 'gallery_dl/extractor/patreon.py')
| -rw-r--r-- | gallery_dl/extractor/patreon.py | 82 |
1 files changed, 52 insertions, 30 deletions
```diff
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 1e52559..0d51df2 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -12,6 +12,7 @@ from .common import Extractor, Message
 from .. import text, exception
 from ..cache import memcache
 import collections
+import itertools
 import json
 
 
@@ -33,43 +34,62 @@ class PatreonExtractor(Extractor):
             PatreonExtractor._warning = False
 
         for post in self.posts():
-            ids = set()
             post["num"] = 0
-            content = post.get("content")
-            postfile = post.get("post_file")
+            hashes = set()
 
             yield Message.Directory, post
             yield Message.Metadata, text.nameext_from_url(
                 post["creator"].get("image_url", ""), post)
 
-            for image in post["images"]:
-                url = image.get("download_url")
-                if not url:
-                    continue
-                ids.add(url.split("/")[-2])
-                name = image.get("file_name") or self._filename(url) or url
+            for kind, url, name in itertools.chain(
+                self._postfile(post),
+                self._images(post),
+                self._attachments(post),
+                self._content(post),
+            ):
+                fhash = url.rsplit("/", 2)[1]
+                if fhash not in hashes:
+                    hashes.add(fhash)
+                    post["hash"] = fhash
+                    post["type"] = kind
+                    post["num"] += 1
+                    yield Message.Url, url, text.nameext_from_url(name, post)
+                else:
+                    self.log.debug("skipping %s (%s %s)", url, fhash, kind)
 
-                post["num"] += 1
-                post["type"] = "image"
-                yield Message.Url, url, text.nameext_from_url(name, post)
+    @staticmethod
+    def _postfile(post):
+        postfile = post.get("post_file")
+        if postfile:
+            return (("postfile", postfile["url"], postfile["name"]),)
+        return ()
+
+    def _images(self, post):
+        for image in post["images"]:
+            url = image.get("download_url")
+            if url:
+                name = image.get("file_name") or self._filename(url) or url
+                yield "image", url, name
 
-            if postfile and postfile["url"].split("/")[-2] not in ids:
-                post["num"] += 1
-                post["type"] = "postfile"
-                text.nameext_from_url(postfile["name"], post)
-                yield Message.Url, postfile["url"], post
+    def _attachments(self, post):
+        for attachment in post["attachments"]:
+            url = self.request(
+                attachment["url"], method="HEAD",
+                allow_redirects=False, fatal=False,
+            ).headers.get("Location")
 
-            for attachment in post["attachments"]:
-                post["num"] += 1
-                post["type"] = "attachment"
-                text.nameext_from_url(attachment["name"], post)
-                yield Message.Url, attachment["url"], post
+            if url:
+                yield "attachment", url, attachment["name"]
 
-            if content:
-                for url in text.extract_iter(content, 'src="', '"'):
-                    post["num"] += 1
-                    post["type"] = "content"
-                    yield Message.Url, url, text.nameext_from_url(url, post)
+    @staticmethod
+    def _content(post):
+        content = post.get("content")
+        if content:
+            for img in text.extract_iter(
+                    content, '<img data-media-id="', '>'):
+                url = text.extract(img, 'src="', '"')[0]
+                if url:
+                    yield "content", url, url
 
     def posts(self):
         """Return all relevant post objects"""
@@ -238,11 +258,13 @@ class PatreonPostExtractor(PatreonExtractor):
     subcategory = "post"
     pattern = r"(?:https?://)?(?:www\.)?patreon\.com/posts/([^/?&#]+)"
     test = (
+        # postfile + attachments
         ("https://www.patreon.com/posts/precious-metal-23563293", {
             "count": 4,
         }),
-        ("https://www.patreon.com/posts/er1-28201153", {
-            "count": 1,
+        # postfile + content
+        ("https://www.patreon.com/posts/19987002", {
+            "count": 4,
         }),
         ("https://www.patreon.com/posts/not-found-123", {
             "exception": exception.NotFoundError,
```
