diff options
Diffstat (limited to 'gallery_dl/extractor/pornhub.py')
| -rw-r--r-- | gallery_dl/extractor/pornhub.py | 170 |
1 files changed, 108 insertions, 62 deletions
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py index fa4efa0..c5ce832 100644 --- a/gallery_dl/extractor/pornhub.py +++ b/gallery_dl/extractor/pornhub.py @@ -19,6 +19,35 @@ class PornhubExtractor(Extractor): category = "pornhub" root = "https://www.pornhub.com" + def _init(self): + self.cookies.set( + "accessAgeDisclaimerPH", "1", domain=".pornhub.com") + + def _pagination(self, user, path): + if "/" not in path: + path += "/public" + + url = "{}/{}/{}/ajax".format(self.root, user, path) + params = {"page": 1} + headers = { + "Referer": url[:-5], + "X-Requested-With": "XMLHttpRequest", + } + + while True: + response = self.request( + url, method="POST", headers=headers, params=params, + allow_redirects=False) + + if 300 <= response.status_code < 400: + url = "{}{}/{}/ajax".format( + self.root, response.headers["location"], path) + continue + + yield response.text + + params["page"] += 1 + class PornhubGalleryExtractor(PornhubExtractor): """Extractor for image galleries on pornhub.com""" @@ -27,30 +56,7 @@ class PornhubGalleryExtractor(PornhubExtractor): filename_fmt = "{num:>03}_{id}.{extension}" archive_fmt = "{id}" pattern = BASE_PATTERN + r"/album/(\d+)" - test = ( - ("https://www.pornhub.com/album/19289801", { - "pattern": r"https://\w+.phncdn.com/pics/albums/\d+/\d+/\d+/\d+/", - "count": ">= 300", - "keyword": { - "id" : int, - "num" : int, - "score" : int, - "views" : int, - "caption": str, - "user" : "Danika Mori", - "gallery": { - "id" : 19289801, - "score": int, - "views": int, - "tags" : list, - "title": "Danika Mori Best Moments", - }, - }, - }), - ("https://www.pornhub.com/album/69040172", { - "exception": exception.AuthorizationError, - }), - ) + example = "https://www.pornhub.com/album/12345" def __init__(self, match): PornhubExtractor.__init__(self, match) @@ -58,9 +64,6 @@ class PornhubGalleryExtractor(PornhubExtractor): self._first = None def items(self): - self.session.cookies.set( - "accessAgeDisclaimerPH", "1", domain=".pornhub.com") - data = self.metadata() yield Message.Directory, data for num, image in enumerate(self.images(), 1): @@ -111,57 +114,100 @@ class PornhubGalleryExtractor(PornhubExtractor): "views" : text.parse_int(img["times_viewed"]), "score" : text.parse_int(img["vote_percent"]), } - key = img["next"] + key = str(img["next"]) if key == end: return +class PornhubGifExtractor(PornhubExtractor): + """Extractor for pornhub.com gifs""" + subcategory = "gif" + directory_fmt = ("{category}", "{user}", "gifs") + filename_fmt = "{id} {title}.{extension}" + archive_fmt = "{id}" + pattern = BASE_PATTERN + r"/gif/(\d+)" + example = "https://www.pornhub.com/gif/12345" + + def __init__(self, match): + PornhubExtractor.__init__(self, match) + self.gallery_id = match.group(1) + + def items(self): + url = "{}/gif/{}".format(self.root, self.gallery_id) + extr = text.extract_from(self.request(url).text) + + gif = { + "id" : self.gallery_id, + "tags" : extr("data-context-tag='", "'").split(","), + "title": extr('"name": "', '"'), + "url" : extr('"contentUrl": "', '"'), + "date" : text.parse_datetime( + extr('"uploadDate": "', '"'), "%Y-%m-%d"), + "user" : extr('data-mxptext="', '"'), + } + + yield Message.Directory, gif + yield Message.Url, gif["url"], text.nameext_from_url(gif["url"], gif) + + class PornhubUserExtractor(PornhubExtractor): - """Extractor for all galleries of a pornhub user""" + """Extractor for a pornhub user""" subcategory = "user" - pattern = (BASE_PATTERN + r"/(users|model|pornstar)/([^/?#]+)" - "(?:/photos(?:/(public|private|favorites))?)?/?$") - test = ( - ("https://www.pornhub.com/pornstar/danika-mori/photos", { - "pattern": PornhubGalleryExtractor.pattern, - "count": ">= 6", - }), - ("https://www.pornhub.com/users/flyings0l0/"), - ("https://www.pornhub.com/users/flyings0l0/photos/public"), - ("https://www.pornhub.com/users/flyings0l0/photos/private"), - ("https://www.pornhub.com/users/flyings0l0/photos/favorites"), - ("https://www.pornhub.com/model/bossgirl/photos"), - ) + pattern = BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)/?$" + example = "https://www.pornhub.com/model/USER" def __init__(self, match): PornhubExtractor.__init__(self, match) - self.type, self.user, self.cat = match.groups() + self.user = match.group(1) + + def initialize(self): + pass def items(self): - url = "{}/{}/{}/photos/{}/ajax".format( - self.root, self.type, self.user, self.cat or "public") - params = {"page": 1} - headers = { - "Referer": url[:-5], - "X-Requested-With": "XMLHttpRequest", - } + base = "{}/{}/".format(self.root, self.user) + return self._dispatch_extractors(( + (PornhubPhotosExtractor, base + "photos"), + (PornhubGifsExtractor , base + "gifs"), + ), ("photos",)) - data = {"_extractor": PornhubGalleryExtractor} - while True: - response = self.request( - url, method="POST", headers=headers, params=params, - allow_redirects=False) - if 300 <= response.status_code < 400: - url = "{}{}/photos/{}/ajax".format( - self.root, response.headers["location"], - self.cat or "public") - continue +class PornhubPhotosExtractor(PornhubExtractor): + """Extractor for all galleries of a pornhub user""" + subcategory = "photos" + pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)" + "/(photos(?:/[^/?#]+)?)") + example = "https://www.pornhub.com/model/USER/photos" + + def __init__(self, match): + PornhubExtractor.__init__(self, match) + self.user, self.path = match.groups() + def items(self): + data = {"_extractor": PornhubGalleryExtractor} + for page in self._pagination(self.user, self.path): gid = None - for gid in text.extract_iter(response.text, 'id="albumphoto', '"'): + for gid in text.extract_iter(page, 'id="albumphoto', '"'): yield Message.Queue, self.root + "/album/" + gid, data if gid is None: return - params["page"] += 1 + +class PornhubGifsExtractor(PornhubExtractor): + """Extractor for a pornhub user's gifs""" + subcategory = "gifs" + pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)" + "/(gifs(?:/[^/?#]+)?)") + example = "https://www.pornhub.com/model/USER/gifs" + + def __init__(self, match): + PornhubExtractor.__init__(self, match) + self.user, self.path = match.groups() + + def items(self): + data = {"_extractor": PornhubGifExtractor} + for page in self._pagination(self.user, self.path): + gid = None + for gid in text.extract_iter(page, 'id="gif', '"'): + yield Message.Queue, self.root + "/gif/" + gid, data + if gid is None: + return |
