# -*- coding: utf-8 -*- # Copyright 2019-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://www.pornhub.com/""" from .common import Extractor, Message, Dispatch from .. import text, exception BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?pornhub\.com" class PornhubExtractor(Extractor): """Base class for pornhub extractors""" category = "pornhub" root = "https://www.pornhub.com" def _init(self): self.cookies.set( "accessAgeDisclaimerPH", "1", domain=".pornhub.com") def _pagination(self, user, path): if "/" not in path: path += "/public" url = f"{self.root}/{user}/{path}/ajax" params = {"page": 1} headers = { "Referer": url[:-5], "X-Requested-With": "XMLHttpRequest", } while True: response = self.request( url, method="POST", headers=headers, params=params, allow_redirects=False) if 300 <= response.status_code < 400: url = f"{self.root}{response.headers['location']}/{path}/ajax" continue yield response.text params["page"] += 1 class PornhubGalleryExtractor(PornhubExtractor): """Extractor for image galleries on pornhub.com""" subcategory = "gallery" directory_fmt = ("{category}", "{user}", "{gallery[id]} {gallery[title]}") filename_fmt = "{num:>03}_{id}.{extension}" archive_fmt = "{id}" pattern = BASE_PATTERN + r"/album/(\d+)" example = "https://www.pornhub.com/album/12345" def __init__(self, match): PornhubExtractor.__init__(self, match) self.gallery_id = match[1] self._first = None def items(self): data = self.metadata() yield Message.Directory, "", data for num, img in enumerate(self.images(), 1): image = { "url" : img["img_large"], "caption": img["caption"], "id" : text.parse_int(img["id"]), "views" : text.parse_int(img["times_viewed"]), "score" : text.parse_int(img["vote_percent"]), "num" : num, } url = image["url"] image.update(data) yield Message.Url, url, text.nameext_from_url(url, 
image)

    def metadata(self):
        url = f"{self.root}/album/{self.gallery_id}"
        extr = text.extract_from(self.request(url).text)
        # NOTE(review): empty begin/end markers -- this looks like markup
        # that was stripped from the string literals; confirm against a
        # pristine copy of the file
        title = extr("", "")
        self._token = extr('name="token" value="', '"')
        # NOTE(review): DAMAGED SOURCE. From the next statement up to the
        # '")), }' fragment below, string literals are broken across
        # physical lines and code is missing (the rest of this method and
        # everything up to the end of a dict literal containing a "user"
        # key). The fragments are kept verbatim; restore them from a
        # pristine copy of the file instead of repairing them by hand.
        score = extr('
', '<') tags = extr('
', '<'), "user" : text.remove_html(extr("Created by:", "
")), }
        # NOTE(review): tail of an items() generator whose class header and
        # beginning are missing; 'gif' is the dict closed by the fragment
        # above. Presumably this belongs to PornhubGifExtractor, which
        # PornhubGifsExtractor references below -- confirm.
        yield Message.Directory, "", gif
        yield Message.Url, gif["url"], text.nameext_from_url(gif["url"], gif)


class PornhubUserExtractor(Dispatch, PornhubExtractor):
    """Extractor for a pornhub user"""
    # matches a bare users/model/pornstar profile URL,
    # optionally with a trailing slash
    pattern = BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)/?$"
    example = "https://www.pornhub.com/model/USER"

    def items(self):
        """Dispatch to the photos and gifs extractors of this profile."""
        base = f"{self.root}/{self.groups[0]}/"
        # the trailing ("photos",) is presumably the set of sub-extractors
        # enabled by default -- TODO confirm against Dispatch
        return self._dispatch_extractors((
            (PornhubPhotosExtractor, base + "photos"),
            (PornhubGifsExtractor  , base + "gifs"),
        ), ("photos",))


class PornhubPhotosExtractor(PornhubExtractor):
    """Extractor for all galleries of a pornhub user"""
    subcategory = "photos"
    # group 1: "<users|model|pornstar>/<name>"
    # group 2: "photos" with an optional extra path segment
    pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
               "/(photos(?:/[^/?#]+)?)")
    example = "https://www.pornhub.com/model/USER/photos"

    def __init__(self, match):
        """Store the two groups captured by 'pattern' as 'user' and 'path'."""
        PornhubExtractor.__init__(self, match)
        self.user, self.path = match.groups()

    def items(self):
        """Yield a Queue message for every album ID found on each page.

        Album IDs are the substrings between 'id="albumphoto' and the next
        double quote. Iteration stops at the first page without any ID.
        """
        data = {"_extractor": PornhubGalleryExtractor}
        for page in self._pagination(self.user, self.path):
            # stays None when the page contains no album IDs
            gid = None
            for gid in text.extract_iter(page, 'id="albumphoto', '"'):
                yield Message.Queue, self.root + "/album/" + gid, data
            if gid is None:
                # _pagination() never ends by itself; an empty page is
                # the stop condition
                return


class PornhubGifsExtractor(PornhubExtractor):
    """Extractor for a pornhub user's gifs"""
    subcategory = "gifs"
    # group 1: "<users|model|pornstar>/<name>"
    # group 2: "gifs" with an optional extra path segment
    pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
               "/(gifs(?:/[^/?#]+)?)")
    example = "https://www.pornhub.com/model/USER/gifs"

    def __init__(self, match):
        """Store the two groups captured by 'pattern' as 'user' and 'path'."""
        PornhubExtractor.__init__(self, match)
        self.user, self.path = match.groups()

    def items(self):
        """Yield a Queue message for every gif ID found on each page.

        Gif IDs are the substrings between 'id="gif' and the next double
        quote. Iteration stops at the first page without any ID.
        """
        # NOTE(review): no intact definition of PornhubGifExtractor is
        # visible in this file
        data = {"_extractor": PornhubGifExtractor}
        for page in self._pagination(self.user, self.path):
            # stays None when the page contains no gif IDs
            gid = None
            for gid in text.extract_iter(page, 'id="gif', '"'):
                yield Message.Queue, self.root + "/gif/" + gid, data
            if gid is None:
                # _pagination() never ends by itself; an empty page is
                # the stop condition
                return