1 files changed, 108 insertions, 62 deletions
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py
index fa4efa0..c5ce832 100644
--- a/gallery_dl/extractor/pornhub.py
+++ b/gallery_dl/extractor/pornhub.py
@@ -19,6 +19,35 @@ class PornhubExtractor(Extractor):
     category = "pornhub"
     root = "https://www.pornhub.com"
 
+    def _init(self):
+        """Set the 'accessAgeDisclaimerPH' cookie for '.pornhub.com'"""
+        self.cookies.set("accessAgeDisclaimerPH", "1", domain=".pornhub.com")
+
+    def _pagination(self, user, path):
+        """Yield the response text of consecutive '.../ajax' result pages
+
+        A 'path' without "/" gets "/public" appended; callers end the loop.
+        """
+        section = path if "/" in path else path + "/public"
+        url = "{}/{}/{}/ajax".format(self.root, user, section)
+        query = {"page": 1}
+        headers = {
+            "Referer": url[:-5],  # 'url' without its "/ajax" suffix
+            "X-Requested-With": "XMLHttpRequest",
+        }
+
+        while True:
+            resp = self.request(
+                url, method="POST", headers=headers, params=query,
+                allow_redirects=False)
+            if 300 <= resp.status_code < 400:
+                # redirected: request the same page below the new location
+                url = "{}{}/{}/ajax".format(
+                    self.root, resp.headers["location"], section)
+            else:
+                yield resp.text
+                query["page"] += 1
+
 
 class PornhubGalleryExtractor(PornhubExtractor):
     """Extractor for image galleries on pornhub.com"""
@@ -27,30 +56,7 @@ class PornhubGalleryExtractor(PornhubExtractor):
     filename_fmt = "{num:>03}_{id}.{extension}"
     archive_fmt = "{id}"
     pattern = BASE_PATTERN + r"/album/(\d+)"
-    test = (
-        ("https://www.pornhub.com/album/19289801", {
-            "pattern": r"https://\w+.phncdn.com/pics/albums/\d+/\d+/\d+/\d+/",
-            "count": ">= 300",
-            "keyword": {
-                "id"     : int,
-                "num"    : int,
-                "score"  : int,
-                "views"  : int,
-                "caption": str,
-                "user"   : "Danika Mori",
-                "gallery": {
-                    "id"   : 19289801,
-                    "score": int,
-                    "views": int,
-                    "tags" : list,
-                    "title": "Danika Mori Best Moments",
-                },
-            },
-        }),
-        ("https://www.pornhub.com/album/69040172", {
-            "exception": exception.AuthorizationError,
-        }),
-    )
+    example = "https://www.pornhub.com/album/12345"
 
     def __init__(self, match):
         PornhubExtractor.__init__(self, match)
@@ -58,9 +64,6 @@ class PornhubGalleryExtractor(PornhubExtractor):
         self._first = None
 
     def items(self):
-        self.session.cookies.set(
-            "accessAgeDisclaimerPH", "1", domain=".pornhub.com")
-
         data = self.metadata()
         yield Message.Directory, data
         for num, image in enumerate(self.images(), 1):
@@ -111,57 +114,100 @@ class PornhubGalleryExtractor(PornhubExtractor):
                 "views"  : text.parse_int(img["times_viewed"]),
                 "score"  : text.parse_int(img["vote_percent"]),
             }
-            key = img["next"]
+            key = str(img["next"])
             if key == end:
                 return
 
 
+class PornhubGifExtractor(PornhubExtractor):
+    """Extractor for individual gifs on pornhub.com"""
+    subcategory = "gif"
+    directory_fmt = ("{category}", "{user}", "gifs")
+    filename_fmt = "{id} {title}.{extension}"
+    archive_fmt = "{id}"
+    pattern = BASE_PATTERN + r"/gif/(\d+)"
+    example = "https://www.pornhub.com/gif/12345"
+
+    def __init__(self, match):
+        PornhubExtractor.__init__(self, match)
+        self.gallery_id = match.group(1)
+
+    def items(self):
+        """Yield the directory and file messages for this gif"""
+        src = self.request("{}/gif/{}".format(self.root, self.gallery_id)).text
+        extr = text.extract_from(src)
+        # NOTE(review): extr() presumably scans 'src' forward - keep this order
+        gif = {
+            "id"   : self.gallery_id,
+            "tags" : extr("data-context-tag='", "'").split(","),
+            "title": extr('"name": "', '"'),
+            "url"  : extr('"contentUrl": "', '"'),
+            "date" : text.parse_datetime(
+                extr('"uploadDate": "', '"'), "%Y-%m-%d"),
+            "user" : extr('data-mxptext="', '"'),
+        }
+        yield Message.Directory, gif
+        yield Message.Url, gif["url"], text.nameext_from_url(gif["url"], gif)
+
+
 class PornhubUserExtractor(PornhubExtractor):
-    """Extractor for all galleries of a pornhub user"""
+    """Extractor dispatching a pornhub user URL to its sub-extractors"""
     subcategory = "user"
-    pattern = (BASE_PATTERN + r"/(users|model|pornstar)/([^/?#]+)"
-               "(?:/photos(?:/(public|private|favorites))?)?/?$")
-    test = (
-        ("https://www.pornhub.com/pornstar/danika-mori/photos", {
-            "pattern": PornhubGalleryExtractor.pattern,
-            "count": ">= 6",
-        }),
-        ("https://www.pornhub.com/users/flyings0l0/"),
-        ("https://www.pornhub.com/users/flyings0l0/photos/public"),
-        ("https://www.pornhub.com/users/flyings0l0/photos/private"),
-        ("https://www.pornhub.com/users/flyings0l0/photos/favorites"),
-        ("https://www.pornhub.com/model/bossgirl/photos"),
-    )
+    pattern = BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)/?$"
+    example = "https://www.pornhub.com/model/USER"
 
     def __init__(self, match):
         PornhubExtractor.__init__(self, match)
-        self.type, self.user, self.cat = match.groups()
+        self.user = match.group(1)  # e.g. "model/USER"
+
+    def initialize(self):
+        pass  # NOTE(review): no-op; presumably overrides a base-class hook
 
     def items(self):
-        url = "{}/{}/{}/photos/{}/ajax".format(
-            self.root, self.type, self.user, self.cat or "public")
-        params = {"page": 1}
-        headers = {
-            "Referer": url[:-5],
-            "X-Requested-With": "XMLHttpRequest",
-        }
+        base = "{}/{}/".format(self.root, self.user)
+        return self._dispatch_extractors((
+            (PornhubPhotosExtractor, base + "photos"),
+            (PornhubGifsExtractor  , base + "gifs"),
+        ), ("photos",))  # NOTE(review): presumably the default selection
 
-        data = {"_extractor": PornhubGalleryExtractor}
-        while True:
-            response = self.request(
-                url, method="POST", headers=headers, params=params,
-                allow_redirects=False)
 
-            if 300 <= response.status_code < 400:
-                url = "{}{}/photos/{}/ajax".format(
-                    self.root, response.headers["location"],
-                    self.cat or "public")
-                continue
+class PornhubPhotosExtractor(PornhubExtractor):
+    """Extractor for the galleries on a pornhub user's 'photos' pages"""
+    subcategory = "photos"
+    pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
+               "/(photos(?:/[^/?#]+)?)")
+    example = "https://www.pornhub.com/model/USER/photos"
+
+    def __init__(self, match):
+        PornhubExtractor.__init__(self, match)
+        self.user, self.path = match.groups()  # e.g. "model/USER", "photos"
 
+    def items(self):  # queue album URLs until a page contains none
+        data = {"_extractor": PornhubGalleryExtractor}
+        for page in self._pagination(self.user, self.path):
             gid = None
-            for gid in text.extract_iter(response.text, 'id="albumphoto', '"'):
+            for gid in text.extract_iter(page, 'id="albumphoto', '"'):
                 yield Message.Queue, self.root + "/album/" + gid, data
             if gid is None:
                 return
 
-            params["page"] += 1
+
+class PornhubGifsExtractor(PornhubExtractor):
+    """Extractor for the gifs on a pornhub user's 'gifs' pages"""
+    subcategory = "gifs"
+    pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
+               "/(gifs(?:/[^/?#]+)?)")
+    example = "https://www.pornhub.com/model/USER/gifs"
+
+    def __init__(self, match):
+        PornhubExtractor.__init__(self, match)
+        self.user, self.path = match.groups()
+
+    def items(self):
+        data = {"_extractor": PornhubGifExtractor}
+        for page in self._pagination(self.user, self.path):
+            gif_ids = list(text.extract_iter(page, 'id="gif', '"'))
+            if not gif_ids:
+                return  # a page without any gif ends the listing
+            for gif_id in gif_ids:
+                yield Message.Queue, self.root + "/gif/" + gif_id, data