gallery_dl/extractor/xhamster.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119

# -*- coding: utf-8 -*-

# Copyright 2019-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://xhamster.com/"""

from .common import Extractor, Message
from .. import text, util

# URL prefix shared by all extractor patterns in this module:
#   - an optional "http://" or "https://" scheme
#   - capture group 1: the host, i.e. an optional subdomain followed by
#     "xhamster", then either an optional digit and a ".com" / ".one" /
#     ".desi" TLD, or the literal ".porncache.net"
BASE_PATTERN = (
    r"(?:https?://)?"
    r"((?:[\w-]+\.)?xhamster(?:\d?\.(?:com|one|desi)|\.porncache\.net))"
)


class XhamsterExtractor(Extractor):
    """Shared base for every xhamster extractor in this module"""
    category = "xhamster"

    def __init__(self, match):
        # Group 1 of the URL pattern is the matched host name; build the
        # root from it so later requests target the same domain that the
        # input URL used.
        host = match[1]
        self.root = "https://" + host
        Extractor.__init__(self, match)


class XhamsterGalleryExtractor(XhamsterExtractor):
    """Extractor for image galleries on xhamster.com"""
    subcategory = "gallery"
    directory_fmt = ("{category}", "{user[name]}",
                     "{gallery[id]} {gallery[title]}")
    filename_fmt = "{num:>03}_{id}.{extension}"
    archive_fmt = "{id}"
    pattern = BASE_PATTERN + r"(/photos/gallery/[^/?#]+)"
    example = "https://xhamster.com/photos/gallery/12345"

    def items(self):
        """Yield a Directory message followed by one Url message per image

        Every image object is extended in place with the gallery metadata,
        the filename info derived from its URL, and its 1-based position
        inside the gallery ('num').
        """
        data = self.metadata()
        yield Message.Directory, "", data
        for num, image in enumerate(self.images(), 1):
            url = image["imageURL"]
            image.update(data)
            text.nameext_from_url(url, image)
            image["num"] = num
            # assigned after nameext_from_url() so that 'webp' always wins
            image["extension"] = "webp"
            # remove the field without aborting the whole gallery
            # when a photo entry does not carry it
            image.pop("modelName", None)
            yield Message.Url, url, image

    def metadata(self):
        """Fetch the gallery page and return its user/gallery metadata

        The parsed page data is also kept in 'self.data' so that images()
        can reuse it instead of requesting the first page again.
        """
        data = self.data = self._extract_data(self.root + self.groups[1])

        gallery = data["galleryPage"]
        info = gallery["infoProps"]
        model = gallery["galleryModel"]
        author = info["authorInfoProps"]

        return {
            "user":
            {
                "id"         : text.parse_int(model["userId"]),
                "url"        : author["authorLink"],
                "name"       : author["authorName"],
                "verified"   : bool(author.get("verified")),
                "subscribers": info["subscribeButtonProps"]["subscribers"],
            },
            "gallery":
            {
                "id"         : text.parse_int(gallery["id"]),
                "tags"       : [t["label"] for t in info["categoriesTags"]],
                "date"       : self.parse_timestamp(model["created"]),
                "views"      : text.parse_int(model["views"]),
                "likes"      : text.parse_int(model["rating"]["likes"]),
                "dislikes"   : text.parse_int(model["rating"]["dislikes"]),
                "title"      : model["title"],
                "description": model["description"],
                "thumbnail"  : model["thumbURL"],
            },
            "count": text.parse_int(gallery["photosCount"]),
        }

    def images(self):
        """Yield the photo objects of all pages of the gallery

        Starts with the page data stored by metadata() and then follows
        the pagination info until the last page has been processed.
        """
        data = self.data
        # release the reference; the first page is only needed once
        self.data = None

        while True:
            yield from data["photosGalleryModel"]["photos"]

            pagination = data["galleryPage"]["paginationProps"]
            if pagination["currentPageNumber"] >= pagination["lastPageNumber"]:
                return
            # cut the last three characters off the link template
            # (presumably a page number placeholder - TODO confirm)
            # and append the number of the next page
            url = (pagination["pageLinkTemplate"][:-3] +
                   str(pagination["currentPageNumber"] + 1))

            data = self._extract_data(url)

    def _extract_data(self, url):
        """Request 'url' and return its parsed 'window.initials' JSON data"""
        page = self.request(url).text
        # take everything between the assignment and the closing script
        # tag, minus trailing newlines and semicolons
        return util.json_loads(text.extr(
            page, "window.initials=", "</script>").rstrip("\n\r;"))


class XhamsterUserExtractor(XhamsterExtractor):
    """Extractor for all galleries of an xhamster user"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/users/([^/?#]+)(?:/photos)?/?(?:$|[?#])"
    example = "https://xhamster.com/users/USER/photos"

    def items(self):
        """Queue every gallery URL found on the user's photo listing pages"""
        gallery_begin = 'thumb-image-container role-pop" href="'
        next_begin = 'data-page="next" href="'
        queue_data = {"_extractor": XhamsterGalleryExtractor}

        page_url = f"{self.root}/users/{self.groups[1]}/photos"
        while page_url:
            extr = text.extract_from(self.request(page_url).text)

            # hand off gallery links until the page has no more of them
            gallery_url = extr(gallery_begin, '"')
            while gallery_url:
                yield Message.Queue, gallery_url, queue_data
                gallery_url = extr(gallery_begin, '"')

            # an empty result here means there is no further page
            page_url = extr(next_begin, '"')