diff options
| author | 2022-09-22 19:43:53 -0400 | |
|---|---|---|
| committer | 2022-09-22 19:43:53 -0400 | |
| commit | e6b82556343116256be047ab7099bedd9063f66a (patch) | |
| tree | 884c0435863d130ec967163b82a2638ff1bd9505 /gallery_dl/extractor/hotleak.py | |
| parent | a768930761f7f20587ae40a8cacca0e55c85290a (diff) | |
New upstream version 1.23.1.upstream/1.23.1
Diffstat (limited to 'gallery_dl/extractor/hotleak.py')
| -rw-r--r-- | gallery_dl/extractor/hotleak.py | 228 |
1 files changed, 228 insertions, 0 deletions
diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py new file mode 100644 index 0000000..d6575cf --- /dev/null +++ b/gallery_dl/extractor/hotleak.py @@ -0,0 +1,228 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://hotleak.vip/""" + +from .common import Extractor, Message +from .. import text, exception + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?hotleak\.vip" + + +class HotleakExtractor(Extractor): + """Base class for hotleak extractors""" + category = "hotleak" + directory_fmt = ("{category}", "{creator}",) + filename_fmt = "{creator}_{id}.{extension}" + archive_fmt = "{type}_{creator}_{id}" + root = "https://hotleak.vip" + + def __init__(self, match): + Extractor.__init__(self, match) + self.session.headers["Referer"] = self.root + + def items(self): + for post in self.posts(): + yield Message.Directory, post + yield Message.Url, post["url"], post + + def posts(self): + """Return an iterable containing relevant posts""" + return () + + def _pagination(self, url, params): + params = text.parse_query(params) + params["page"] = text.parse_int(params.get("page"), 1) + + while True: + page = self.request(url, params=params).text + if "</article>" not in page: + return + + for item in text.extract_iter( + page, '<article class="movie-item', '</article>'): + yield text.extract(item, '<a href="', '"')[0] + + params["page"] += 1 + + +class HotleakPostExtractor(HotleakExtractor): + """Extractor for individual posts on hotleak""" + subcategory = "post" + pattern = (BASE_PATTERN + r"/(?!hot|creators|videos|photos)" + r"([^/]+)/(photo|video)/(\d+)") + test = ( + ("https://hotleak.vip/kaiyakawaii/photo/1617145", { + "pattern": r"https://hotleak\.vip/storage/images/3625" + r"/1617145/fefdd5988dfcf6b98cc9e11616018868\.jpg", + "keyword": { + "id": 1617145, + "creator": "kaiyakawaii", + "type": "photo", + "filename": "fefdd5988dfcf6b98cc9e11616018868", + "extension": "jpg", + }, + }), + ("https://hotleak.vip/lilmochidoll/video/1625538", { + "pattern": r"ytdl:https://cdn8-leak\.camhdxx\.com" + r"/1661/1625538/index\.m3u8", + "keyword": { + "id": 1625538, + "creator": "lilmochidoll", + "type": "video", + "filename": "index", + "extension": "mp4", + }, + }), + ) + + def __init__(self, match): + HotleakExtractor.__init__(self, match) + self.creator, self.type, self.id = match.groups() + + def posts(self): + url = "{}/{}/{}/{}".format( + self.root, self.creator, self.type, self.id) + page = self.request(url).text + page = text.extract( + page, '<div class="movie-image thumb">', '</article>')[0] + data = { + "id" : text.parse_int(self.id), + "creator": self.creator, + "type" : self.type, + } + + if self.type == "photo": + data["url"] = text.extract(page, 'data-src="', '"')[0] + text.nameext_from_url(data["url"], data) + + elif self.type == "video": + data["url"] = "ytdl:" + text.extract( + text.unescape(page), '"src":"', '"')[0] + text.nameext_from_url(data["url"], data) + data["extension"] = "mp4" + + return (data,) + + +class HotleakCreatorExtractor(HotleakExtractor): + """Extractor for all posts from a hotleak creator""" + subcategory = "creator" + pattern = BASE_PATTERN + r"/(?!hot|creators|videos|photos)([^/?#]+)/?$" + test = ( + ("https://hotleak.vip/kaiyakawaii", { + "range": "1-200", + "count": 200, + }), + ("https://hotleak.vip/stellaviolet", { + "count": "> 600" + }), + ("https://hotleak.vip/doesnotexist", { + "exception": exception.NotFoundError, + }), + ) + + def __init__(self, match): + HotleakExtractor.__init__(self, match) + self.creator = match.group(1) + + def posts(self): + url = "{}/{}".format(self.root, self.creator) + return self._pagination(url) + + def _pagination(self, url): + headers = {"X-Requested-With": "XMLHttpRequest"} + params = {"page": 1} + + while True: + try: + response = self.request( + url, headers=headers, params=params, notfound="creator") + except exception.HttpError as exc: + if exc.response.status_code == 429: + self.wait( + until=exc.response.headers.get("X-RateLimit-Reset")) + continue + + posts = response.json() + if not posts: + return + + data = {"creator": self.creator} + for post in posts: + data["id"] = text.parse_int(post["id"]) + + if post["type"] == 0: + data["type"] = "photo" + data["url"] = self.root + "/storage/" + post["image"] + text.nameext_from_url(data["url"], data) + + elif post["type"] == 1: + data["type"] = "video" + data["url"] = "ytdl:" + post["stream_url_play"] + text.nameext_from_url(data["url"], data) + data["extension"] = "mp4" + + yield data + params["page"] += 1 + + +class HotleakCategoryExtractor(HotleakExtractor): + """Extractor for hotleak categories""" + subcategory = "category" + pattern = BASE_PATTERN + r"/(hot|creators|videos|photos)(?:/?\?([^#]+))?" + test = ( + ("https://hotleak.vip/photos", { + "pattern": HotleakPostExtractor.pattern, + "range": "1-50", + "count": 50, + }), + ("https://hotleak.vip/videos"), + ("https://hotleak.vip/creators", { + "pattern": HotleakCreatorExtractor.pattern, + "range": "1-50", + "count": 50, + }), + ("https://hotleak.vip/hot"), + ) + + def __init__(self, match): + HotleakExtractor.__init__(self, match) + self._category, self.params = match.groups() + + def items(self): + url = "{}/{}".format(self.root, self._category) + + if self._category in ("hot", "creators"): + data = {"_extractor": HotleakCreatorExtractor} + elif self._category in ("videos", "photos"): + data = {"_extractor": HotleakPostExtractor} + + for item in self._pagination(url, self.params): + yield Message.Queue, item, data + + +class HotleakSearchExtractor(HotleakExtractor): + """Extractor for hotleak search results""" + subcategory = "search" + pattern = BASE_PATTERN + r"/search(?:/?\?([^#]+))" + test = ( + ("https://hotleak.vip/search?search=gallery-dl", { + "count": 0, + }), + ("https://hotleak.vip/search?search=hannah", { + "count": "> 30", + }), + ) + + def __init__(self, match): + HotleakExtractor.__init__(self, match) + self.params = match.group(1) + + def items(self): + data = {"_extractor": HotleakCreatorExtractor} + for creator in self._pagination(self.root + "/search", self.params): + yield Message.Queue, creator, data |
