diff options
Diffstat (limited to 'gallery_dl/extractor/ahottie.py')
| -rw-r--r-- | gallery_dl/extractor/ahottie.py | 102 |
1 files changed, 102 insertions, 0 deletions
# -*- coding: utf-8 -*-

# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://ahottie.top/"""

from .common import Extractor, GalleryExtractor, Message
from .. import text

BASE_PATTERN = r"(?:https?://)?(?:www\.)?ahottie\.top"


class AhottieExtractor(Extractor):
    """Base class for ahottie extractors"""
    category = "ahottie"
    root = "https://ahottie.top"

    def items(self):
        """Yield one queue message per album returned by self.albums()

        Subclasses are expected to provide albums(); each album dict
        carries its own "url" and is passed along as message metadata.
        """
        for album in self.albums():
            yield Message.Queue, album["url"], album

    def _pagination(self, url, params):
        """Yield album metadata dicts from a paginated listing

        url:    listing URL to request
        params: query parameters; its "page" entry is normalized to an
                integer (default 1) and incremented in place per page

        Pagination stops at the first page lacking a 'rel="next"' marker.
        """
        params["page"] = text.parse_int(params.get("page"), 1)

        while True:
            page = self.request(url, params=params).text

            # each album entry is the content of a '<div class="relative">'
            for album in text.extract_iter(
                    page, '<div class="relative">', '</div>'):
                yield {
                    "url"  : text.extr(album, ' href="', '"'),
                    "title": text.unquote(text.extr(
                        album, ' alt="', '"')),
                    "date" : self.parse_datetime_iso(text.extr(
                        album, ' datetime="', '"')),
                    "_extractor": AhottieGalleryExtractor,
                }

            if 'rel="next"' not in page:
                break
            params["page"] += 1


class AhottieGalleryExtractor(GalleryExtractor, AhottieExtractor):
    """Extractor for individual ahottie albums"""
    directory_fmt = ("{category}", "{date:%Y-%m-%d} {title} ({gallery_id})")
    filename_fmt = "{num:>03}.{extension}"
    archive_fmt = "{gallery_id}_{num}_{filename}"
    # group 0: album path, group 1: album ID
    pattern = BASE_PATTERN + r"(/albums/(\w+))"
    example = "https://ahottie.top/albums/1234567890"

    def metadata(self, page):
        """Return album metadata extracted from the album page HTML

        The extraction calls are sequential: the title is read first,
        then the first 'datetime' attribute after it, then the tag block.
        """
        extr = text.extract_from(page)

        title = extr("<title>", "<")
        # Drop the part after the last " | " (presumably the site name).
        # Without a separator, rpartition() returns an empty head, so
        # fall back to the complete title instead of an empty string.
        name, sep, _ = title.rpartition(" | ")
        if not sep:
            name = title

        return {
            "gallery_id": self.groups[1],
            "title": text.unescape(name),
            "date" : self.parse_datetime_iso(extr('datetime="', '"')),
            # the first split_html() element is discarded
            "tags" : text.split_html(extr('<i ', '</div>'))[1:],
        }

    def images(self, page):
        """Return a list of (url, metadata) tuples for the album's images

        Every image shares the same metadata dict holding the HTTP
        options ("_http_headers" and "_http_validate").
        """
        # Start searching right after the "<time " tag if present;
        # find() returns -1 when it is missing, which makes 'pos' 0
        # and the search covers the whole page.
        pos = page.find("<time ") + 1
        data = {
            # NOTE(review): presumably disables sending a Referer header
            "_http_headers" : {"Referer": None},
            "_http_validate": self._validate,
        }
        return [
            (url, data)
            for url in text.extract_iter(page, '" src="', '"', pos)
        ]

    def _validate(self, response):
        """Return False for a 2421-byte 'image/jpeg' response

        NOTE(review): this size/type combination presumably identifies
        a placeholder image served in place of the real file - confirm.
        """
        hget = response.headers.get
        return not (
            hget("content-length") == "2421" and
            hget("content-type") == "image/jpeg"
        )


class AhottieTagExtractor(AhottieExtractor):
    """Extractor for ahottie albums listed under a tag"""
    subcategory = "tag"
    pattern = BASE_PATTERN + r"/tags/([^/?#]+)"
    example = "https://ahottie.top/tags/TAG"

    def albums(self):
        """Return an iterator over all albums of the matched tag"""
        tag = self.groups[0]
        # store the decoded tag name as "search_tags" metadata
        self.kwdict["search_tags"] = text.unquote(tag)
        return self._pagination(f"{self.root}/tags/{tag}", {})


class AhottieSearchExtractor(AhottieExtractor):
    """Extractor for ahottie search results"""
    subcategory = "search"
    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
    example = "https://ahottie.top/search?kw=QUERY"

    def albums(self):
        """Return an iterator over all albums matching the search query"""
        params = text.parse_query(self.groups[0])
        # the "kw" query parameter holds the search term
        self.kwdict["search_tags"] = params.get("kw")
        return self._pagination(f"{self.root}/search", params)
