# -*- coding: utf-8 -*-

# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://imhentai.xxx/ and mirror sites"""

from .common import GalleryExtractor, BaseExtractor, Message
from .. import text, util


class ImhentaiExtractor(BaseExtractor):
    """Base class for imhentai extractors"""
    basecategory = "IMHentai"

    def _pagination(self, url):
        """Yield a queue message for every gallery linked from a listing.

        Starting at 'url', each listing page is fetched, every
        'href="/gallery/<id>"' link on it is turned into a
        (Message.Queue, <root>/gallery/<id>, data) tuple, and the
        "next page" link is followed until there is none left.
        """
        prev = None
        base = self.root + "/gallery/"
        data = {"_extractor": ImhentaiGalleryExtractor}

        while True:
            page = self.request(url).text

            # ignore everything from the 'ranking_list' element onwards
            pos = page.find('class="ranking_list"')
            if pos >= 0:
                page = page[:pos]

            extr = text.extract_from(page)
            while True:
                gallery_id = extr('href="/gallery/', '"')
                # test for exhaustion first, so that an empty result
                # can never be mistaken for a duplicate and spin forever
                if not gallery_id:
                    break
                # skip consecutive links pointing to the same gallery
                if gallery_id == prev:
                    continue
                yield Message.Queue, base + gallery_id, data
                prev = gallery_id

            # NOTE(review): text.rextr presumably searches from the end of
            # 'page', i.e. selects the last 'page-link' anchor - confirm
            href = text.rextr(page, "class='page-link' href='", "'")
            if not href or href == "#":
                return

            # startswith() instead of indexing: a bare "/" href must not
            # raise an IndexError
            if href.startswith("//"):
                # protocol-relative URL
                href = "https:" + href
            elif href.startswith("/"):
                # root-relative path
                href = self.root + href
            url = href


# update() registers the supported sites; its result is used as
# the common prefix of the subclasses' URL patterns
BASE_PATTERN = ImhentaiExtractor.update({
    "imhentai": {
        "root": "https://imhentai.xxx",
        "pattern": r"(?:www\.)?imhentai\.xxx",
    },
    "hentaiera": {
        "root": "https://hentaiera.com",
        "pattern": r"(?:www\.)?hentaiera\.com",
    },
    "hentairox": {
        "root": "https://hentairox.com",
        "pattern": r"(?:www\.)?hentairox\.com",
    },
    "hentaifox": {
        "root": "https://hentaifox.com",
        "pattern": r"(?:www\.)?hentaifox\.com",
    },
    "hentaienvy": {
        "root": "https://hentaienvy.com",
        "pattern": r"(?:www\.)?hentaienvy\.com",
    },
    "hentaizap": {
        "root": "https://hentaizap.com",
        "pattern": r"(?:www\.)?hentaizap\.com",
    },
})


class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
    """Extractor for imhentai galleries"""
    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
    example = "https://imhentai.xxx/gallery/12345/"

    def __init__(self, match):
        ImhentaiExtractor.__init__(self, match)
        self.gallery_id = self.groups[-1]
        self.page_url
= f"{self.root}/gallery/{self.gallery_id}/" def metadata(self, page): extr = text.extract_from(page) title = extr("