about summary refs log tree commit diff stats
path: root/gallery_dl/extractor/imhentai.py
diff options
context:
space:
mode:
author Unit 193 <unit193@unit193.net> 2025-03-29 07:20:04 -0400
committer Unit 193 <unit193@unit193.net> 2025-03-29 07:20:04 -0400
commit 5ea6cce4fb40d2cc4f1d7849e44e6825ac2f3a73 (patch)
tree 2d7040d732323306b2227682068ed5c9e12d4bf0 /gallery_dl/extractor/imhentai.py
parent 68863e88e0e0d8c08a8631831c05c302527627b1 (diff)
parent 662e5ac868a5c1a3e7bc95b37054b3a0ca4db74f (diff)
Update upstream source from tag 'upstream/1.29.3'
Update to upstream version '1.29.3' with Debian dir 131b9b3bdbc67af5fe84f139a5b499a550f7c22b
Diffstat (limited to 'gallery_dl/extractor/imhentai.py')
-rw-r--r-- gallery_dl/extractor/imhentai.py 50
1 files changed, 36 insertions, 14 deletions
diff --git a/gallery_dl/extractor/imhentai.py b/gallery_dl/extractor/imhentai.py
index 0439f5b..1b0fba3 100644
--- a/gallery_dl/extractor/imhentai.py
+++ b/gallery_dl/extractor/imhentai.py
@@ -22,10 +22,15 @@ class ImhentaiExtractor(BaseExtractor):
while True:
page = self.request(url).text
+
+ pos = page.find('class="ranking_list"')
+ if pos >= 0:
+ page = page[:pos]
+
extr = text.extract_from(page)
while True:
- gallery_id = extr('<a href="/gallery/', '"')
+ gallery_id = extr('href="/gallery/', '"')
if gallery_id == prev:
continue
if not gallery_id:
@@ -57,6 +62,18 @@ BASE_PATTERN = ImhentaiExtractor.update({
"root": "https://hentairox.com",
"pattern": r"(?:www\.)?hentairox\.com",
},
+ "hentaifox": {
+ "root": "https://hentaifox.com",
+ "pattern": r"(?:www\.)?hentaifox\.com",
+ },
+ "hentaienvy": {
+ "root": "https://hentaienvy.com",
+ "pattern": r"(?:www\.)?hentaienvy\.com",
+ },
+ "hentaizap": {
+ "root": "https://hentaizap.com",
+ "pattern": r"(?:www\.)?hentaizap\.com",
+ },
})
@@ -72,17 +89,20 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
def metadata(self, page):
extr = text.extract_from(page)
+ title = extr("<h1>", "<")
+ title_alt = extr('class="subtitle">', "<")
+ end = "</li>" if extr('<ul class="galleries_info', ">") else "</ul>"
data = {
"gallery_id": text.parse_int(self.gallery_id),
- "title" : text.unescape(extr("<h1>", "<")),
- "title_alt" : text.unescape(extr('class="subtitle">', "<")),
- "parody" : self._split(extr(">Parodies", "</li>")),
- "character" : self._split(extr(">Characters", "</li>")),
- "tags" : self._split(extr(">Tags", "</li>")),
- "artist" : self._split(extr(">Artists", "</li>")),
- "group" : self._split(extr(">Groups", "</li>")),
- "language" : self._split(extr(">Languages", "</li>")),
+ "title" : text.unescape(title),
+ "title_alt" : text.unescape(title_alt),
+ "parody" : self._split(extr(">Parodies", end)),
+ "character" : self._split(extr(">Characters", end)),
+ "tags" : self._split(extr(">Tags", end)),
+ "artist" : self._split(extr(">Artists", end)),
+ "group" : self._split(extr(">Groups", end)),
+ "language" : self._split(extr(">Languages", end)),
"type" : extr("href='/category/", "/"),
}
@@ -94,10 +114,12 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
def _split(self, html):
results = []
for tag in text.extract_iter(html, ">", "</a>"):
- tag = tag.partition(" <span class='badge'>")[0]
- if "<" in tag:
- tag = text.remove_html(tag)
+ badge = ("badge'>" in tag or "class='badge" in tag)
+ tag = text.remove_html(tag)
+ if badge:
+ tag = tag.rpartition(" ")[0]
results.append(tag)
+ results.sort()
return results
def images(self, page):
@@ -132,9 +154,9 @@ class ImhentaiTagExtractor(ImhentaiExtractor):
class ImhentaiSearchExtractor(ImhentaiExtractor):
"""Extractor for imhentai search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
+ pattern = BASE_PATTERN + r"/search(/?\?[^#]+|/[^/?#]+/?)"
example = "https://imhentai.xxx/search/?key=QUERY"
def items(self):
- url = self.root + "/search/?" + self.groups[-1]
+ url = self.root + "/search" + self.groups[-1]
return self._pagination(url)