Update upstream source from tag 'upstream/1.29.3'

Update to upstream version '1.29.3' with Debian dir 131b9b3bdbc67af5fe84f139a5b499a550f7c22b
author: Unit 193 <unit193@unit193.net> 2025-03-29 07:20:04 -0400
committer: Unit 193 <unit193@unit193.net> 2025-03-29 07:20:04 -0400
commit: 5ea6cce4fb40d2cc4f1d7849e44e6825ac2f3a73 (patch)
tree: 2d7040d732323306b2227682068ed5c9e12d4bf0 /gallery_dl/extractor/imhentai.py
parent: 68863e88e0e0d8c08a8631831c05c302527627b1 (diff)
parent: 662e5ac868a5c1a3e7bc95b37054b3a0ca4db74f (diff)
1 files changed, 36 insertions, 14 deletions
diff --git a/gallery_dl/extractor/imhentai.py b/gallery_dl/extractor/imhentai.py
index 0439f5b..1b0fba3 100644
--- a/gallery_dl/extractor/imhentai.py
+++ b/gallery_dl/extractor/imhentai.py
@@ -22,10 +22,15 @@ class ImhentaiExtractor(BaseExtractor):
 
         while True:
             page = self.request(url).text
+
+            pos = page.find('class="ranking_list"')
+            if pos >= 0:
+                page = page[:pos]
+
             extr = text.extract_from(page)
 
             while True:
-                gallery_id = extr('<a href="/gallery/', '"')
+                gallery_id = extr('href="/gallery/', '"')
                 if gallery_id == prev:
                     continue
                 if not gallery_id:
@@ -57,6 +62,18 @@ BASE_PATTERN = ImhentaiExtractor.update({
         "root": "https://hentairox.com",
         "pattern": r"(?:www\.)?hentairox\.com",
     },
+    "hentaifox": {
+        "root": "https://hentaifox.com",
+        "pattern": r"(?:www\.)?hentaifox\.com",
+    },
+    "hentaienvy": {
+        "root": "https://hentaienvy.com",
+        "pattern": r"(?:www\.)?hentaienvy\.com",
+    },
+    "hentaizap": {
+        "root": "https://hentaizap.com",
+        "pattern": r"(?:www\.)?hentaizap\.com",
+    },
 })
 
 
@@ -72,17 +89,20 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
 
     def metadata(self, page):
         extr = text.extract_from(page)
+        title = extr("<h1>", "<")
+        title_alt = extr('class="subtitle">', "<")
+        end = "</li>" if extr('<ul class="galleries_info', ">") else "</ul>"
 
         data = {
             "gallery_id": text.parse_int(self.gallery_id),
-            "title"     : text.unescape(extr("<h1>", "<")),
-            "title_alt" : text.unescape(extr('class="subtitle">', "<")),
-            "parody"    : self._split(extr(">Parodies", "</li>")),
-            "character" : self._split(extr(">Characters", "</li>")),
-            "tags"      : self._split(extr(">Tags", "</li>")),
-            "artist"    : self._split(extr(">Artists", "</li>")),
-            "group"     : self._split(extr(">Groups", "</li>")),
-            "language"  : self._split(extr(">Languages", "</li>")),
+            "title"     : text.unescape(title),
+            "title_alt" : text.unescape(title_alt),
+            "parody"    : self._split(extr(">Parodies", end)),
+            "character" : self._split(extr(">Characters", end)),
+            "tags"      : self._split(extr(">Tags", end)),
+            "artist"    : self._split(extr(">Artists", end)),
+            "group"     : self._split(extr(">Groups", end)),
+            "language"  : self._split(extr(">Languages", end)),
             "type"      : extr("href='/category/", "/"),
         }
 
@@ -94,10 +114,12 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
     def _split(self, html):
         results = []
         for tag in text.extract_iter(html, ">", "</a>"):
-            tag = tag.partition(" <span class='badge'>")[0]
-            if "<" in tag:
-                tag = text.remove_html(tag)
+            badge = ("badge'>" in tag or "class='badge" in tag)
+            tag = text.remove_html(tag)
+            if badge:
+                tag = tag.rpartition(" ")[0]
             results.append(tag)
+        results.sort()
         return results
 
     def images(self, page):
@@ -132,9 +154,9 @@ class ImhentaiTagExtractor(ImhentaiExtractor):
 class ImhentaiSearchExtractor(ImhentaiExtractor):
     """Extractor for imhentai search results"""
     subcategory = "search"
-    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
+    pattern = BASE_PATTERN + r"/search(/?\?[^#]+|/[^/?#]+/?)"
     example = "https://imhentai.xxx/search/?key=QUERY"
 
     def items(self):
-        url = self.root + "/search/?" + self.groups[-1]
+        url = self.root + "/search" + self.groups[-1]
         return self._pagination(url)
author	Unit 193 <unit193@unit193.net>	2025-03-29 07:20:04 -0400
committer	Unit 193 <unit193@unit193.net>	2025-03-29 07:20:04 -0400
commit	5ea6cce4fb40d2cc4f1d7849e44e6825ac2f3a73 (patch)
tree	2d7040d732323306b2227682068ed5c9e12d4bf0 /gallery_dl/extractor/imhentai.py
parent	68863e88e0e0d8c08a8631831c05c302527627b1 (diff)
parent	662e5ac868a5c1a3e7bc95b37054b3a0ca4db74f (diff)