New upstream version 1.17.2. (tag: upstream/1.17.2)

author: Unit 193 <unit193@unit193.net> 2021-04-13 19:33:47 -0400
committer: Unit 193 <unit193@unit193.net> 2021-04-13 19:33:47 -0400
commit: d27dcd4646242d6da8436f14c7b37ce864355858 (patch)
tree: c5c86ca7435010b6b13933217a1921430cf95dc4 /gallery_dl/extractor/exhentai.py
parent: 3201d77a148367d739862b4f07868a76eaeb7cb1 (diff)
1 file changed, 34 insertions, 44 deletions
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 5a7de23..872a338 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -43,16 +43,8 @@ class ExhentaiExtractor(Extractor):
         self.cookiedomain = "." + domain
 
         Extractor.__init__(self, match)
-        self.limits = self.config("limits", True)
         self.original = self.config("original", True)
 
-        if type(self.limits) is int:
-            self._limit_max = self.limits
-            self.limits = True
-        else:
-            self._limit_max = 0
-
-        self._remaining = 0
         self.session.headers["Referer"] = self.root + "/"
         if version != "ex":
             self.session.cookies.set("nw", "1", domain=self.cookiedomain)
@@ -77,7 +69,6 @@ class ExhentaiExtractor(Extractor):
             self.log.info("no username given; using e-hentai.org")
             self.root = "https://e-hentai.org"
             self.original = False
-            self.limits = False
             self.session.cookies["nw"] = "1"
 
     @cache(maxage=90*24*3600, keyarg=1)
@@ -206,8 +197,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             (self.image_from_page(ipage),), self.images_from_api())
         for url, image in images:
             data.update(image)
-            if self.limits:
-                self._check_limits(data)
             if "/fullimg.php" in url:
                 data["extension"] = ""
             yield Message.Url, url, data
@@ -246,6 +235,12 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             "torrentcount" : extr('>Torrent Download (', ')'),
         }
 
+        f = data["favorites"][0]
+        if f == "N":
+            data["favorites"] = "0"
+        elif f == "O":
+            data["favorites"] = "1"
+
         data["lang"] = util.language_to_code(data["language"])
         data["tags"] = [
             text.unquote(tag.replace("+", " "))
@@ -293,6 +288,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
         data["image_token"] = self.key["start"] = extr('var startkey="', '";')
         self.key["show"] = extr('var showkey="', '";')
 
+        if iurl.endswith("g/509.gif"):
+            self._report_limits(data)
         return url, text.nameext_from_url(iurl, data)
 
     def images_from_api(self):
@@ -327,10 +324,20 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
 
             data["num"] = request["page"]
             data["image_token"] = imgkey
+
+            if imgurl.endswith("g/509.gif"):
+                self._report_limits(data)
             yield url, text.nameext_from_url(imgurl, data)
 
             request["imgkey"] = nextkey
 
+    def _report_limits(self, data):
+        ExhentaiExtractor.LIMIT = True
+        raise exception.StopExtraction(
+            "Image limit reached! "
+            "Continue with '%s/s/%s/%s-%s' as URL after resetting it.",
+            self.root, data["image_token"], self.gallery_id, data["num"])
+
     def _gallery_page(self):
         url = "{}/g/{}/{}/".format(
             self.root, self.gallery_id, self.gallery_token)
@@ -354,35 +361,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             raise exception.NotFoundError("image page")
         return page
 
-    def _check_limits(self, data):
-        if not self._remaining or data["num"] % 25 == 0:
-            self._update_limits()
-        self._remaining -= data["cost"]
-
-        if self._remaining <= 0:
-            ExhentaiExtractor.LIMIT = True
-            url = "{}/s/{}/{}-{}".format(
-                self.root, data["image_token"], self.gallery_id, data["num"])
-            raise exception.StopExtraction(
-                "Image limit reached! Continue with '%s' "
-                "as URL after resetting it.", url)
-
-    def _update_limits(self):
-        url = "https://e-hentai.org/home.php"
-        cookies = {
-            cookie.name: cookie.value
-            for cookie in self.session.cookies
-            if cookie.domain == self.cookiedomain and cookie.name != "igneous"
-        }
-
-        page = self.request(url, cookies=cookies).text
-        current, pos = text.extract(page, "<strong>", "</strong>")
-        maximum, pos = text.extract(page, "<strong>", "</strong>", pos)
-        if self._limit_max:
-            maximum = self._limit_max
-        self.log.debug("Image Limits: %s/%s", current, maximum)
-        self._remaining = text.parse_int(maximum) - text.parse_int(current)
-
     @staticmethod
     def _parse_image_info(url):
         for part in url.split("/")[4:]:
@@ -418,9 +396,11 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
 class ExhentaiSearchExtractor(ExhentaiExtractor):
     """Extractor for exhentai search results"""
     subcategory = "search"
-    pattern = BASE_PATTERN + r"/?\?(.*)$"
+    pattern = BASE_PATTERN + r"/(?:\?([^#]*)|tag/([^/?#]+))"
     test = (
         ("https://e-hentai.org/?f_search=touhou"),
+        ("https://exhentai.org/?f_cats=767&f_search=touhou"),
+        ("https://exhentai.org/tag/parody:touhou+project"),
         (("https://exhentai.org/?f_doujinshi=0&f_manga=0&f_artistcg=0"
           "&f_gamecg=0&f_western=0&f_non-h=1&f_imageset=0&f_cosplay=0"
           "&f_asianporn=0&f_misc=0&f_search=touhou&f_apply=Apply+Filter"), {
@@ -432,10 +412,20 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
 
     def __init__(self, match):
         ExhentaiExtractor.__init__(self, match)
-        self.params = text.parse_query(match.group(2))
-        self.params["page"] = text.parse_int(self.params.get("page"))
         self.search_url = self.root
 
+        _, query, tag = match.groups()
+        if tag:
+            if "+" in tag:
+                ns, _, tag = tag.rpartition(":")
+                tag = '{}:"{}$"'.format(ns, tag.replace("+", " "))
+            else:
+                tag += "$"
+            self.params = {"f_search": tag, "page": 0}
+        else:
+            self.params = text.parse_query(query)
+            self.params["page"] = text.parse_int(self.params.get("page"))
+
     def items(self):
         self.login()
         data = {"_extractor": ExhentaiGalleryExtractor}
@@ -459,7 +449,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
 class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
     """Extractor for favorited exhentai galleries"""
     subcategory = "favorite"
-    pattern = BASE_PATTERN + r"/favorites\.php(?:\?(.*))?"
+    pattern = BASE_PATTERN + r"/favorites\.php(?:\?([^#]*)())?"
     test = (
         ("https://e-hentai.org/favorites.php", {
             "count": 1,
author	Unit 193 <unit193@unit193.net>	2021-04-13 19:33:47 -0400
committer	Unit 193 <unit193@unit193.net>	2021-04-13 19:33:47 -0400
commit	d27dcd4646242d6da8436f14c7b37ce864355858 (patch)
tree	c5c86ca7435010b6b13933217a1921430cf95dc4 /gallery_dl/extractor/exhentai.py
parent	3201d77a148367d739862b4f07868a76eaeb7cb1 (diff)