diff options
Diffstat (limited to 'gallery_dl/extractor/exhentai.py')
| -rw-r--r-- | gallery_dl/extractor/exhentai.py | 35 |
1 files changed, 26 insertions, 9 deletions
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index e37e81b..a546f68 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -185,7 +185,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): if self.gallery_token: gpage = self._gallery_page() - self.image_token = text.extract(gpage, 'hentai.org/s/', '"')[0] + self.image_token = text.extr(gpage, 'hentai.org/s/', '"') if not self.image_token: self.log.error("Failed to extract initial image token") self.log.debug("Page content:\n%s", gpage) @@ -193,7 +193,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): ipage = self._image_page() else: ipage = self._image_page() - part = text.extract(ipage, 'hentai.org/g/', '"')[0] + part = text.extr(ipage, 'hentai.org/g/', '"') if not part: self.log.error("Failed to extract gallery token") self.log.debug("Page content:\n%s", ipage) @@ -271,8 +271,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): } if data["uploader"].startswith("<"): - data["uploader"] = text.unescape(text.extract( - data["uploader"], ">", "<")[0]) + data["uploader"] = text.unescape(text.extr( + data["uploader"], ">", "<")) f = data["favorites"][0] if f == "N": @@ -400,7 +400,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): } page = self.request(url, cookies=cookies).text - current = text.extract(page, "<strong>", "</strong>")[0] + current = text.extr(page, "<strong>", "</strong>") self.log.debug("Image Limits: %s/%s", current, self.limits) self._remaining = self.limits - text.parse_int(current) @@ -473,6 +473,10 @@ class ExhentaiSearchExtractor(ExhentaiExtractor): "pattern": ExhentaiGalleryExtractor.pattern, "range": "1-30", "count": 30, + "keyword": { + "gallery_id": int, + "gallery_token": r"re:^[0-9a-f]{10}$" + }, }), ) @@ -490,26 +494,39 @@ class ExhentaiSearchExtractor(ExhentaiExtractor): self.params = {"f_search": tag, "page": 0} else: self.params = text.parse_query(query) - self.params["page"] = text.parse_int(self.params.get("page")) + if "next" not in self.params: + self.params["page"] = text.parse_int(self.params.get("page")) def items(self): self.login() data = {"_extractor": ExhentaiGalleryExtractor} + search_url = self.search_url + params = self.params while True: last = None - page = self.request(self.search_url, params=self.params).text + page = self.request(search_url, params=params).text for gallery in ExhentaiGalleryExtractor.pattern.finditer(page): url = gallery.group(0) if url == last: continue last = url + data["gallery_id"] = text.parse_int(gallery.group(2)) + data["gallery_token"] = gallery.group(3) yield Message.Queue, url + "/", data - if 'class="ptdd">><' in page or ">No hits found</p>" in page: + next_url = text.extr(page, 'nexturl = "', '"', None) + if next_url is not None: + if not next_url: + return + search_url = next_url + params = None + + elif 'class="ptdd">><' in page or ">No hits found</p>" in page: return - self.params["page"] += 1 + else: + params["page"] += 1 class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor): |
