summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/exhentai.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/exhentai.py')
-rw-r--r--gallery_dl/extractor/exhentai.py35
1 files changed, 26 insertions, 9 deletions
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index e37e81b..a546f68 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -185,7 +185,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
if self.gallery_token:
gpage = self._gallery_page()
- self.image_token = text.extract(gpage, 'hentai.org/s/', '"')[0]
+ self.image_token = text.extr(gpage, 'hentai.org/s/', '"')
if not self.image_token:
self.log.error("Failed to extract initial image token")
self.log.debug("Page content:\n%s", gpage)
@@ -193,7 +193,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
ipage = self._image_page()
else:
ipage = self._image_page()
- part = text.extract(ipage, 'hentai.org/g/', '"')[0]
+ part = text.extr(ipage, 'hentai.org/g/', '"')
if not part:
self.log.error("Failed to extract gallery token")
self.log.debug("Page content:\n%s", ipage)
@@ -271,8 +271,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
}
if data["uploader"].startswith("<"):
- data["uploader"] = text.unescape(text.extract(
- data["uploader"], ">", "<")[0])
+ data["uploader"] = text.unescape(text.extr(
+ data["uploader"], ">", "<"))
f = data["favorites"][0]
if f == "N":
@@ -400,7 +400,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
}
page = self.request(url, cookies=cookies).text
- current = text.extract(page, "<strong>", "</strong>")[0]
+ current = text.extr(page, "<strong>", "</strong>")
self.log.debug("Image Limits: %s/%s", current, self.limits)
self._remaining = self.limits - text.parse_int(current)
@@ -473,6 +473,10 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
"pattern": ExhentaiGalleryExtractor.pattern,
"range": "1-30",
"count": 30,
+ "keyword": {
+ "gallery_id": int,
+ "gallery_token": r"re:^[0-9a-f]{10}$"
+ },
}),
)
@@ -490,26 +494,39 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
self.params = {"f_search": tag, "page": 0}
else:
self.params = text.parse_query(query)
- self.params["page"] = text.parse_int(self.params.get("page"))
+ if "next" not in self.params:
+ self.params["page"] = text.parse_int(self.params.get("page"))
def items(self):
self.login()
data = {"_extractor": ExhentaiGalleryExtractor}
+ search_url = self.search_url
+ params = self.params
while True:
last = None
- page = self.request(self.search_url, params=self.params).text
+ page = self.request(search_url, params=params).text
for gallery in ExhentaiGalleryExtractor.pattern.finditer(page):
url = gallery.group(0)
if url == last:
continue
last = url
+ data["gallery_id"] = text.parse_int(gallery.group(2))
+ data["gallery_token"] = gallery.group(3)
yield Message.Queue, url + "/", data
- if 'class="ptdd">&gt;<' in page or ">No hits found</p>" in page:
+ next_url = text.extr(page, 'nexturl = "', '"', None)
+ if next_url is not None:
+ if not next_url:
+ return
+ search_url = next_url
+ params = None
+
+ elif 'class="ptdd">&gt;<' in page or ">No hits found</p>" in page:
return
- self.params["page"] += 1
+ else:
+ params["page"] += 1
class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):