diff options
Diffstat (limited to 'gallery_dl/extractor/exhentai.py')
| -rw-r--r-- | gallery_dl/extractor/exhentai.py | 83 |
1 files changed, 56 insertions, 27 deletions
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 44bfe7d..182910c 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -27,6 +27,7 @@ class ExhentaiExtractor(Extractor): cookies_names = ("ipb_member_id", "ipb_pass_hash") root = "https://exhentai.org" request_interval = 5.0 + ciphers = "DEFAULT:!DH" LIMIT = False @@ -112,12 +113,15 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): def __init__(self, match): ExhentaiExtractor.__init__(self, match) - self.key = {} - self.count = 0 self.gallery_id = text.parse_int(match.group(2) or match.group(5)) self.gallery_token = match.group(3) self.image_token = match.group(4) self.image_num = text.parse_int(match.group(6), 1) + self.key_start = None + self.key_show = None + self.key_next = None + self.api_url = "" + self.count = 0 def _init(self): source = self.config("source") @@ -145,17 +149,17 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): gpage = self._gallery_page() self.image_token = text.extr(gpage, 'hentai.org/s/', '"') if not self.image_token: - self.log.error("Failed to extract initial image token") self.log.debug("Page content:\n%s", gpage) - return + raise exception.StopExtraction( + "Failed to extract initial image token") ipage = self._image_page() else: ipage = self._image_page() part = text.extr(ipage, 'hentai.org/g/', '"') if not part: - self.log.error("Failed to extract gallery token") self.log.debug("Page content:\n%s", ipage) - return + raise exception.StopExtraction( + "Failed to extract gallery token") self.gallery_token = part.split("/")[1] gpage = self._gallery_page() @@ -176,7 +180,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): data.update(image) if self.limits: self._check_limits(data) - if "/fullimg.php" in url: + if "/fullimg" in url: data["_http_validate"] = _validate_response else: data["_http_validate"] = None @@ -208,6 +212,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): def metadata_from_page(self, page): extr = text.extract_from(page) + self.api_url = extr('var api_url = "', '"') or (self.root + "/api.php") + data = { "gid" : self.gallery_id, "token" : self.gallery_token, @@ -225,7 +231,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): '>Visible:</td><td class="gdt2">', '<'), "language" : extr('>Language:</td><td class="gdt2">', ' '), "filesize" : text.parse_bytes(extr( - '>File Size:</td><td class="gdt2">', '<').rstrip("Bb")), + '>File Size:</td><td class="gdt2">', '<').rstrip("Bbi")), "filecount" : extr('>Length:</td><td class="gdt2">', ' '), "favorites" : extr('id="favcount">', ' '), "rating" : extr(">Average: ", "<"), @@ -251,14 +257,13 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): return data def metadata_from_api(self): - url = self.root + "/api.php" data = { - "method": "gdata", - "gidlist": ((self.gallery_id, self.gallery_token),), + "method" : "gdata", + "gidlist" : ((self.gallery_id, self.gallery_token),), "namespace": 1, } - data = self.request(url, method="POST", json=data).json() + data = self.request(self.api_url, method="POST", json=data).json() if "error" in data: raise exception.StopExtraction(data["error"]) @@ -269,54 +274,70 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): pos = page.index('<div id="i3"><a onclick="return load_image(') + 26 extr = text.extract_from(page, pos) - self.key["next"] = extr("'", "'") + self.key_next = extr("'", "'") iurl = extr('<img id="img" src="', '"') - orig = extr('hentai.org/fullimg.php', '"') + nl = extr(" nl(", ")").strip("\"'") + orig = extr('hentai.org/fullimg', '"') try: if self.original and orig: - url = self.root + "/fullimg.php" + text.unescape(orig) + url = self.root + "/fullimg" + text.unescape(orig) data = self._parse_original_info(extr('ownload original', '<')) + data["_fallback"] = ("{}?nl={}".format(url, nl),) else: url = iurl data = self._parse_image_info(url) + data["_fallback"] = self._fallback( + None, self.image_num, nl) except IndexError: self.log.debug("Page content:\n%s", page) raise exception.StopExtraction( "Unable to parse image info for '%s'", url) data["num"] = self.image_num - data["image_token"] = self.key["start"] = extr('var startkey="', '";') - self.key["show"] = extr('var showkey="', '";') + data["image_token"] = self.key_start = extr('var startkey="', '";') + self.key_show = extr('var showkey="', '";') self._check_509(iurl, data) - return url, text.nameext_from_url(iurl, data) + return url, text.nameext_from_url(url, data) def images_from_api(self): """Get image url and data from api calls""" - api_url = self.root + "/api.php" - nextkey = self.key["next"] + api_url = self.api_url + nextkey = self.key_next request = { "method" : "showpage", "gid" : self.gallery_id, + "page" : 0, "imgkey" : nextkey, - "showkey": self.key["show"], + "showkey": self.key_show, } + for request["page"] in range(self.image_num + 1, self.count + 1): page = self.request(api_url, method="POST", json=request).json() + + i3 = page["i3"] + i6 = page["i6"] + imgkey = nextkey - nextkey, pos = text.extract(page["i3"], "'", "'") - imgurl , pos = text.extract(page["i3"], 'id="img" src="', '"', pos) - origurl, pos = text.extract(page["i7"], '<a href="', '"') + nextkey, pos = text.extract(i3, "'", "'") + imgurl , pos = text.extract(i3, 'id="img" src="', '"', pos) + nl , pos = text.extract(i3, " nl(", ")", pos) + nl = (nl or "").strip("\"'") try: - if self.original and origurl: + pos = i6.find("hentai.org/fullimg") + if self.original and pos >= 0: + origurl, pos = text.rextract(i6, '"', '"', pos) url = text.unescape(origurl) data = self._parse_original_info(text.extract( - page["i7"], "ownload original", "<", pos)[0]) + i6, "ownload original", "<", pos)[0]) + data["_fallback"] = ("{}?nl={}".format(url, nl),) else: url = imgurl data = self._parse_image_info(url) + data["_fallback"] = self._fallback( + imgkey, request["page"], nl) except IndexError: self.log.debug("Page content:\n%s", page) raise exception.StopExtraction( @@ -326,7 +347,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): data["image_token"] = imgkey self._check_509(imgurl, data) - yield url, text.nameext_from_url(imgurl, data) + yield url, text.nameext_from_url(url, data) request["imgkey"] = nextkey @@ -390,6 +411,14 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): raise exception.NotFoundError("image page") return page + def _fallback(self, imgkey, num, nl): + url = "{}/s/{}/{}-{}?nl={}".format( + self.root, imgkey or self.key_start, self.gallery_id, num, nl) + page = self.request(url, fatal=False).text + if page.startswith(("Invalid page", "Keep trying")): + return + yield self.image_from_page(page)[0] + @staticmethod def _parse_image_info(url): for part in url.split("/")[4:]: |
