diff options
Diffstat (limited to 'gallery_dl/extractor/hentainexus.py')
| -rw-r--r-- | gallery_dl/extractor/hentainexus.py | 31 |
1 files changed, 13 insertions, 18 deletions
diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py index aa41836..49c1a98 100644 --- a/gallery_dl/extractor/hentainexus.py +++ b/gallery_dl/extractor/hentainexus.py @@ -36,21 +36,17 @@ class HentainexusGalleryExtractor(GalleryExtractor): rmve = text.remove_html extr = text.extract_from(page) data = { - "gallery_id" : text.parse_int(self.gallery_id), - "tags" : extr('"og:description" content="', '"').split(", "), - "thumbnail" : extr('"og:image" content="', '"'), - "title" : extr('<h1 class="title">', '</h1>'), - "artist" : rmve(extr('viewcolumn">Artist</td>' , '</td>')), - "book" : rmve(extr('viewcolumn">Book</td>' , '</td>')), - "circle" : rmve(extr('viewcolumn">Circle</td>' , '</td>')), - "event" : rmve(extr('viewcolumn">Event</td>' , '</td>')), - "language" : rmve(extr('viewcolumn">Language</td>' , '</td>')), - "magazine" : rmve(extr('viewcolumn">Magazine</td>' , '</td>')), - "parody" : rmve(extr('viewcolumn">Parody</td>' , '</td>')), - "publisher" : rmve(extr('viewcolumn">Publisher</td>' , '</td>')), - "description": rmve(extr('viewcolumn">Description</td>', '</td>')), + "gallery_id": text.parse_int(self.gallery_id), + "tags" : extr('"og:description" content="', '"').split(", "), + "thumbnail" : extr('"og:image" content="', '"'), + "title" : extr('<h1 class="title">', '</h1>'), } + for key in ("Artist", "Book", "Circle", "Event", "Language", + "Magazine", "Parody", "Publisher", "Description"): + data[key.lower()] = rmve(extr( + 'viewcolumn">' + key + '</td>', '</td>')) data["lang"] = util.language_to_code(data["language"]) + if 'doujin' in data['tags']: data['type'] = 'Doujinshi' elif 'illustration' in data['tags']: @@ -60,10 +56,10 @@ class HentainexusGalleryExtractor(GalleryExtractor): data["title_conventional"] = self._join_title(data) return data - def images(self, page): + def images(self, _): url = "{}/read/{}".format(self.root, self.gallery_id) - extr = text.extract_from(self.request(url).text) - urls = extr("initReader(", "]") + "]" + page = self.request(url).text + urls = text.extract(page, "initReader(", "]")[0] + "]" return [(url, None) for url in json.loads(urls)] @staticmethod @@ -120,14 +116,13 @@ class HentainexusSearchExtractor(Extractor): self.params = text.parse_query(match.group(1)) def items(self): - yield Message.Version, 1 params = self.params path = "/" + data = {"_extractor": HentainexusGalleryExtractor} while path: page = self.request(self.root + path, params=params).text extr = text.extract_from(page) - data = {"_extractor": HentainexusGalleryExtractor} while True: gallery_id = extr('<a href="/view/', '"') |
