diff options
Diffstat (limited to 'gallery_dl/extractor/nhentai.py')
| -rw-r--r-- | gallery_dl/extractor/nhentai.py | 135 |
1 files changed, 135 insertions, 0 deletions
# -*- coding: utf-8 -*-

# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extract images from https://nhentai.net/"""

from .common import GalleryExtractor, Extractor, Message
from .. import text, util
import collections
import json


class NhentaiBase():
    """Base class for nhentai extractors"""
    category = "nhentai"
    # Site root, used to build gallery and search URLs.
    root = "https://nhentai.net"
    # Host that image URLs are built against (see images()).
    media_url = "https://i.nhentai.net"


class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
    """Extractor for image galleries from nhentai.net"""
    # group(1): gallery path ("/g/<id>"), group(2): numeric gallery ID
    pattern = r"(?:https?://)?nhentai\.net(/g/(\d+))"
    test = ("https://nhentai.net/g/147850/", {
        "url": "5179dbf0f96af44005a0ff705a0ad64ac26547d0",
        "keyword": {
            "title"     : r"re:\[Morris\] Amazon no Hiyaku \| Amazon Elixir",
            "title_en"  : str,
            "title_ja"  : str,
            "gallery_id": 147850,
            "media_id"  : 867789,
            "count"     : 16,
            "date"      : 1446050915,
            "scanlator" : "",
            "artist"    : ["morris"],
            "group"     : list,
            "parody"    : list,
            "characters": list,
            "tags"      : list,
            "type"      : "manga",
            "lang"      : "en",
            "language"  : "English",
            "width"     : int,
            "height"    : int,
        },
    })

    def __init__(self, match):
        """Store the gallery ID taken from the second group of 'match'"""
        GalleryExtractor.__init__(self, match)
        self.gallery_id = match.group(2)
        # Parsed gallery JSON; set by metadata() and read by images().
        self.data = None

    def metadata(self, page):
        """Return a dict of gallery metadata parsed from 'page'

        The JSON argument of the page's 'N.gallery(...);' call is decoded
        and kept in 'self.data' so that images() can reuse it.
        """
        # NOTE(review): assumes text.extract() returns the text between the
        # two markers as its first element; a page without the
        # 'N.gallery(' call would make json.loads() fail here.
        data = json.loads(text.extract(page, "N.gallery(", ");")[0])
        self.data = data

        title_en = data["title"].get("english", "")
        title_ja = data["title"].get("japanese", "")

        # Group tag names by their tag type (artist, group, language, ...).
        info = collections.defaultdict(list)
        for tag in data["tags"]:
            info[tag["type"]].append(tag["name"])

        # Pick the first language tag that names an actual language.
        # A dedicated loop variable keeps 'language' empty when the only
        # language tag is the pseudo-language "translated"; reusing
        # 'language' as the loop variable leaked that value into the result.
        language = ""
        for tag_name in info["language"]:
            if tag_name != "translated":
                language = tag_name.capitalize()
                break

        return {
            "title"     : title_en or title_ja,
            "title_en"  : title_en,
            "title_ja"  : title_ja,
            "gallery_id": data["id"],
            "media_id"  : text.parse_int(data["media_id"]),
            "date"      : data["upload_date"],
            "scanlator" : data["scanlator"],
            "artist"    : info["artist"],
            "group"     : info["group"],
            "parody"    : info["parody"],
            "characters": info["character"],
            "tags"      : info["tag"],
            "type"      : info["category"][0] if info["category"] else "",
            "lang"      : util.language_to_code(language),
            "language"  : language,
        }

    def images(self, _):
        """Return a list of (image-url, {"width", "height"}) tuples

        Requires metadata() to have been called first, since the page
        list and media ID are read from 'self.data'.
        """
        # Pre-fill host and media ID; the doubled braces leave two
        # placeholders for the page number and the file extension.
        ufmt = "{}/galleries/{}/{{}}.{{}}".format(
            self.media_url, self.data["media_id"])
        # Maps the one-letter image type code to a file extension;
        # unknown codes fall back to "jpg".
        extdict = {"j": "jpg", "p": "png", "g": "gif"}

        return [
            (ufmt.format(num, extdict.get(img["t"], "jpg")), {
                "width": img["w"], "height": img["h"],
            })
            for num, img in enumerate(self.data["images"]["pages"], 1)
        ]


class NhentaiSearchExtractor(NhentaiBase, Extractor):
    """Extractor for nhentai search results"""
    category = "nhentai"
    subcategory = "search"
    # group(1): the raw query string after '?', up to a '#' fragment
    pattern = r"(?:https?://)?nhentai\.net/search/?\?([^#]+)"
    test = ("https://nhentai.net/search/?q=touhou", {
        "pattern": NhentaiGalleryExtractor.pattern,
        "count": 30,
        "range": "1-30",
    })

    def __init__(self, match):
        """Parse the query string of the matched search URL"""
        Extractor.__init__(self, match)
        self.params = text.parse_query(match.group(1))

    def items(self):
        """Yield one Queue message per gallery found by the search"""
        yield Message.Version, 1
        # Tells the consumer which extractor class handles the queued URLs.
        data = {"_extractor": NhentaiGalleryExtractor}
        for gallery_id in self._pagination(self.params):
            url = "{}/g/{}/".format(self.root, gallery_id)
            yield Message.Queue, url, data

    def _pagination(self, params):
        """Yield gallery IDs from all search result pages

        Starts at the page given in 'params' (default 1) and advances
        'params["page"]' in place until a result page no longer contains
        a 'class="next"' marker.
        """
        url = "{}/search/".format(self.root)
        params["page"] = text.parse_int(params.get("page"), 1)

        while True:
            page = self.request(url, params=params).text
            # Gallery IDs are the path segment following 'href="/g/'.
            yield from text.extract_iter(page, 'href="/g/', '/')
            if 'class="next"' not in page:
                return
            params["page"] += 1
