diff options
Diffstat (limited to 'gallery_dl/extractor/nozomi.py')
| -rw-r--r-- | gallery_dl/extractor/nozomi.py | 185 |
1 files changed, 185 insertions, 0 deletions
# -*- coding: utf-8 -*-
# From: gallery_dl/extractor/nozomi.py (new file, mode 100644, blob 97be789)

# Copyright 2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://nozomi.la/"""

from .common import Extractor, Message
from .. import text


class NozomiExtractor(Extractor):
    """Base class for nozomi extractors"""
    category = "nozomi"
    root = "https://nozomi.la"
    filename_fmt = "{postid}.{extension}"
    archive_fmt = "{postid}"

    def items(self):
        """Yield a Directory and a Url message for every post ID.

        Post IDs come from self.posts(); the JSON document of each post
        is fetched from j.nozomi.la, normalized, and merged with the
        dict returned by self.metadata().  Posts whose request yields a
        status code >= 400 are logged and skipped.
        """
        yield Message.Version, 1

        data = self.metadata()
        self.session.headers["Origin"] = self.root
        self.session.headers["Referer"] = self.root + "/"

        for post_id in map(str, self.posts()):
            # path layout: /post/<last digit>/<two digits before it>/<id>
            url = "https://j.nozomi.la/post/{}/{}/{}.json".format(
                post_id[-1], post_id[-3:-1], post_id)
            response = self.request(url, fatal=False)

            if response.status_code >= 400:
                self.log.warning(
                    "Skipping post %s ('%s %s')",
                    post_id, response.status_code, response.reason)
                continue

            image = response.json()
            image["tags"] = self._list(image.get("general"))
            image["artist"] = self._list(image.get("artist"))
            image["copyright"] = self._list(image.get("copyright"))
            image["character"] = self._list(image.get("character"))
            image["is_video"] = bool(image.get("is_video"))
            # ":00" is appended before parsing with "%z"; presumably the
            # site reports the UTC offset in hours only -- TODO confirm
            image["date"] = text.parse_datetime(
                image["date"] + ":00", "%Y-%m-%d %H:%M:%S%z")
            image["url"] = text.urljoin(self.root, image["imageurl"])
            text.nameext_from_url(image["url"], image)
            image.update(data)

            # drop the raw fields that were replaced by normalized ones
            for key in ("general", "imageurl", "imageurls"):
                image.pop(key, None)

            yield Message.Directory, image
            yield Message.Url, image["url"], image

    def metadata(self):
        """Return extra metadata merged into every post (none by default)"""
        return {}

    def posts(self):
        """Return an iterable of post IDs (empty by default)"""
        return ()

    @staticmethod
    def _list(src):
        """Return the 'tagname_display' value of every entry in 'src'.

        A falsy 'src' (None, empty list) results in an empty list.
        """
        if not src:
            return []
        return [x["tagname_display"] for x in src]

    @staticmethod
    def _unpack(b):
        """Yield the big-endian 32-bit integers stored in 'b'.

        'b' is indexed in steps of 4 bytes; a length that is not a
        multiple of 4 raises IndexError on the trailing partial group.
        """
        for i in range(0, len(b), 4):
            yield (b[i] << 24) + (b[i+1] << 16) + (b[i+2] << 8) + b[i+3]


class NozomiPostExtractor(NozomiExtractor):
    """Extractor for individual posts on nozomi.la"""
    subcategory = "post"
    pattern = r"(?:https?://)?nozomi\.la/post/(\d+)"
    test = ("https://nozomi.la/post/3649262.html", {
        "url": "f4522adfc8159355fd0476de28761b5be0f02068",
        "content": "cd20d2c5149871a0b80a1b0ce356526278964999",
        "keyword": {
            "artist"   : ["hammer (sunset beach)"],
            "character": ["patchouli knowledge"],
            "copyright": ["touhou"],
            "dataid"   : "re:aaa9f7c632cde1e1a5baaff3fb6a6d857ec73df7fdc5cf5a",
            "date"     : "type:datetime",
            "extension": "jpg",
            "favorites": int,
            "filename" : str,
            "height"   : 768,
            "is_video" : False,
            "postid"   : 3649262,
            "source"   : "danbooru",
            "sourceid" : 2434215,
            "tags"     : list,
            "type"     : "jpg",
            "url"      : str,
            "width"    : 1024,
        },
    })

    def __init__(self, match):
        NozomiExtractor.__init__(self, match)
        self.post_id = match.group(1)

    def posts(self):
        """Return the single post ID taken from the URL"""
        return (self.post_id,)


class NozomiTagExtractor(NozomiExtractor):
    """Extractor for posts from tag searches on nozomi.la"""
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    archive_fmt = "t_{search_tags}_{postid}"
    pattern = r"(?:https?://)?nozomi\.la/tag/([^/?&#]+)-\d+\."
    test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", {
        "pattern": r"^https://i.nozomi.la/\w/\w\w/\w+\.\w+$",
        "count": ">= 75",
        "range": "1-75",
    })

    def __init__(self, match):
        NozomiExtractor.__init__(self, match)
        self.tags = text.unquote(match.group(1)).lower()

    def metadata(self):
        """Return the searched tag as 'search_tags'"""
        return {"search_tags": self.tags}

    def posts(self):
        """Yield post IDs from the tag's '.nozomi' file.

        The file is requested in 256-byte ranges (64 IDs per request)
        until the offset reaches the total size given after the "/" in
        the Content-Range response header.
        """
        url = "https://n.nozomi.la/nozomi/{}.nozomi".format(self.tags)
        i = 0

        while True:
            headers = {"Range": "bytes={}-{}".format(i, i+255)}
            response = self.request(url, headers=headers)
            yield from self._unpack(response.content)

            i += 256
            cr = response.headers.get("Content-Range", "").rpartition("/")[2]
            # 'i' is passed as default, so a missing total presumably
            # ends the loop as well -- verify against text.parse_int
            if text.parse_int(cr, i) <= i:
                return


class NozomiSearchExtractor(NozomiExtractor):
    """Extractor for search results on nozomi.la"""
    subcategory = "search"
    directory_fmt = ("{category}", "{search_tags:J }")
    archive_fmt = "t_{search_tags}_{postid}"
    pattern = r"(?:https?://)?nozomi\.la/search\.html\?q=([^&#]+)"
    test = ("https://nozomi.la/search.html?q=hibiscus%203:4_ratio#1", {
        "count": ">= 5",
    })

    def __init__(self, match):
        NozomiExtractor.__init__(self, match)
        self.tags = text.unquote(match.group(1)).lower().split()

    def metadata(self):
        """Return the list of searched tags as 'search_tags'"""
        return {"search_tags": self.tags}

    def posts(self):
        """Return the set of post IDs matching all search tags.

        A regular tag contributes the IDs of its '.nozomi' file; a tag
        prefixed with "-" contributes every ID of the global index that
        is not in that tag's file.  The result is the intersection of
        all contributions (an empty set if there are no tags).
        """
        index = None
        # None means "no tag processed yet".  An empty set must not be
        # used for this: an empty intermediate result has to stay empty
        # instead of being re-seeded by the next tag.
        result = None

        def nozomi(path):
            url = "https://j.nozomi.la/" + path + ".nozomi"
            return self._unpack(self.request(url).content)

        for tag in self.tags:
            if tag[0] == "-":
                if index is None:
                    # fetched once and shared by all negated tags
                    index = set(nozomi("index"))
                items = index.difference(nozomi("nozomi/" + tag[1:]))
            else:
                items = nozomi("nozomi/" + tag)

            if result is None:
                result = set(items)
            else:
                result.intersection_update(items)

            if not result:
                # nothing can match anymore; skip the remaining requests
                break

        return result if result is not None else set()
