summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/hitomi.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/hitomi.py')
-rw-r--r-- gallery_dl/extractor/hitomi.py 109
1 files changed, 106 insertions, 3 deletions
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 18df9df..308b42c 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -108,9 +108,9 @@ class HitomiTagExtractor(Extractor):
category = "hitomi"
subcategory = "tag"
root = "https://hitomi.la"
- pattern = (r"(?:https?://)?hitomi\.la/"
- r"(tag|artist|group|series|type|character)/"
- r"([^/?#]+)\.html")
+ pattern = (r"(?:https?://)?hitomi\.la"
+ r"/(tag|artist|group|series|type|character)"
+ r"/([^/?#]+)\.html")
example = "https://hitomi.la/tag/TAG-LANG.html"
def __init__(self, match):
@@ -151,6 +151,109 @@ class HitomiTagExtractor(Extractor):
return
class HitomiIndexExtractor(HitomiTagExtractor):
    """Extractor for galleries listed on hitomi.la index pages"""
    subcategory = "index"
    pattern = r"(?:https?://)?hitomi\.la/(\w+)-(\w+)\.html"
    example = "https://hitomi.la/index-LANG.html"

    def __init__(self, match):
        # Initialize via the base Extractor directly; the two groups
        # captured by 'pattern' become 'tag' and 'language'.
        Extractor.__init__(self, match)
        self.tag, self.language = match.groups()

    def items(self):
        """Yield a Message.Queue entry for every gallery ID of the index

        The '.nozomi' file is fetched in 100-byte slices using HTTP Range
        requests until the total size reported by the first response's
        Content-Range header has been covered.
        """
        nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
            self.tag, self.language)
        headers = {
            "Origin": self.root,
            "Cache-Control": "max-age=0",
        }
        data = {"_extractor": HitomiGalleryExtractor}

        start = 0
        total = None  # unknown until the first response arrived
        while total is None or start < total:
            page = start // 100 + 1
            headers["Referer"] = "{}/{}-{}.html?page={}".format(
                self.root, self.tag, self.language, page)
            headers["Range"] = "bytes={}-{}".format(start, start + 99)
            response = self.request(nozomi_url, headers=headers)

            for gallery_id in decode_nozomi(response.content):
                gallery_url = "{}/galleries/{}.html".format(
                    self.root, gallery_id)
                yield Message.Queue, gallery_url, data

            if total is None:
                # the overall size is the part after "/" in Content-Range
                content_range = response.headers["content-range"]
                total = text.parse_int(content_range.rpartition("/")[2])
            start += 100
+
+
class HitomiSearchExtractor(Extractor):
    """Extractor for galleries from multiple tag searches on hitomi.la"""
    category = "hitomi"
    subcategory = "search"
    root = "https://hitomi.la"
    pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^/?#]+)"
    example = "https://hitomi.la/search.html?QUERY"

    def __init__(self, match):
        Extractor.__init__(self, match)
        self.query = match.group(1)
        # Leading, trailing, or repeated spaces would otherwise produce
        # empty search terms, and every empty term ends up as a request
        # for "https://ltn.hitomi.la/-all.nozomi".
        self.tags = [
            tag
            for tag in text.unquote(self.query).split(" ")
            if tag.strip()
        ]

    def items(self):
        """Yield a Message.Queue entry for each gallery matching all tags

        Gallery IDs are emitted in descending order.
        """
        data = {"_extractor": HitomiGalleryExtractor}

        results = [self.get_nozomi_items(tag) for tag in self.tags]
        if not results:
            # set.intersection() raises TypeError without any operand
            return
        intersects = set.intersection(*results)

        for gallery_id in sorted(intersects, reverse=True):
            gallery_url = "{}/galleries/{}.html".format(
                self.root, gallery_id)
            yield Message.Queue, gallery_url, data

    def get_nozomi_items(self, full_tag):
        """Return the set of gallery IDs listed for a single search term"""
        area, tag, language = self.get_nozomi_args(full_tag)

        if area:
            referer_base = "{}/n/{}/{}-{}.html".format(
                self.root, area, tag, language)
            nozomi_url = "https://ltn.hitomi.la/{}/{}-{}.nozomi".format(
                area, tag, language)
        else:
            referer_base = "{}/n/{}-{}.html".format(
                self.root, tag, language)
            nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
                tag, language)

        headers = {
            "Origin": self.root,
            "Cache-Control": "max-age=0",
            "Referer": "{}/search.html?{}".format(referer_base, self.query),
        }

        response = self.request(nozomi_url, headers=headers)
        return set(decode_nozomi(response.content))

    def get_nozomi_args(self, query):
        """Split a 'namespace:tag' search term into (area, tag, language)

        - 'female:X' / 'male:X' -> ("tag", "female:X" / "male:X", "all")
        - 'language:X'          -> (None, "index", "X")
        - 'NS:X' (anything else) -> ("NS", "X", "all")
        """
        # NOTE(review): a term without ":" yields an empty 'tag' and thus
        # a "<term>/-all.nozomi" URL; such terms are not handled specially
        # here -- confirm whether they need separate support.
        query = query.strip()
        ns, _, tag = query.partition(":")
        area = ns
        language = "all"

        if ns in ("female", "male"):
            area = "tag"
            # the namespace stays part of the tag name; use the stripped
            # term so stray whitespace never reaches the URL
            tag = query
        elif ns == "language":
            area = None
            language = tag
            tag = "index"

        return area, tag, language
+
+
@memcache(maxage=1800)
def _parse_gg(extr):
page = extr.request("https://ltn.hitomi.la/gg.js").text