diff options
| author | 2024-12-02 00:32:05 -0500 | |
|---|---|---|
| committer | 2024-12-02 00:32:05 -0500 | |
| commit | 7c235d96e2417306334cd93517a2723039a5a0b5 (patch) | |
| tree | cd29cbb34195d8f384237d70a3f8e7405827496a /gallery_dl/extractor/hitomi.py | |
| parent | c68fa03f6af81fc1a23a7021fe28e73bacf71f66 (diff) | |
| parent | 1981ccaaea6eab2cf32536ec5afe132a870914d8 (diff) | |
Update upstream source from tag 'upstream/1.28.0'
Update to upstream version '1.28.0'
with Debian dir d2219dc8130062139eee5a8e4e0da3dc60db3a4e
Diffstat (limited to 'gallery_dl/extractor/hitomi.py')
| -rw-r--r-- | gallery_dl/extractor/hitomi.py | 109 |
1 files changed, 106 insertions, 3 deletions
```diff
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 18df9df..308b42c 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -108,9 +108,9 @@ class HitomiTagExtractor(Extractor):
     category = "hitomi"
     subcategory = "tag"
     root = "https://hitomi.la"
-    pattern = (r"(?:https?://)?hitomi\.la/"
-               r"(tag|artist|group|series|type|character)/"
-               r"([^/?#]+)\.html")
+    pattern = (r"(?:https?://)?hitomi\.la"
+               r"/(tag|artist|group|series|type|character)"
+               r"/([^/?#]+)\.html")
     example = "https://hitomi.la/tag/TAG-LANG.html"
 
     def __init__(self, match):
@@ -151,6 +151,109 @@ class HitomiTagExtractor(Extractor):
                 return
 
 
+class HitomiIndexExtractor(HitomiTagExtractor):
+    """Extractor for galleries from index searches on hitomi.la"""
+    subcategory = "index"
+    pattern = r"(?:https?://)?hitomi\.la/(\w+)-(\w+)\.html"
+    example = "https://hitomi.la/index-LANG.html"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.tag, self.language = match.groups()
+
+    def items(self):
+        data = {"_extractor": HitomiGalleryExtractor}
+        nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
+            self.tag, self.language)
+        headers = {
+            "Origin": self.root,
+            "Cache-Control": "max-age=0",
+        }
+
+        offset = 0
+        total = None
+        while True:
+            headers["Referer"] = "{}/{}-{}.html?page={}".format(
+                self.root, self.tag, self.language, offset // 100 + 1)
+            headers["Range"] = "bytes={}-{}".format(offset, offset+99)
+            response = self.request(nozomi_url, headers=headers)
+
+            for gallery_id in decode_nozomi(response.content):
+                gallery_url = "{}/galleries/{}.html".format(
+                    self.root, gallery_id)
+                yield Message.Queue, gallery_url, data
+
+            offset += 100
+            if total is None:
+                total = text.parse_int(
+                    response.headers["content-range"].rpartition("/")[2])
+            if offset >= total:
+                return
+
+
+class HitomiSearchExtractor(Extractor):
+    """Extractor for galleries from multiple tag searches on hitomi.la"""
+    category = "hitomi"
+    subcategory = "search"
+    root = "https://hitomi.la"
+    pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^/?#]+)"
+    example = "https://hitomi.la/search.html?QUERY"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.query = match.group(1)
+        self.tags = text.unquote(self.query).split(" ")
+
+    def items(self):
+        data = {"_extractor": HitomiGalleryExtractor}
+
+        results = [self.get_nozomi_items(tag) for tag in self.tags]
+        intersects = set.intersection(*results)
+
+        for gallery_id in sorted(intersects, reverse=True):
+            gallery_url = "{}/galleries/{}.html".format(
+                self.root, gallery_id)
+            yield Message.Queue, gallery_url, data
+
+    def get_nozomi_items(self, full_tag):
+        area, tag, language = self.get_nozomi_args(full_tag)
+
+        if area:
+            referer_base = "{}/n/{}/{}-{}.html".format(
+                self.root, area, tag, language)
+            nozomi_url = "https://ltn.hitomi.la/{}/{}-{}.nozomi".format(
+                area, tag, language)
+        else:
+            referer_base = "{}/n/{}-{}.html".format(
+                self.root, tag, language)
+            nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
+                tag, language)
+
+        headers = {
+            "Origin": self.root,
+            "Cache-Control": "max-age=0",
+            "Referer": "{}/search.html?{}".format(referer_base, self.query),
+        }
+
+        response = self.request(nozomi_url, headers=headers)
+        return set(decode_nozomi(response.content))
+
+    def get_nozomi_args(self, query):
+        ns, _, tag = query.strip().partition(":")
+        area = ns
+        language = "all"
+
+        if ns == "female" or ns == "male":
+            area = "tag"
+            tag = query
+        elif ns == "language":
+            area = None
+            language = tag
+            tag = "index"
+
+        return area, tag, language
+
+
 @memcache(maxage=1800)
 def _parse_gg(extr):
     page = extr.request("https://ltn.hitomi.la/gg.js").text
```
