diff options
Diffstat (limited to 'gallery_dl/extractor/hitomi.py')
| -rw-r--r-- | gallery_dl/extractor/hitomi.py | 69 |
1 files changed, 32 insertions, 37 deletions
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index e15e13c..086b77c 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -16,19 +16,25 @@ import string import re -class HitomiGalleryExtractor(GalleryExtractor): - """Extractor for image galleries from hitomi.la""" +class HitomiExtractor(Extractor): + """Base class for hitomi extractors""" category = "hitomi" root = "https://hitomi.la" + domain = "gold-usergeneratedcontent.net" + + +class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor): + """Extractor for hitomi.la galleries""" pattern = (r"(?:https?://)?hitomi\.la" r"/(?:manga|doujinshi|cg|gamecg|imageset|galleries|reader)" r"/(?:[^/?#]+-)?(\d+)") example = "https://hitomi.la/manga/TITLE-867789.html" def __init__(self, match): - self.gid = match.group(1) - url = "https://ltn.hitomi.la/galleries/{}.js".format(self.gid) - GalleryExtractor.__init__(self, match, url) + GalleryExtractor.__init__(self, match, False) + self.gid = gid = self.groups[0] + self.gallery_url = "https://ltn.{}/galleries/{}.js".format( + self.domain, gid) def _init(self): self.session.headers["Referer"] = "{}/reader/{}.html".format( @@ -71,43 +77,34 @@ class HitomiGalleryExtractor(GalleryExtractor): } def images(self, _): - # see https://ltn.hitomi.la/gg.js + # https://ltn.gold-usergeneratedcontent.net/gg.js gg_m, gg_b, gg_default = _parse_gg(self) - fmt = self.config("format") or "webp" - if fmt == "original": - subdomain, path, ext, check = "b", "images", None, False - else: - subdomain, path, ext, check = "a", fmt, fmt, (fmt != "webp") + fmt = ext = self.config("format") or "webp" + check = (fmt != "webp") result = [] for image in self.info["files"]: if check: - if image.get("has" + fmt): - path = ext = fmt - else: - path = ext = "webp" + ext = fmt if image.get("has" + fmt) else "webp" ihash = image["hash"] idata = text.nameext_from_url(image["name"]) idata["extension_original"] = idata["extension"] - if ext: - idata["extension"] = ext + idata["extension"] = ext - # see https://ltn.hitomi.la/common.js + # https://ltn.gold-usergeneratedcontent.net/common.js inum = int(ihash[-1] + ihash[-3:-1], 16) - url = "https://{}{}.hitomi.la/{}/{}/{}/{}.{}".format( - chr(97 + gg_m.get(inum, gg_default)), - subdomain, path, gg_b, inum, ihash, idata["extension"], + url = "https://{}{}.{}/{}/{}/{}.{}".format( + ext[0], gg_m.get(inum, gg_default) + 1, self.domain, + gg_b, inum, ihash, ext, ) result.append((url, idata)) return result -class HitomiTagExtractor(Extractor): +class HitomiTagExtractor(HitomiExtractor): """Extractor for galleries from tag searches on hitomi.la""" - category = "hitomi" subcategory = "tag" - root = "https://hitomi.la" pattern = (r"(?:https?://)?hitomi\.la" r"/(tag|artist|group|series|type|character)" r"/([^/?#]+)\.html") @@ -126,8 +123,8 @@ class HitomiTagExtractor(Extractor): "_extractor": HitomiGalleryExtractor, "search_tags": text.unquote(self.tag.rpartition("-")[0]), } - nozomi_url = "https://ltn.hitomi.la/{}/{}.nozomi".format( - self.type, self.tag) + nozomi_url = "https://ltn.{}/{}/{}.nozomi".format( + self.domain, self.type, self.tag) headers = { "Origin": self.root, "Cache-Control": "max-age=0", @@ -166,8 +163,8 @@ class HitomiIndexExtractor(HitomiTagExtractor): def items(self): data = {"_extractor": HitomiGalleryExtractor} - nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format( - self.tag, self.language) + nozomi_url = "https://ltn.{}/{}-{}.nozomi".format( + self.domain, self.tag, self.language) headers = { "Origin": self.root, "Cache-Control": "max-age=0", @@ -194,11 +191,9 @@ class HitomiIndexExtractor(HitomiTagExtractor): return -class HitomiSearchExtractor(Extractor): +class HitomiSearchExtractor(HitomiExtractor): """Extractor for galleries from multiple tag searches on hitomi.la""" - category = "hitomi" subcategory = "search" - root = "https://hitomi.la" pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^/?#]+)" example = "https://hitomi.la/search.html?QUERY" @@ -224,11 +219,11 @@ class HitomiSearchExtractor(Extractor): area, tag, language = self.get_nozomi_args(full_tag) if area: - nozomi_url = "https://ltn.hitomi.la/n/{}/{}-{}.nozomi".format( - area, tag, language) + nozomi_url = "https://ltn.{}/n/{}/{}-{}.nozomi".format( + self.domain, area, tag, language) else: - nozomi_url = "https://ltn.hitomi.la/n/{}-{}.nozomi".format( - tag, language) + nozomi_url = "https://ltn.{}/n/{}-{}.nozomi".format( + self.domain, tag, language) headers = { "Origin": self.root, @@ -257,7 +252,7 @@ class HitomiSearchExtractor(Extractor): @memcache(maxage=1800) def _parse_gg(extr): - page = extr.request("https://ltn.hitomi.la/gg.js").text + page = extr.request("https://ltn.gold-usergeneratedcontent.net/gg.js").text m = {} @@ -280,4 +275,4 @@ def _parse_gg(extr): d = re.search(r"(?:var\s|default:)\s*o\s*=\s*(\d+)", page) b = re.search(r"b:\s*[\"'](.+)[\"']", page) - return m, b.group(1).strip("/"), int(d.group(1)) if d else 1 + return m, b.group(1).strip("/"), int(d.group(1)) if d else 0 |
