summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/hitomi.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/hitomi.py')
-rw-r--r--gallery_dl/extractor/hitomi.py69
1 files changed, 32 insertions, 37 deletions
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index e15e13c..086b77c 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -16,19 +16,25 @@ import string
import re
-class HitomiGalleryExtractor(GalleryExtractor):
- """Extractor for image galleries from hitomi.la"""
+class HitomiExtractor(Extractor):
+ """Base class for hitomi extractors"""
category = "hitomi"
root = "https://hitomi.la"
+ domain = "gold-usergeneratedcontent.net"
+
+
+class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor):
+ """Extractor for hitomi.la galleries"""
pattern = (r"(?:https?://)?hitomi\.la"
r"/(?:manga|doujinshi|cg|gamecg|imageset|galleries|reader)"
r"/(?:[^/?#]+-)?(\d+)")
example = "https://hitomi.la/manga/TITLE-867789.html"
def __init__(self, match):
- self.gid = match.group(1)
- url = "https://ltn.hitomi.la/galleries/{}.js".format(self.gid)
- GalleryExtractor.__init__(self, match, url)
+ GalleryExtractor.__init__(self, match, False)
+ self.gid = gid = self.groups[0]
+ self.gallery_url = "https://ltn.{}/galleries/{}.js".format(
+ self.domain, gid)
def _init(self):
self.session.headers["Referer"] = "{}/reader/{}.html".format(
@@ -71,43 +77,34 @@ class HitomiGalleryExtractor(GalleryExtractor):
}
def images(self, _):
- # see https://ltn.hitomi.la/gg.js
+ # https://ltn.gold-usergeneratedcontent.net/gg.js
gg_m, gg_b, gg_default = _parse_gg(self)
- fmt = self.config("format") or "webp"
- if fmt == "original":
- subdomain, path, ext, check = "b", "images", None, False
- else:
- subdomain, path, ext, check = "a", fmt, fmt, (fmt != "webp")
+ fmt = ext = self.config("format") or "webp"
+ check = (fmt != "webp")
result = []
for image in self.info["files"]:
if check:
- if image.get("has" + fmt):
- path = ext = fmt
- else:
- path = ext = "webp"
+ ext = fmt if image.get("has" + fmt) else "webp"
ihash = image["hash"]
idata = text.nameext_from_url(image["name"])
idata["extension_original"] = idata["extension"]
- if ext:
- idata["extension"] = ext
+ idata["extension"] = ext
- # see https://ltn.hitomi.la/common.js
+ # https://ltn.gold-usergeneratedcontent.net/common.js
inum = int(ihash[-1] + ihash[-3:-1], 16)
- url = "https://{}{}.hitomi.la/{}/{}/{}/{}.{}".format(
- chr(97 + gg_m.get(inum, gg_default)),
- subdomain, path, gg_b, inum, ihash, idata["extension"],
+ url = "https://{}{}.{}/{}/{}/{}.{}".format(
+ ext[0], gg_m.get(inum, gg_default) + 1, self.domain,
+ gg_b, inum, ihash, ext,
)
result.append((url, idata))
return result
-class HitomiTagExtractor(Extractor):
+class HitomiTagExtractor(HitomiExtractor):
"""Extractor for galleries from tag searches on hitomi.la"""
- category = "hitomi"
subcategory = "tag"
- root = "https://hitomi.la"
pattern = (r"(?:https?://)?hitomi\.la"
r"/(tag|artist|group|series|type|character)"
r"/([^/?#]+)\.html")
@@ -126,8 +123,8 @@ class HitomiTagExtractor(Extractor):
"_extractor": HitomiGalleryExtractor,
"search_tags": text.unquote(self.tag.rpartition("-")[0]),
}
- nozomi_url = "https://ltn.hitomi.la/{}/{}.nozomi".format(
- self.type, self.tag)
+ nozomi_url = "https://ltn.{}/{}/{}.nozomi".format(
+ self.domain, self.type, self.tag)
headers = {
"Origin": self.root,
"Cache-Control": "max-age=0",
@@ -166,8 +163,8 @@ class HitomiIndexExtractor(HitomiTagExtractor):
def items(self):
data = {"_extractor": HitomiGalleryExtractor}
- nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
- self.tag, self.language)
+ nozomi_url = "https://ltn.{}/{}-{}.nozomi".format(
+ self.domain, self.tag, self.language)
headers = {
"Origin": self.root,
"Cache-Control": "max-age=0",
@@ -194,11 +191,9 @@ class HitomiIndexExtractor(HitomiTagExtractor):
return
-class HitomiSearchExtractor(Extractor):
+class HitomiSearchExtractor(HitomiExtractor):
"""Extractor for galleries from multiple tag searches on hitomi.la"""
- category = "hitomi"
subcategory = "search"
- root = "https://hitomi.la"
pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^/?#]+)"
example = "https://hitomi.la/search.html?QUERY"
@@ -224,11 +219,11 @@ class HitomiSearchExtractor(Extractor):
area, tag, language = self.get_nozomi_args(full_tag)
if area:
- nozomi_url = "https://ltn.hitomi.la/n/{}/{}-{}.nozomi".format(
- area, tag, language)
+ nozomi_url = "https://ltn.{}/n/{}/{}-{}.nozomi".format(
+ self.domain, area, tag, language)
else:
- nozomi_url = "https://ltn.hitomi.la/n/{}-{}.nozomi".format(
- tag, language)
+ nozomi_url = "https://ltn.{}/n/{}-{}.nozomi".format(
+ self.domain, tag, language)
headers = {
"Origin": self.root,
@@ -257,7 +252,7 @@ class HitomiSearchExtractor(Extractor):
@memcache(maxage=1800)
def _parse_gg(extr):
- page = extr.request("https://ltn.hitomi.la/gg.js").text
+ page = extr.request("https://ltn.gold-usergeneratedcontent.net/gg.js").text
m = {}
@@ -280,4 +275,4 @@ def _parse_gg(extr):
d = re.search(r"(?:var\s|default:)\s*o\s*=\s*(\d+)", page)
b = re.search(r"b:\s*[\"'](.+)[\"']", page)
- return m, b.group(1).strip("/"), int(d.group(1)) if d else 1
+ return m, b.group(1).strip("/"), int(d.group(1)) if d else 0