diff options
Diffstat (limited to 'gallery_dl/extractor/hitomi.py')
| -rw-r--r-- | gallery_dl/extractor/hitomi.py | 41 |
1 files changed, 24 insertions, 17 deletions
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index ef08d69..e53b051 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -11,17 +11,20 @@ from .common import GalleryExtractor from .. import text, util import string +import json class HitomiGalleryExtractor(GalleryExtractor): """Extractor for image galleries from hitomi.la""" category = "hitomi" root = "https://hitomi.la" - pattern = r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)" + pattern = (r"(?:https?://)?hitomi\.la" + r"/(?:manga|doujinshi|cg|gamecg|galleries|reader)" + r"/(?:[^/?&#]+-)?(\d+)") test = ( ("https://hitomi.la/galleries/867789.html", { "pattern": r"https://aa.hitomi.la/galleries/867789/\d+.jpg", - "keyword": "d097a8db8e810045131b4510c41714004f9eff3a", + "keyword": "6701f8f588f119ef84cd29bdf99a399417b0a6a2", "count": 16, }), ("https://hitomi.la/galleries/1401410.html", { @@ -39,6 +42,11 @@ class HitomiGalleryExtractor(GalleryExtractor): "url": "055c898a36389719799d6bce76889cc4ea4421fc", "count": 1413, }), + ("https://hitomi.la/manga/amazon-no-hiyaku-867789.html"), + ("https://hitomi.la/manga/867789.html"), + ("https://hitomi.la/doujinshi/867789.html"), + ("https://hitomi.la/cg/867789.html"), + ("https://hitomi.la/gamecg/867789.html"), ("https://hitomi.la/reader/867789.html"), ) @@ -54,6 +62,11 @@ class HitomiGalleryExtractor(GalleryExtractor): self.fallback = True url = url.replace("/galleries/", "/reader/") response = GalleryExtractor.request(self, url, **kwargs) + elif b"<title>Redirect</title>" in response.content: + url = text.extract(response.text, "href='", "'")[0] + if not url.startswith("http"): + url = text.urljoin(self.root, url) + response = self.request(url, **kwargs) return response def metadata(self, page): @@ -86,25 +99,19 @@ class HitomiGalleryExtractor(GalleryExtractor): # see https://ltn.hitomi.la/common.js offset = text.parse_int(self.gallery_id[-1]) % 3 subdomain = chr(97 + offset) + "a" - base = "https://" + subdomain + ".hitomi.la/galleries/" + base = "https://{}.hitomi.la/galleries/{}/".format( + subdomain, self.gallery_id) # set Referer header before image downloads (#239) - self.session.headers["Referer"] = self.chapter_url - - # handle Game CG galleries with scenes (#321) - scenes = text.extract(page, "var scene_indexes = [", "]")[0] - if scenes and scenes.strip(): - url = "{}/reader/{}.html".format(self.root, self.gallery_id) - page = self.request(url).text - begin, end = ">//g.hitomi.la/galleries/", "</div>" - elif self.fallback: - begin, end = ">//g.hitomi.la/galleries/", "</div>" - else: - begin, end = "'//tn.hitomi.la/smalltn/", ".jpg'," + self.session.headers["Referer"] = self.gallery_url + + # get 'galleryinfo' + url = "https://ltn.hitomi.la/galleries/{}.js".format(self.gallery_id) + page = self.request(url).text return [ - (base + urlpart, None) - for urlpart in text.extract_iter(page, begin, end) + (base + image["name"], None) + for image in json.loads(page.partition("=")[2]) ] @staticmethod |
