diff options
Diffstat (limited to 'gallery_dl/extractor/hitomi.py')
| -rw-r--r-- | gallery_dl/extractor/hitomi.py | 36 |
1 files changed, 28 insertions, 8 deletions
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index e4f18b3..ef08d69 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -20,12 +20,9 @@ class HitomiGalleryExtractor(GalleryExtractor): pattern = r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)" test = ( ("https://hitomi.la/galleries/867789.html", { - "url": "cb759868d090fe0e2655c3e29ebf146054322b6d", + "pattern": r"https://aa.hitomi.la/galleries/867789/\d+.jpg", "keyword": "d097a8db8e810045131b4510c41714004f9eff3a", - }), - ("https://hitomi.la/galleries/1036181.html", { - # "aa" subdomain for gallery-id ending in 1 (#142) - "pattern": r"https://aa\.hitomi\.la/", + "count": 16, }), ("https://hitomi.la/galleries/1401410.html", { # download test @@ -37,18 +34,39 @@ class HitomiGalleryExtractor(GalleryExtractor): "url": "c2a84185f467450b8b9b72fbe40c0649029ce007", "count": 210, }), + ("https://hitomi.la/galleries/1045954.html", { + # fallback for galleries only available through /reader/ URLs + "url": "055c898a36389719799d6bce76889cc4ea4421fc", + "count": 1413, + }), ("https://hitomi.la/reader/867789.html"), ) def __init__(self, match): - self.gallery_id = text.parse_int(match.group(1)) + self.gallery_id = match.group(1) + self.fallback = False url = "{}/galleries/{}.html".format(self.root, self.gallery_id) GalleryExtractor.__init__(self, match, url) + def request(self, url, **kwargs): + response = GalleryExtractor.request(self, url, fatal=False, **kwargs) + if response.status_code == 404: + self.fallback = True + url = url.replace("/galleries/", "/reader/") + response = GalleryExtractor.request(self, url, **kwargs) + return response + def metadata(self, page): + if self.fallback: + return { + "gallery_id": text.parse_int(self.gallery_id), + "title": text.unescape(text.extract( + page, "<title>", "<")[0].rpartition(" | ")[0]), + } + extr = text.extract_from(page, page.index('<h1><a href="/reader/')) data = { - "gallery_id": self.gallery_id, + "gallery_id": text.parse_int(self.gallery_id), "title" : text.unescape(extr('.html">', '<').strip()), "artist" : self._prep(extr('<h2>', '</h2>')), "group" : self._prep(extr('<td>Group</td><td>', '</td>')), @@ -66,7 +84,7 @@ class HitomiGalleryExtractor(GalleryExtractor): def images(self, page): # see https://ltn.hitomi.la/common.js - offset = self.gallery_id % 2 if self.gallery_id % 10 != 1 else 0 + offset = text.parse_int(self.gallery_id[-1]) % 3 subdomain = chr(97 + offset) + "a" base = "https://" + subdomain + ".hitomi.la/galleries/" @@ -79,6 +97,8 @@ class HitomiGalleryExtractor(GalleryExtractor): url = "{}/reader/{}.html".format(self.root, self.gallery_id) page = self.request(url).text begin, end = ">//g.hitomi.la/galleries/", "</div>" + elif self.fallback: + begin, end = ">//g.hitomi.la/galleries/", "</div>" else: begin, end = "'//tn.hitomi.la/smalltn/", ".jpg'," |
