diff options
| author | 2021-03-13 16:26:30 -0500 | |
|---|---|---|
| committer | 2021-03-13 16:26:30 -0500 | |
| commit | 3201d77a148367d739862b4f07868a76eaeb7cb1 (patch) | |
| tree | 78b8d71633ec000672a84ad0bbbddd0513ae2d30 /gallery_dl/extractor/hentaicafe.py | |
| parent | fc83315c164afd74734adf27e0f7fec2011904aa (diff) | |
New upstream version 1.17.0. (tag: upstream/1.17.0)
Diffstat (limited to 'gallery_dl/extractor/hentaicafe.py')
| -rw-r--r-- | gallery_dl/extractor/hentaicafe.py | 103 |
1 files changed, 87 insertions, 16 deletions
diff --git a/gallery_dl/extractor/hentaicafe.py b/gallery_dl/extractor/hentaicafe.py index 462d3e9..aa79b67 100644 --- a/gallery_dl/extractor/hentaicafe.py +++ b/gallery_dl/extractor/hentaicafe.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2020 Mike Fährmann +# Copyright 2018-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,23 +10,46 @@ from . import foolslide from .. import text -from .common import Extractor +from .common import Extractor, Message from ..cache import memcache import re +BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentai\.cafe" -class HentaicafeChapterExtractor(foolslide.FoolslideChapterExtractor): - """Extractor for manga-chapters from hentai.cafe""" + +class HentaicafeBase(): + """Base class for hentaicafe extractors""" category = "hentaicafe" + root = "https://hentai.cafe" + + def _pagination(self, urlfmt): + data = {"_extractor": HentaicafeMangaExtractor} + pnum = text.parse_int(self.page_start, 1) + + while True: + page = self.request(urlfmt(pnum)).text + + for entry in text.extract_iter( + page, 'class="entry-featured', 'title="'): + url = text.extract(entry, 'href="', '"')[0] + if url: + yield Message.Queue, url, data + + if '>→<' not in page: + return + pnum += 1 + + +class HentaicafeChapterExtractor(HentaicafeBase, + foolslide.FoolslideChapterExtractor): + """Extractor for manga-chapters from hentai.cafe""" directory_fmt = ("{category}", "{manga}") filename_fmt = "c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}" - pattern = (r"(?:https?://)?(?:www\.)?hentai\.cafe" - r"(/manga/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)") + pattern = BASE_PATTERN + r"(/manga/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)" test = ("https://hentai.cafe/manga/read/saitom-box/en/0/1/", { "url": "8c6a8c56875ba3ed7ab0a74a64f9960077767fc2", "keyword": "6913608267d883c82b887303b9ced13821188329", }) - root = "https://hentai.cafe" def 
metadata(self, page): info = text.unescape(text.extract(page, '<title>', '</title>')[0]) @@ -43,11 +66,10 @@ class HentaicafeChapterExtractor(foolslide.FoolslideChapterExtractor): return {"artist": (), "tags": ()} -class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor): +class HentaicafeMangaExtractor(HentaicafeBase, + foolslide.FoolslideMangaExtractor): """Extractor for manga from hentai.cafe""" - category = "hentaicafe" - pattern = (r"(?:https?://)?" + r"(?:www\.)?hentai\.cafe" - r"(/hc\.fyi/\d+|(?:/manga/series)?/[^/?#]+)/?$") + pattern = BASE_PATTERN + r"(/hc\.fyi/\d+|(?:/manga/series)?/[^/?#]+)/?$" test = ( # single chapter ("https://hentai.cafe/hazuki-yuuto-summer-blues/", { @@ -71,13 +93,20 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor): }), ) - root = "https://hentai.cafe" - reverse = False - request = Extractor.request - chapterclass = HentaicafeChapterExtractor + + def items(self): + page = Extractor.request(self, self.gallery_url).text + + chapters = self.chapters(page) + if self.config("chapter-reverse", False): + chapters.reverse() + + for chapter, data in chapters: + data["_extractor"] = HentaicafeChapterExtractor + yield Message.Queue, chapter, data def chapters(self, page): - if "/manga/series/" in self.manga_url: + if "/manga/series/" in self.gallery_url: chapters = foolslide.FoolslideMangaExtractor.chapters(self, page) chapters.reverse() return chapters @@ -100,3 +129,45 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor): for url in re.findall( r'<a +class="x-btn[^"]*" +href="([^"]+)"', page) ] + + +class HentaicafeSearchExtractor(HentaicafeBase, Extractor): + """Extractor for hentaicafe search results""" + subcategory = "search" + pattern = BASE_PATTERN + r"/(?:page/(\d+)/?)?\?s=([^&#]+)" + test = ("https://hentai.cafe/?s=benimura", { + "pattern": HentaicafeMangaExtractor.pattern, + "count": ">= 10", + }) + + def __init__(self, match): + Extractor.__init__(self, match) + self.page_start, 
self.search = match.groups() + + def items(self): + fmt = "{}/page/{}?s={}".format + return self._pagination(lambda pnum: fmt(self.root, pnum, self.search)) + + +class HentaicafeTagExtractor(HentaicafeBase, Extractor): + """Extractor for hentaicafe tag/artist searches""" + subcategory = "tag" + pattern = (BASE_PATTERN + + r"/hc\.fyi/(tag|artist|category)/([^/?#]+)(?:/page/(\d+))?") + test = ( + ("https://hentai.cafe/hc.fyi/tag/vanilla"), + ("https://hentai.cafe/hc.fyi/category/book/page/5"), + ("https://hentai.cafe/hc.fyi/artist/benimura-karu", { + "pattern": HentaicafeMangaExtractor.pattern, + "count": ">= 10", + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.type, self.search, self.page_start = match.groups() + + def items(self): + fmt = "{}/hc.fyi/{}/{}/page/{}".format + return self._pagination( + lambda pnum: fmt(self.root, self.type, self.search, pnum)) |
