diff options
Diffstat (limited to 'gallery_dl/extractor/myhentaigallery.py')
| -rw-r--r-- | gallery_dl/extractor/myhentaigallery.py | 43 |
1 files changed, 38 insertions, 5 deletions
diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py
index 2a39dc9..936f857 100644
--- a/gallery_dl/extractor/myhentaigallery.py
+++ b/gallery_dl/extractor/myhentaigallery.py
@@ -6,17 +6,21 @@
 
 """Extractors for https://myhentaigallery.com/"""
 
-from .common import GalleryExtractor
+from .common import Extractor, GalleryExtractor, Message
 from .. import text, exception
 
+BASE_PATTERN = r"(?:https?://)?myhentaigallery\.com"
 
-class MyhentaigalleryGalleryExtractor(GalleryExtractor):
-    """Extractor for image galleries from myhentaigallery.com"""
+
+class MyhentaigalleryBase():
     category = "myhentaigallery"
     root = "https://myhentaigallery.com"
+
+
+class MyhentaigalleryGalleryExtractor(MyhentaigalleryBase, GalleryExtractor):
+    """Extractor for image galleries from myhentaigallery.com"""
     directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}")
-    pattern = (r"(?:https?://)?myhentaigallery\.com"
-               r"/g(?:allery/(?:thumbnails|show))?/(\d+)")
+    pattern = rf"{BASE_PATTERN}/g(?:allery/(?:thumbnails|show))?/(\d+)"
     example = "https://myhentaigallery.com/g/12345"
 
     def __init__(self, match):
@@ -53,3 +57,32 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor):
                 "/thumbnail/", "/original/"), None)
             for url in text.extract_iter(page, 'class="comic-thumb"', '</div>')
         ]
+
+
+class MyhentaigalleryTagExtractor(MyhentaigalleryBase, Extractor):
+    """Extractor for myhentaigallery tag searches"""
+    subcategory = "tag"
+    pattern = rf"{BASE_PATTERN}(/g/(artist|category|group|parody)/(\d+).*)"
+    example = "https://myhentaigallery.com/g/category/123"
+
+    def items(self):
+        data = {"_extractor": MyhentaigalleryGalleryExtractor}
+        for url in self.galleries():
+            yield Message.Queue, url, data
+
+    def galleries(self):
+        root = self.root
+        url = root + self.groups[0]
+
+        while True:
+            page = self.request(url).text
+
+            for inner in text.extract_iter(
+                    page, '<div class="comic-inner">', "<div"):
+                yield root + text.extr(inner, 'href="', '"')
+
+            try:
+                pos = page.index(">Next<")
+            except ValueError:
+                return
+            url = root + text.rextr(page, 'href="', '"', pos)
