diff options
Diffstat (limited to 'gallery_dl/extractor/fapello.py')
| -rw-r--r-- | gallery_dl/extractor/fapello.py | 152 |
1 files changed, 152 insertions, 0 deletions
diff --git a/gallery_dl/extractor/fapello.py b/gallery_dl/extractor/fapello.py new file mode 100644 index 0000000..d6fcb4b --- /dev/null +++ b/gallery_dl/extractor/fapello.py @@ -0,0 +1,152 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://fapello.com/""" + +from .common import Extractor, Message +from .. import text, exception + + +class FapelloPostExtractor(Extractor): + """Extractor for individual posts on fapello.com""" + category = "fapello" + subcategory = "post" + directory_fmt = ("{category}", "{model}") + filename_fmt = "{model}_{id}.{extension}" + archive_fmt = "{type}_{model}_{id}" + pattern = (r"(?:https?://)?(?:www\.)?fapello\.com" + r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)") + test = ( + ("https://fapello.com/carrykey/530/", { + "pattern": (r"https://fapello\.com/content/c/a" + r"/carrykey/1000/carrykey_0530\.jpg"), + "keyword": { + "model": "carrykey", + "id" : 530, + "type" : "photo", + "thumbnail": "", + }, + }), + ("https://fapello.com/vladislava-661/693/", { + "pattern": (r"https://cdn\.fapello\.com/content/v/l" + r"/vladislava-661/1000/vladislava-661_0693\.mp4"), + "keyword": { + "model": "vladislava-661", + "id" : 693, + "type" : "video", + "thumbnail": ("https://fapello.com/content/v/l" + "/vladislava-661/1000/vladislava-661_0693.jpg"), + }, + }), + ("https://fapello.com/carrykey/000/", { + "exception": exception.NotFoundError, + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.model, self.id = match.groups() + + def items(self): + url = "https://fapello.com/{}/{}/".format(self.model, self.id) + page = text.extr( + self.request(url, allow_redirects=False).text, + 'class="uk-align-center"', "</div>", None) + if page is None: + raise exception.NotFoundError("post") + + data = { + "model": self.model, + "id" : text.parse_int(self.id), + "type" : "video" if 'type="video' in page else "photo", + "thumbnail": text.extr(page, 'poster="', '"'), + } + url = text.extr(page, 'src="', '"') + yield Message.Directory, data + yield Message.Url, url, text.nameext_from_url(url, data) + + +class FapelloModelExtractor(Extractor): + """Extractor for all posts from a fapello model""" + category = "fapello" + subcategory = "model" + pattern = (r"(?:https?://)?(?:www\.)?fapello\.com" + r"/(?!top-(?:likes|followers)|popular_videos" + r"|videos|trending|search/?$)" + r"([^/?#]+)/?$") + test = ( + ("https://fapello.com/hyoon/", { + "pattern": FapelloPostExtractor.pattern, + "range" : "1-50", + "count" : 50, + }), + ("https://fapello.com/kobaebeefboo/"), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.model = match.group(1) + + def items(self): + num = 1 + data = {"_extractor": FapelloPostExtractor} + while True: + url = "https://fapello.com/ajax/model/{}/page-{}/".format( + self.model, num) + page = self.request(url).text + if not page: + return + + for url in text.extract_iter(page, '<a href="', '"'): + yield Message.Queue, url, data + num += 1 + + +class FapelloPathExtractor(Extractor): + """Extractor for models and posts from fapello.com paths""" + category = "fapello" + subcategory = "path" + pattern = (r"(?:https?://)?(?:www\.)?fapello\.com" + r"/(?!search/?$)(top-(?:likes|followers)|videos|trending" + r"|popular_videos/[^/?#]+)/?$") + test = ( + ("https://fapello.com/top-likes/", { + "pattern": FapelloModelExtractor.pattern, + "range" : "1-10", + "count" : 10, + }), + ("https://fapello.com/videos/", { + "pattern": FapelloPostExtractor.pattern, + "range" : "1-10", + "count" : 10, + }), + ("https://fapello.com/top-followers/"), + ("https://fapello.com/trending/"), + ("https://fapello.com/popular_videos/twelve_hours/"), + ("https://fapello.com/popular_videos/week/"), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.path = match.group(1) + + def items(self): + num = 1 + if self.path in ("top-likes", "top-followers"): + data = {"_extractor": FapelloModelExtractor} + else: + data = {"_extractor": FapelloPostExtractor} + + while True: + page = self.request("https://fapello.com/ajax/{}/page-{}/".format( + self.path, num)).text + if not page: + return + + for item in text.extract_iter( + page, 'uk-transition-toggle">', "</a>"): + yield Message.Queue, text.extr(item, '<a href="', '"'), data + num += 1 |
