diff options
Diffstat (limited to 'gallery_dl/extractor/fapello.py')
| -rw-r--r-- | gallery_dl/extractor/fapello.py | 31 |
1 file changed, 21 insertions, 10 deletions
diff --git a/gallery_dl/extractor/fapello.py b/gallery_dl/extractor/fapello.py index d4524e0..aff8e61 100644 --- a/gallery_dl/extractor/fapello.py +++ b/gallery_dl/extractor/fapello.py @@ -10,6 +10,9 @@ from .common import Extractor, Message from .. import text, exception +BASE_PATTERN = r"(?:https?://)?(?:www\.)?fapello\.(?:com|su)" + + class FapelloPostExtractor(Extractor): """Extractor for individual posts on fapello.com""" category = "fapello" @@ -17,16 +20,16 @@ class FapelloPostExtractor(Extractor): directory_fmt = ("{category}", "{model}") filename_fmt = "{model}_{id}.{extension}" archive_fmt = "{type}_{model}_{id}" - pattern = (r"(?:https?://)?(?:www\.)?fapello\.com" - r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)") + pattern = BASE_PATTERN + r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)" example = "https://fapello.com/MODEL/12345/" def __init__(self, match): Extractor.__init__(self, match) + self.root = text.root_from_url(match.group(0)) self.model, self.id = match.groups() def items(self): - url = "https://fapello.com/{}/{}/".format(self.model, self.id) + url = "{}/{}/{}/".format(self.root, self.model, self.id) page = text.extr( self.request(url, allow_redirects=False).text, 'class="uk-align-center"', "</div>", None) @@ -48,27 +51,29 @@ class FapelloModelExtractor(Extractor): """Extractor for all posts from a fapello model""" category = "fapello" subcategory = "model" - pattern = (r"(?:https?://)?(?:www\.)?fapello\.com" - r"/(?!top-(?:likes|followers)|popular_videos" + pattern = (BASE_PATTERN + r"/(?!top-(?:likes|followers)|popular_videos" r"|videos|trending|search/?$)" r"([^/?#]+)/?$") example = "https://fapello.com/model/" def __init__(self, match): Extractor.__init__(self, match) + self.root = text.root_from_url(match.group(0)) self.model = match.group(1) def items(self): num = 1 data = {"_extractor": FapelloPostExtractor} while True: - url = "https://fapello.com/ajax/model/{}/page-{}/".format( - self.model, num) + url = 
"{}/ajax/model/{}/page-{}/".format( + self.root, self.model, num) page = self.request(url).text if not page: return for url in text.extract_iter(page, '<a href="', '"'): + if url == "javascript:void(0);": + continue yield Message.Queue, url, data num += 1 @@ -77,13 +82,14 @@ class FapelloPathExtractor(Extractor): """Extractor for models and posts from fapello.com paths""" category = "fapello" subcategory = "path" - pattern = (r"(?:https?://)?(?:www\.)?fapello\.com" + pattern = (BASE_PATTERN + r"/(?!search/?$)(top-(?:likes|followers)|videos|trending" r"|popular_videos/[^/?#]+)/?$") example = "https://fapello.com/trending/" def __init__(self, match): Extractor.__init__(self, match) + self.root = text.root_from_url(match.group(0)) self.path = match.group(1) def items(self): @@ -93,9 +99,14 @@ class FapelloPathExtractor(Extractor): else: data = {"_extractor": FapelloPostExtractor} + if "fapello.su" in self.root: + self.path = self.path.replace("-", "/") + if self.path == "trending": + data = {"_extractor": FapelloModelExtractor} + while True: - page = self.request("https://fapello.com/ajax/{}/page-{}/".format( - self.path, num)).text + page = self.request("{}/ajax/{}/page-{}/".format( + self.root, self.path, num)).text if not page: return |
