diff options
Diffstat (limited to 'gallery_dl/extractor/xvideos.py')
| -rw-r--r-- | gallery_dl/extractor/xvideos.py | 32 |
1 files changed, 23 insertions, 9 deletions
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py index 46e574e..da9d6b0 100644 --- a/gallery_dl/extractor/xvideos.py +++ b/gallery_dl/extractor/xvideos.py @@ -11,6 +11,9 @@ from .common import GalleryExtractor, Extractor, Message from .. import text, util +BASE_PATTERN = (r"(?:https?://)?(?:www\.)?xvideos\.com" + r"/(?:profiles|(?:amateur-|model-)?channels)") + class XvideosBase(): """Base class for xvideos extractors""" @@ -25,9 +28,7 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor): "{gallery[id]} {gallery[title]}") filename_fmt = "{category}_{gallery[id]}_{num:>03}.{extension}" archive_fmt = "{gallery[id]}_{num}" - pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com" - r"/(?:profiles|amateur-channels|model-channels)" - r"/([^/?#]+)/photos/(\d+)") + pattern = BASE_PATTERN + r"/([^/?#]+)/photos/(\d+)" example = "https://www.xvideos.com/profiles/USER/photos/12345" def __init__(self, match): @@ -58,22 +59,35 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor): }, } - @staticmethod - def images(page): - """Return a list of all image urls for this gallery""" - return [ + def images(self, page): + results = [ (url, None) for url in text.extract_iter( page, '<a class="embed-responsive-item" href="', '"') ] + if not results: + return + + while len(results) % 500 == 0: + path = text.rextract(page, ' href="', '"', page.find(">Next</"))[0] + if not path: + break + page = self.request(self.root + path).text + results.extend( + (url, None) + for url in text.extract_iter( + page, '<a class="embed-responsive-item" href="', '"') + ) + + return results + class XvideosUserExtractor(XvideosBase, Extractor): """Extractor for user profiles on xvideos.com""" subcategory = "user" categorytransfer = True - pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com" - r"/profiles/([^/?#]+)/?(?:#.*)?$") + pattern = BASE_PATTERN + r"/([^/?#]+)/?(?:#.*)?$" example = "https://www.xvideos.com/profiles/USER" def __init__(self, match): |
