diff options
| author | 2024-03-25 02:57:44 -0400 | |
|---|---|---|
| committer | 2024-03-25 02:57:44 -0400 | |
| commit | 6e662211019a89caec44de8a57c675872b0b5498 (patch) | |
| tree | 5d9d5a2b7efc3a24dd6074e99b253b639fe5af1d /gallery_dl/extractor/xvideos.py | |
| parent | 01166fa52707cc282467427cf0e65c1b8983c4be (diff) | |
New upstream version 1.26.9. [upstream/1.26.9]
Diffstat (limited to 'gallery_dl/extractor/xvideos.py')
| -rw-r--r-- | gallery_dl/extractor/xvideos.py | 32 |
1 files changed, 23 insertions, 9 deletions
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py index 46e574e..da9d6b0 100644 --- a/gallery_dl/extractor/xvideos.py +++ b/gallery_dl/extractor/xvideos.py @@ -11,6 +11,9 @@ from .common import GalleryExtractor, Extractor, Message from .. import text, util +BASE_PATTERN = (r"(?:https?://)?(?:www\.)?xvideos\.com" + r"/(?:profiles|(?:amateur-|model-)?channels)") + class XvideosBase(): """Base class for xvideos extractors""" @@ -25,9 +28,7 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor): "{gallery[id]} {gallery[title]}") filename_fmt = "{category}_{gallery[id]}_{num:>03}.{extension}" archive_fmt = "{gallery[id]}_{num}" - pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com" - r"/(?:profiles|amateur-channels|model-channels)" - r"/([^/?#]+)/photos/(\d+)") + pattern = BASE_PATTERN + r"/([^/?#]+)/photos/(\d+)" example = "https://www.xvideos.com/profiles/USER/photos/12345" def __init__(self, match): @@ -58,22 +59,35 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor): }, } - @staticmethod - def images(page): - """Return a list of all image urls for this gallery""" - return [ + def images(self, page): + results = [ (url, None) for url in text.extract_iter( page, '<a class="embed-responsive-item" href="', '"') ] + if not results: + return + + while len(results) % 500 == 0: + path = text.rextract(page, ' href="', '"', page.find(">Next</"))[0] + if not path: + break + page = self.request(self.root + path).text + results.extend( + (url, None) + for url in text.extract_iter( + page, '<a class="embed-responsive-item" href="', '"') + ) + + return results + class XvideosUserExtractor(XvideosBase, Extractor): """Extractor for user profiles on xvideos.com""" subcategory = "user" categorytransfer = True - pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com" - r"/profiles/([^/?#]+)/?(?:#.*)?$") + pattern = BASE_PATTERN + r"/([^/?#]+)/?(?:#.*)?$" example = "https://www.xvideos.com/profiles/USER" def __init__(self, match): |
