summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/xvideos.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@ubuntu.com>, 2019-12-25 19:40:28 -0500
committer: Unit 193 <unit193@ubuntu.com>, 2019-12-25 19:40:28 -0500
commit f9a1a9dcb7df977eeac9544786df9c0b93795815 (patch)
tree 8cb69cf7685da8d7e4deb7dc1d6b209098e1ddfb /gallery_dl/extractor/xvideos.py
parent 0c73e982fa596da07f23b377621ab894a9e64884 (diff)
New upstream version 1.12.1 (upstream/1.12.1)
Diffstat (limited to 'gallery_dl/extractor/xvideos.py')
-rw-r--r-- gallery_dl/extractor/xvideos.py 126
1 file changed, 64 insertions, 62 deletions
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index e253b7f..80a3614 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -6,86 +6,91 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://www.xvideos.com/"""
+"""Extractors for https://www.xvideos.com/"""
-from .common import Extractor, Message
-from .. import text, exception
+from .common import GalleryExtractor, Extractor, Message
+from .. import text
import json
-class XvideosExtractor(Extractor):
+class XvideosBase():
"""Base class for xvideos extractors"""
category = "xvideos"
root = "https://www.xvideos.com"
-class XvideosGalleryExtractor(XvideosExtractor):
- """Extractor for user profile galleries from xvideos.com"""
+class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
+ """Extractor for user profile galleries on xvideos.com"""
subcategory = "gallery"
- directory_fmt = ("{category}", "{user[name]}", "{title}")
- filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
- archive_fmt = "{gallery_id}_{num}"
+ directory_fmt = ("{category}", "{user[name]}",
+ "{gallery[id]} {gallery[title]}")
+ filename_fmt = "{category}_{gallery[id]}_{num:>03}.{extension}"
+ archive_fmt = "{gallery[id]}_{num}"
pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com"
- r"/profiles/([^/?&#]+)/photos/(\d+)")
+ r"/(?:profiles|amateur-channels|model-channels)"
+ r"/([^/?&#]+)/photos/(\d+)")
test = (
- (("https://www.xvideos.com/profiles"
- "/pervertedcouple/photos/751031/random_stuff"), {
+ ("https://www.xvideos.com/profiles/pervertedcouple/photos/751031", {
"url": "4f0d992e5dc39def2c3ac8e099d17bf09e76e3c7",
- "keyword": "65979d63a69576cf692b41d5fbbd995cc40a51b9",
- }),
- ("https://www.xvideos.com/profiles/pervertedcouple/photos/751032/", {
- "exception": exception.NotFoundError,
+ "keyword": {
+ "gallery": {
+ "id" : 751031,
+ "title": "Random Stuff",
+ "tags" : list,
+ },
+ "user": {
+ "id" : 20245371,
+ "name" : "pervertedcouple",
+ "display" : "Pervertedcouple",
+ "sex" : "Woman",
+ "description": str,
+ },
+ },
}),
+ ("https://www.xvideos.com/amateur-channels/pervertedcouple/photos/12"),
+ ("https://www.xvideos.com/model-channels/pervertedcouple/photos/12"),
)
def __init__(self, match):
- XvideosExtractor.__init__(self, match)
- self.user, self.gid = match.groups()
-
- def items(self):
- url = "{}/profiles/{}/photos/{}".format(self.root, self.user, self.gid)
- page = self.request(url, notfound=self.subcategory).text
- data = self.get_metadata(page)
- imgs = self.get_images(page)
- data["count"] = len(imgs)
- yield Message.Version, 1
- yield Message.Directory, data
- for url in imgs:
- data["num"] = text.parse_int(url.rsplit("_", 2)[1])
- data["extension"] = url.rpartition(".")[2]
- yield Message.Url, url, data
-
- def get_metadata(self, page):
- """Collect metadata for extractor-job"""
- data = text.extract_all(page, (
- ("userid" , '"id_user":', ','),
- ("display", '"display":"', '"'),
- ("title" , '"title":"', '"'),
- ("descr" , '<small class="mobile-hide">', '</small>'),
- ("tags" , '<em>Tagged:</em>', '<'),
- ))[0]
+ self.user, self.gallery_id = match.groups()
+ url = "{}/profiles/{}/photos/{}".format(
+ self.root, self.user, self.gallery_id)
+ GalleryExtractor.__init__(self, match, url)
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ user = {
+ "id" : text.parse_int(extr('"id_user":', ',')),
+ "display": extr('"display":"', '"'),
+ "sex" : extr('"sex":"', '"'),
+ "name" : self.user,
+ }
+ title = extr('"title":"', '"')
+ user["description"] = extr(
+ '<small class="mobile-hide">', '</small>').strip()
+ tags = extr('<em>Tagged:</em>', '<').strip()
return {
- "user": {
- "id": text.parse_int(data["userid"]),
- "name": self.user,
- "display": data["display"],
- "description": data["descr"].strip(),
+ "user": user,
+ "gallery": {
+ "id" : text.parse_int(self.gallery_id),
+ "title": text.unescape(title),
+ "tags" : text.unescape(tags).split(", ") if tags else [],
},
- "tags": text.unescape(data["tags"] or "").strip().split(", "),
- "title": text.unescape(data["title"]),
- "gallery_id": text.parse_int(self.gid),
}
@staticmethod
- def get_images(page):
+ def images(page):
"""Return a list of all image urls for this gallery"""
- return list(text.extract_iter(
- page, '<a class="embed-responsive-item" href="', '"'))
+ return [
+ (url, None)
+ for url in text.extract_iter(
+ page, '<a class="embed-responsive-item" href="', '"')
+ ]
-class XvideosUserExtractor(XvideosExtractor):
- """Extractor for user profiles from xvideos.com"""
+class XvideosUserExtractor(XvideosBase, Extractor):
+ """Extractor for user profiles on xvideos.com"""
subcategory = "user"
categorytransfer = True
pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com"
@@ -93,16 +98,13 @@ class XvideosUserExtractor(XvideosExtractor):
test = (
("https://www.xvideos.com/profiles/pervertedcouple", {
"url": "a413f3e60d6d3a2de79bd44fa3b7a9c03db4336e",
- "keyword": "a796760d34732adc7ec52a8feb057515209a2ca6",
- }),
- ("https://www.xvideos.com/profiles/niwehrwhernvh", {
- "exception": exception.NotFoundError,
+ "keyword": "335a3304941ff2e666c0201e9122819b61b34adb",
}),
("https://www.xvideos.com/profiles/pervertedcouple#_tabPhotos"),
)
def __init__(self, match):
- XvideosExtractor.__init__(self, match)
+ Extractor.__init__(self, match)
self.user = match.group(1)
def items(self):
@@ -118,17 +120,17 @@ class XvideosUserExtractor(XvideosExtractor):
galleries = [
{
- "gallery_id": text.parse_int(gid),
+ "id" : text.parse_int(gid),
"title": text.unescape(gdata["title"]),
"count": gdata["nb_pics"],
"_extractor": XvideosGalleryExtractor,
}
for gid, gdata in data["galleries"].items()
]
- galleries.sort(key=lambda x: x["gallery_id"])
+ galleries.sort(key=lambda x: x["id"])
yield Message.Version, 1
for gallery in galleries:
url = "https://www.xvideos.com/profiles/{}/photos/{}".format(
- self.user, gallery["gallery_id"])
+ self.user, gallery["id"])
yield Message.Queue, url, gallery