about | summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/sexcom.py
diff options
context:
space:
mode:
author: Libravatar Unit 193 <unit193@unit193.net> 2025-03-29 07:19:58 -0400
committer: Libravatar Unit 193 <unit193@unit193.net> 2025-03-29 07:19:58 -0400
commit 662e5ac868a5c1a3e7bc95b37054b3a0ca4db74f (patch)
tree 537d0429926fb5eb3719aa2b384048ae79bda0b8 /gallery_dl/extractor/sexcom.py
parent 8026a3c45446030d7af524bfc487d3462c8114ef (diff)
New upstream version 1.29.3. upstream/1.29.3
Diffstat (limited to 'gallery_dl/extractor/sexcom.py')
-rw-r--r-- gallery_dl/extractor/sexcom.py 121
1 files changed, 83 insertions, 38 deletions
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index 7708b5c..9e7d75d 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -10,6 +10,9 @@
from .common import Extractor, Message
from .. import text
+from datetime import datetime
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?sex\.com"
class SexcomExtractor(Extractor):
@@ -23,8 +26,20 @@ class SexcomExtractor(Extractor):
def items(self):
yield Message.Directory, self.metadata()
for pin in map(self._parse_pin, self.pins()):
- if pin:
- yield Message.Url, pin["url"], pin
+ if not pin:
+ continue
+
+ url = pin["url"]
+ parts = url.rsplit("/", 4)
+ try:
+ pin["date_url"] = dt = datetime(
+ int(parts[1]), int(parts[2]), int(parts[3]))
+ if "date" not in pin:
+ pin["date"] = dt
+ except Exception:
+ pass
+
+ yield Message.Url, url, pin
def metadata(self):
return {}
@@ -53,10 +68,18 @@ class SexcomExtractor(Extractor):
self.log.warning('Unable to fetch %s ("%s %s")',
url, response.status_code, response.reason)
return None
+
+ if "/pin/" in response.url:
+ return self._parse_pin_legacy(response)
+ if "/videos/" in response.url:
+ return self._parse_pin_video(response)
+ return self._parse_pin_gifs(response)
+
+ def _parse_pin_legacy(self, response):
extr = text.extract_from(response.text)
data = {}
- data["_http_headers"] = {"Referer": url}
+ data["_http_headers"] = {"Referer": response.url}
data["thumbnail"] = extr('itemprop="thumbnail" content="', '"')
data["type"] = extr('<h1>' , '<').rstrip(" -").strip().lower()
data["title"] = text.unescape(extr('itemprop="name">' , '<'))
@@ -82,7 +105,8 @@ class SexcomExtractor(Extractor):
src = (text.extr(iframe, ' src="', '"') or
text.extr(iframe, " src='", "'"))
if not src:
- self.log.warning("Unable to fetch media from %s", url)
+ self.log.warning(
+ "Unable to fetch media from %s", response.url)
return None
data["extension"] = None
data["url"] = "ytdl:" + src
@@ -100,27 +124,60 @@ class SexcomExtractor(Extractor):
return data
+ def _parse_pin_gifs(self, response):
+ extr = text.extract_from(response.text)
+
+ data = {
+ "_http_headers": {"Referer": response.url},
+ "type": "gif",
+ "url": extr(' href="', '"'),
+ "title": text.unescape(extr("<title>", " Gif | Sex.com<")),
+ "pin_id": text.parse_int(extr(
+ 'rel="canonical" href="', '"').rpartition("/")[2]),
+ "tags": text.split_html(extr("</h1>", "</section>")),
+ }
+
+ return text.nameext_from_url(data["url"], data)
+
+ def _parse_pin_video(self, response):
+ extr = text.extract_from(response.text)
+
+ if not self.cookies.get("CloudFront-Key-Pair-Id", domain=".sex.com"):
+ self.log.warning("CloudFront cookies required for video downloads")
+
+ data = {
+ "_ytdl_manifest": "hls",
+ "extension": "mp4",
+ "type": "video",
+ "title": text.unescape(extr("<title>", " | Sex.com<")),
+ "pin_id": text.parse_int(extr(
+ 'rel="canonical" href="', '"').rpartition("/")[2]),
+ "tags": text.split_html(extr(
+ 'event_name="video_tags_click"', "<div data-testid=")
+ .partition(">")[2]),
+ "url": "ytdl:" + extr('<source src="', '"'),
+ }
+
+ return data
+
class SexcomPinExtractor(SexcomExtractor):
"""Extractor for a pinned image or video on www.sex.com"""
subcategory = "pin"
directory_fmt = ("{category}",)
- pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+)(?!.*#related$)"
+ pattern = (BASE_PATTERN +
+ r"(/(?:pin|\w\w/(?:gif|video)s)/\d+/?)(?!.*#related$)")
example = "https://www.sex.com/pin/12345-TITLE/"
- def __init__(self, match):
- SexcomExtractor.__init__(self, match)
- self.pin_id = match.group(1)
-
def pins(self):
- return ("{}/pin/{}/".format(self.root, self.pin_id),)
+ return (self.root + self.groups[0],)
class SexcomRelatedPinExtractor(SexcomPinExtractor):
"""Extractor for related pins on www.sex.com"""
subcategory = "related-pin"
directory_fmt = ("{category}", "related {original_pin[pin_id]}")
- pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+).*#related$"
+ pattern = BASE_PATTERN + r"(/pin/(\d+)/?).*#related$"
example = "https://www.sex.com/pin/12345#related"
def metadata(self):
@@ -129,7 +186,7 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor):
def pins(self):
url = "{}/pin/related?pinId={}&limit=24&offset=0".format(
- self.root, self.pin_id)
+ self.root, self.groups[1])
return self._pagination(url)
@@ -137,18 +194,14 @@ class SexcomPinsExtractor(SexcomExtractor):
"""Extractor for a user's pins on www.sex.com"""
subcategory = "pins"
directory_fmt = ("{category}", "{user}")
- pattern = r"(?:https?://)?(?:www\.)?sex\.com/user/([^/?#]+)/pins/"
+ pattern = BASE_PATTERN + r"/user/([^/?#]+)/pins/"
example = "https://www.sex.com/user/USER/pins/"
- def __init__(self, match):
- SexcomExtractor.__init__(self, match)
- self.user = match.group(1)
-
def metadata(self):
- return {"user": text.unquote(self.user)}
+ return {"user": text.unquote(self.groups[0])}
def pins(self):
- url = "{}/user/{}/pins/".format(self.root, self.user)
+ url = "{}/user/{}/pins/".format(self.root, self.groups[0])
return self._pagination(url)
@@ -156,18 +209,14 @@ class SexcomLikesExtractor(SexcomExtractor):
"""Extractor for a user's liked pins on www.sex.com"""
subcategory = "likes"
directory_fmt = ("{category}", "{user}", "Likes")
- pattern = r"(?:https?://)?(?:www\.)?sex\.com/user/([^/?#]+)/likes/"
+ pattern = BASE_PATTERN + r"/user/([^/?#]+)/likes/"
example = "https://www.sex.com/user/USER/likes/"
- def __init__(self, match):
- SexcomExtractor.__init__(self, match)
- self.user = match.group(1)
-
def metadata(self):
- return {"user": text.unquote(self.user)}
+ return {"user": text.unquote(self.groups[0])}
def pins(self):
- url = "{}/user/{}/likes/".format(self.root, self.user)
+ url = "{}/user/{}/likes/".format(self.root, self.groups[0])
return self._pagination(url)
@@ -175,15 +224,12 @@ class SexcomBoardExtractor(SexcomExtractor):
"""Extractor for pins from a board on www.sex.com"""
subcategory = "board"
directory_fmt = ("{category}", "{user}", "{board}")
- pattern = (r"(?:https?://)?(?:www\.)?sex\.com/user"
+ pattern = (BASE_PATTERN + r"/user"
r"/([^/?#]+)/(?!(?:following|pins|repins|likes)/)([^/?#]+)")
example = "https://www.sex.com/user/USER/BOARD/"
- def __init__(self, match):
- SexcomExtractor.__init__(self, match)
- self.user, self.board = match.groups()
-
def metadata(self):
+ self.user, self.board = self.groups
return {
"user" : text.unquote(self.user),
"board": text.unquote(self.board),
@@ -198,19 +244,18 @@ class SexcomSearchExtractor(SexcomExtractor):
"""Extractor for search results on www.sex.com"""
subcategory = "search"
directory_fmt = ("{category}", "search", "{search[query]}")
- pattern = (r"(?:https?://)?(?:www\.)?sex\.com/((?:"
+ pattern = (BASE_PATTERN + r"/((?:"
r"(pic|gif|video)s/([^/?#]*)|search/(pic|gif|video)s"
r")/?(?:\?([^#]+))?)")
example = "https://www.sex.com/search/pics?query=QUERY"
- def __init__(self, match):
- SexcomExtractor.__init__(self, match)
- self.path = match.group(1)
+ def _init(self):
+ self.path, t1, query_alt, t2, query = self.groups
- self.search = text.parse_query(match.group(5))
- self.search["type"] = match.group(2) or match.group(4)
+ self.search = text.parse_query(query)
+ self.search["type"] = t1 or t2
if "query" not in self.search:
- self.search["query"] = match.group(3) or ""
+ self.search["query"] = query_alt or ""
def metadata(self):
return {"search": self.search}