summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/sexcom.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/sexcom.py')
-rw-r--r--gallery_dl/extractor/sexcom.py138
1 files changed, 105 insertions, 33 deletions
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index 9e7d75d..2feb64e 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2023 Mike Fährmann
+# Copyright 2019-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -12,7 +12,7 @@ from .common import Extractor, Message
from .. import text
from datetime import datetime
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?sex\.com"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?sex\.com(?:/[a-z]{2})?"
class SexcomExtractor(Extractor):
@@ -24,6 +24,8 @@ class SexcomExtractor(Extractor):
root = "https://www.sex.com"
def items(self):
+ self.gifs = self.config("gifs", True)
+
yield Message.Directory, self.metadata()
for pin in map(self._parse_pin, self.pins()):
if not pin:
@@ -38,6 +40,7 @@ class SexcomExtractor(Extractor):
pin["date"] = dt
except Exception:
pass
+ pin["tags"] = [t[1:] for t in pin["tags"]]
yield Message.Url, url, pin
@@ -63,17 +66,32 @@ class SexcomExtractor(Extractor):
url = text.urljoin(self.root, text.unescape(url))
def _parse_pin(self, url):
- response = self.request(url, fatal=False)
+ if "/pin/" in url:
+ if url[-1] != "/":
+ url += "/"
+ elif url[-1] == "/":
+ url = url[:-1]
+
+ response = self.request(url, fatal=False, allow_redirects=False)
+ location = response.headers.get("location")
+
+ if location:
+ if location[0] == "/":
+ location = self.root + location
+ if len(location) <= 25:
+ return self.log.warning(
+ 'Unable to fetch %s: Redirect to homepage', url)
+ response = self.request(location, fatal=False)
+
if response.status_code >= 400:
- self.log.warning('Unable to fetch %s ("%s %s")',
- url, response.status_code, response.reason)
- return None
+ return self.log.warning('Unable to fetch %s: %s %s',
+ url, response.status_code, response.reason)
if "/pin/" in response.url:
return self._parse_pin_legacy(response)
if "/videos/" in response.url:
return self._parse_pin_video(response)
- return self._parse_pin_gifs(response)
+ return self._parse_pin_image(response)
def _parse_pin_legacy(self, response):
extr = text.extract_from(response.text)
@@ -94,7 +112,7 @@ class SexcomExtractor(Extractor):
if info:
try:
- path, _ = text.rextract(
+ path = text.rextr(
info, "src: '", "'", info.index("label: 'HD'"))
except ValueError:
path = text.extr(info, "src: '", "'")
@@ -124,20 +142,31 @@ class SexcomExtractor(Extractor):
return data
- def _parse_pin_gifs(self, response):
+ def _parse_pin_image(self, response):
extr = text.extract_from(response.text)
+ href = extr(' href="', '"').partition("?")[0]
+ title, _, type = extr("<title>", " | ").rpartition(" ")
data = {
"_http_headers": {"Referer": response.url},
- "type": "gif",
- "url": extr(' href="', '"'),
- "title": text.unescape(extr("<title>", " Gif | Sex.com<")),
+ "url": href,
+ "title": text.unescape(title),
"pin_id": text.parse_int(extr(
'rel="canonical" href="', '"').rpartition("/")[2]),
"tags": text.split_html(extr("</h1>", "</section>")),
}
- return text.nameext_from_url(data["url"], data)
+ text.nameext_from_url(href, data)
+ if type.lower() == "pic":
+ data["type"] = "picture"
+ else:
+ data["type"] = "gif"
+ if self.gifs and data["extension"] == "webp":
+ data["extension"] = "gif"
+ data["_fallback"] = (href,)
+ data["url"] = href[:-4] + "gif"
+
+ return data
def _parse_pin_video(self, response):
extr = text.extract_from(response.text)
@@ -147,6 +176,7 @@ class SexcomExtractor(Extractor):
data = {
"_ytdl_manifest": "hls",
+ "_ytdl_manifest_headers": {"Referer": response.url},
"extension": "mp4",
"type": "video",
"title": text.unescape(extr("<title>", " | Sex.com<")),
@@ -166,7 +196,7 @@ class SexcomPinExtractor(SexcomExtractor):
subcategory = "pin"
directory_fmt = ("{category}",)
pattern = (BASE_PATTERN +
- r"(/(?:pin|\w\w/(?:gif|video)s)/\d+/?)(?!.*#related$)")
+ r"(/(?:\w\w/(?:pic|gif|video)s|pin)/\d+/?)(?!.*#related$)")
example = "https://www.sex.com/pin/12345-TITLE/"
def pins(self):
@@ -185,8 +215,8 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor):
return {"original_pin": pin}
def pins(self):
- url = "{}/pin/related?pinId={}&limit=24&offset=0".format(
- self.root, self.groups[1])
+ url = (f"{self.root}/pin/related?pinId={self.groups[1]}"
+ f"&limit=24&offset=0")
return self._pagination(url)
@@ -201,7 +231,7 @@ class SexcomPinsExtractor(SexcomExtractor):
return {"user": text.unquote(self.groups[0])}
def pins(self):
- url = "{}/user/{}/pins/".format(self.root, self.groups[0])
+ url = f"{self.root}/user/{self.groups[0]}/pins/"
return self._pagination(url)
@@ -216,7 +246,7 @@ class SexcomLikesExtractor(SexcomExtractor):
return {"user": text.unquote(self.groups[0])}
def pins(self):
- url = "{}/user/{}/likes/".format(self.root, self.groups[0])
+ url = f"{self.root}/user/{self.groups[0]}/likes/"
return self._pagination(url)
@@ -236,33 +266,75 @@ class SexcomBoardExtractor(SexcomExtractor):
}
def pins(self):
- url = "{}/user/{}/{}/".format(self.root, self.user, self.board)
+ url = f"{self.root}/user/{self.user}/{self.board}/"
return self._pagination(url)
class SexcomSearchExtractor(SexcomExtractor):
"""Extractor for search results on www.sex.com"""
subcategory = "search"
- directory_fmt = ("{category}", "search", "{search[query]}")
- pattern = (BASE_PATTERN + r"/((?:"
- r"(pic|gif|video)s/([^/?#]*)|search/(pic|gif|video)s"
- r")/?(?:\?([^#]+))?)")
+ directory_fmt = ("{category}", "search", "{search[search]}")
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"(pic|gif|video)s(?:\?(search=[^#]+)$|/([^/?#]*))"
+ r"|search/(pic|gif|video)s"
+ r")/?(?:\?([^#]+))?")
example = "https://www.sex.com/search/pics?query=QUERY"
def _init(self):
- self.path, t1, query_alt, t2, query = self.groups
+ t1, qs1, search_alt, t2, qs2 = self.groups
- self.search = text.parse_query(query)
- self.search["type"] = t1 or t2
- if "query" not in self.search:
- self.search["query"] = query_alt or ""
+ self.params = params = text.parse_query(qs1 or qs2)
+ if "query" in params:
+ params["search"] = params.pop("query")
+ params.setdefault("sexual-orientation", "straight")
+ params.setdefault("order", "likeCount")
+ params.setdefault("search", search_alt or "")
- def metadata(self):
- return {"search": self.search}
+ self.kwdict["search"] = search = params.copy()
+ search["type"] = self.type = t1 or t2
- def pins(self):
- url = "{}/{}".format(self.root, self.path)
- return self._pagination(url)
+ def items(self):
+ root = "https://imagex1.sx.cdn.live"
+ type = self.type
+ gifs = self.config("gifs", True)
+
+ url = (f"{self.root}/portal/api/"
+ f"{'picture' if type == 'pic' else type}s/search")
+ params = self.params
+ params["page"] = text.parse_int(params.get("page"), 1)
+ params["limit"] = 40
+
+ while True:
+ data = self.request_json(url, params=params)
+
+ for pin in data["data"]:
+ path = pin["uri"]
+ pin["pin_id"] = pin.pop("id")
+ text.nameext_from_url(path, pin)
+
+ parts = path.rsplit("/", 4)
+ try:
+ pin["date_url"] = pin["date"] = datetime(
+ int(parts[1]), int(parts[2]), int(parts[3]))
+ except Exception:
+ pass
+
+ if type == "pic":
+ pin["type"] = "picture"
+ else:
+ pin["type"] = "gif"
+ if gifs and pin["extension"] == "webp":
+ pin["extension"] = "gif"
+ pin["_fallback"] = (f"{root}{path}",)
+ path = f"{path[:-4]}gif"
+
+ pin["url"] = f"{root}{path}"
+ yield Message.Directory, pin
+ yield Message.Url, pin["url"], pin
+
+ if params["page"] >= data["paging"]["numberOfPages"]:
+ break
+ params["page"] += 1
def _check_empty(response):