diff options
| author | 2025-07-31 01:22:07 -0400 | |
|---|---|---|
| committer | 2025-07-31 01:22:07 -0400 | |
| commit | d9539f96cc7ac112b7d8faad022190fbbc88c745 (patch) | |
| tree | 471249d60b9202c00d7d82abec8b296fc881292e /gallery_dl/extractor/sexcom.py | |
| parent | 889fc15f272118bf277737b6fac29d3faeffc641 (diff) | |
| parent | a6e995c093de8aae2e91a0787281bb34c0b871eb (diff) | |
Update upstream source from tag 'upstream/1.30.2'
Update to upstream version '1.30.2'
with Debian dir f0dcd28a671f8600479182ff128e05ba8904a0d8
Diffstat (limited to 'gallery_dl/extractor/sexcom.py')
| -rw-r--r-- | gallery_dl/extractor/sexcom.py | 138 |
1 files changed, 105 insertions, 33 deletions
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py index 9e7d75d..2feb64e 100644 --- a/gallery_dl/extractor/sexcom.py +++ b/gallery_dl/extractor/sexcom.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2023 Mike Fährmann +# Copyright 2019-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -12,7 +12,7 @@ from .common import Extractor, Message from .. import text from datetime import datetime -BASE_PATTERN = r"(?:https?://)?(?:www\.)?sex\.com" +BASE_PATTERN = r"(?:https?://)?(?:www\.)?sex\.com(?:/[a-z]{2})?" class SexcomExtractor(Extractor): @@ -24,6 +24,8 @@ class SexcomExtractor(Extractor): root = "https://www.sex.com" def items(self): + self.gifs = self.config("gifs", True) + yield Message.Directory, self.metadata() for pin in map(self._parse_pin, self.pins()): if not pin: @@ -38,6 +40,7 @@ class SexcomExtractor(Extractor): pin["date"] = dt except Exception: pass + pin["tags"] = [t[1:] for t in pin["tags"]] yield Message.Url, url, pin @@ -63,17 +66,32 @@ class SexcomExtractor(Extractor): url = text.urljoin(self.root, text.unescape(url)) def _parse_pin(self, url): - response = self.request(url, fatal=False) + if "/pin/" in url: + if url[-1] != "/": + url += "/" + elif url[-1] == "/": + url = url[:-1] + + response = self.request(url, fatal=False, allow_redirects=False) + location = response.headers.get("location") + + if location: + if location[0] == "/": + location = self.root + location + if len(location) <= 25: + return self.log.warning( + 'Unable to fetch %s: Redirect to homepage', url) + response = self.request(location, fatal=False) + if response.status_code >= 400: - self.log.warning('Unable to fetch %s ("%s %s")', - url, response.status_code, response.reason) - return None + return self.log.warning('Unable to fetch %s: %s %s', + url, response.status_code, response.reason) if "/pin/" in response.url: return self._parse_pin_legacy(response) if "/videos/" in response.url: return self._parse_pin_video(response) - return self._parse_pin_gifs(response) + return self._parse_pin_image(response) def _parse_pin_legacy(self, response): extr = text.extract_from(response.text) @@ -94,7 +112,7 @@ class SexcomExtractor(Extractor): if info: try: - path, _ = text.rextract( + path = text.rextr( info, "src: '", "'", info.index("label: 'HD'")) except ValueError: path = text.extr(info, "src: '", "'") @@ -124,20 +142,31 @@ class SexcomExtractor(Extractor): return data - def _parse_pin_gifs(self, response): + def _parse_pin_image(self, response): extr = text.extract_from(response.text) + href = extr(' href="', '"').partition("?")[0] + title, _, type = extr("<title>", " | ").rpartition(" ") data = { "_http_headers": {"Referer": response.url}, - "type": "gif", - "url": extr(' href="', '"'), - "title": text.unescape(extr("<title>", " Gif | Sex.com<")), + "url": href, + "title": text.unescape(title), "pin_id": text.parse_int(extr( 'rel="canonical" href="', '"').rpartition("/")[2]), "tags": text.split_html(extr("</h1>", "</section>")), } - return text.nameext_from_url(data["url"], data) + text.nameext_from_url(href, data) + if type.lower() == "pic": + data["type"] = "picture" + else: + data["type"] = "gif" + if self.gifs and data["extension"] == "webp": + data["extension"] = "gif" + data["_fallback"] = (href,) + data["url"] = href[:-4] + "gif" + + return data def _parse_pin_video(self, response): extr = text.extract_from(response.text) @@ -147,6 +176,7 @@ class SexcomExtractor(Extractor): data = { "_ytdl_manifest": "hls", + "_ytdl_manifest_headers": {"Referer": response.url}, "extension": "mp4", "type": "video", "title": text.unescape(extr("<title>", " | Sex.com<")), @@ -166,7 +196,7 @@ class SexcomPinExtractor(SexcomExtractor): subcategory = "pin" directory_fmt = ("{category}",) pattern = (BASE_PATTERN + - r"(/(?:pin|\w\w/(?:gif|video)s)/\d+/?)(?!.*#related$)") + r"(/(?:\w\w/(?:pic|gif|video)s|pin)/\d+/?)(?!.*#related$)") example = "https://www.sex.com/pin/12345-TITLE/" def pins(self): @@ -185,8 +215,8 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor): return {"original_pin": pin} def pins(self): - url = "{}/pin/related?pinId={}&limit=24&offset=0".format( - self.root, self.groups[1]) + url = (f"{self.root}/pin/related?pinId={self.groups[1]}" + f"&limit=24&offset=0") return self._pagination(url) @@ -201,7 +231,7 @@ class SexcomPinsExtractor(SexcomExtractor): return {"user": text.unquote(self.groups[0])} def pins(self): - url = "{}/user/{}/pins/".format(self.root, self.groups[0]) + url = f"{self.root}/user/{self.groups[0]}/pins/" return self._pagination(url) @@ -216,7 +246,7 @@ class SexcomLikesExtractor(SexcomExtractor): return {"user": text.unquote(self.groups[0])} def pins(self): - url = "{}/user/{}/likes/".format(self.root, self.groups[0]) + url = f"{self.root}/user/{self.groups[0]}/likes/" return self._pagination(url) @@ -236,33 +266,75 @@ class SexcomBoardExtractor(SexcomExtractor): } def pins(self): - url = "{}/user/{}/{}/".format(self.root, self.user, self.board) + url = f"{self.root}/user/{self.user}/{self.board}/" return self._pagination(url) class SexcomSearchExtractor(SexcomExtractor): """Extractor for search results on www.sex.com""" subcategory = "search" - directory_fmt = ("{category}", "search", "{search[query]}") - pattern = (BASE_PATTERN + r"/((?:" - r"(pic|gif|video)s/([^/?#]*)|search/(pic|gif|video)s" - r")/?(?:\?([^#]+))?)") + directory_fmt = ("{category}", "search", "{search[search]}") + pattern = (BASE_PATTERN + r"/(?:" + r"(pic|gif|video)s(?:\?(search=[^#]+)$|/([^/?#]*))" + r"|search/(pic|gif|video)s" + r")/?(?:\?([^#]+))?") example = "https://www.sex.com/search/pics?query=QUERY" def _init(self): - self.path, t1, query_alt, t2, query = self.groups + t1, qs1, search_alt, t2, qs2 = self.groups - self.search = text.parse_query(query) - self.search["type"] = t1 or t2 - if "query" not in self.search: - self.search["query"] = query_alt or "" + self.params = params = text.parse_query(qs1 or qs2) + if "query" in params: + params["search"] = params.pop("query") + params.setdefault("sexual-orientation", "straight") + params.setdefault("order", "likeCount") + params.setdefault("search", search_alt or "") - def metadata(self): - return {"search": self.search} + self.kwdict["search"] = search = params.copy() + search["type"] = self.type = t1 or t2 - def pins(self): - url = "{}/{}".format(self.root, self.path) - return self._pagination(url) + def items(self): + root = "https://imagex1.sx.cdn.live" + type = self.type + gifs = self.config("gifs", True) + + url = (f"{self.root}/portal/api/" + f"{'picture' if type == 'pic' else type}s/search") + params = self.params + params["page"] = text.parse_int(params.get("page"), 1) + params["limit"] = 40 + + while True: + data = self.request_json(url, params=params) + + for pin in data["data"]: + path = pin["uri"] + pin["pin_id"] = pin.pop("id") + text.nameext_from_url(path, pin) + + parts = path.rsplit("/", 4) + try: + pin["date_url"] = pin["date"] = datetime( + int(parts[1]), int(parts[2]), int(parts[3])) + except Exception: + pass + + if type == "pic": + pin["type"] = "picture" + else: + pin["type"] = "gif" + if gifs and pin["extension"] == "webp": + pin["extension"] = "gif" + pin["_fallback"] = (f"{root}{path}",) + path = f"{path[:-4]}gif" + + pin["url"] = f"{root}{path}" + yield Message.Directory, pin + yield Message.Url, pin["url"], pin + + if params["page"] >= data["paging"]["numberOfPages"]: + break + params["page"] += 1 def _check_empty(response): |
