Update upstream source from tag 'upstream/1.30.2'

Update to upstream version '1.30.2' with Debian dir f0dcd28a671f8600479182ff128e05ba8904a0d8
author: Unit 193 <unit193@unit193.net> 2025-07-31 01:22:07 -0400
committer: Unit 193 <unit193@unit193.net> 2025-07-31 01:22:07 -0400
commit: d9539f96cc7ac112b7d8faad022190fbbc88c745 (patch)
tree: 471249d60b9202c00d7d82abec8b296fc881292e /gallery_dl/extractor/sexcom.py
parent: 889fc15f272118bf277737b6fac29d3faeffc641 (diff)
parent: a6e995c093de8aae2e91a0787281bb34c0b871eb (diff)
1 files changed, 105 insertions, 33 deletions
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index 9e7d75d..2feb64e 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2019-2023 Mike Fährmann
+# Copyright 2019-2025 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -12,7 +12,7 @@ from .common import Extractor, Message
 from .. import text
 from datetime import datetime
 
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?sex\.com"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?sex\.com(?:/[a-z]{2})?"
 
 
 class SexcomExtractor(Extractor):
@@ -24,6 +24,8 @@ class SexcomExtractor(Extractor):
     root = "https://www.sex.com"
 
     def items(self):
+        self.gifs = self.config("gifs", True)
+
         yield Message.Directory, self.metadata()
         for pin in map(self._parse_pin, self.pins()):
             if not pin:
@@ -38,6 +40,7 @@ class SexcomExtractor(Extractor):
                     pin["date"] = dt
             except Exception:
                 pass
+            pin["tags"] = [t[1:] for t in pin["tags"]]
 
             yield Message.Url, url, pin
 
@@ -63,17 +66,32 @@ class SexcomExtractor(Extractor):
             url = text.urljoin(self.root, text.unescape(url))
 
     def _parse_pin(self, url):
-        response = self.request(url, fatal=False)
+        if "/pin/" in url:
+            if url[-1] != "/":
+                url += "/"
+        elif url[-1] == "/":
+            url = url[:-1]
+
+        response = self.request(url, fatal=False, allow_redirects=False)
+        location = response.headers.get("location")
+
+        if location:
+            if location[0] == "/":
+                location = self.root + location
+            if len(location) <= 25:
+                return self.log.warning(
+                    'Unable to fetch %s: Redirect to homepage', url)
+            response = self.request(location, fatal=False)
+
         if response.status_code >= 400:
-            self.log.warning('Unable to fetch %s ("%s %s")',
-                             url, response.status_code, response.reason)
-            return None
+            return self.log.warning('Unable to fetch %s: %s %s',
+                                    url, response.status_code, response.reason)
 
         if "/pin/" in response.url:
             return self._parse_pin_legacy(response)
         if "/videos/" in response.url:
             return self._parse_pin_video(response)
-        return self._parse_pin_gifs(response)
+        return self._parse_pin_image(response)
 
     def _parse_pin_legacy(self, response):
         extr = text.extract_from(response.text)
@@ -94,7 +112,7 @@ class SexcomExtractor(Extractor):
 
             if info:
                 try:
-                    path, _ = text.rextract(
+                    path = text.rextr(
                         info, "src: '", "'", info.index("label: 'HD'"))
                 except ValueError:
                     path = text.extr(info, "src: '", "'")
@@ -124,20 +142,31 @@ class SexcomExtractor(Extractor):
 
         return data
 
-    def _parse_pin_gifs(self, response):
+    def _parse_pin_image(self, response):
         extr = text.extract_from(response.text)
+        href = extr(' href="', '"').partition("?")[0]
+        title, _, type = extr("<title>", " | ").rpartition(" ")
 
         data = {
             "_http_headers": {"Referer": response.url},
-            "type": "gif",
-            "url": extr(' href="', '"'),
-            "title": text.unescape(extr("<title>", " Gif | Sex.com<")),
+            "url": href,
+            "title": text.unescape(title),
             "pin_id": text.parse_int(extr(
                 'rel="canonical" href="', '"').rpartition("/")[2]),
             "tags": text.split_html(extr("</h1>", "</section>")),
         }
 
-        return text.nameext_from_url(data["url"], data)
+        text.nameext_from_url(href, data)
+        if type.lower() == "pic":
+            data["type"] = "picture"
+        else:
+            data["type"] = "gif"
+            if self.gifs and data["extension"] == "webp":
+                data["extension"] = "gif"
+                data["_fallback"] = (href,)
+                data["url"] = href[:-4] + "gif"
+
+        return data
 
     def _parse_pin_video(self, response):
         extr = text.extract_from(response.text)
@@ -147,6 +176,7 @@ class SexcomExtractor(Extractor):
 
         data = {
             "_ytdl_manifest": "hls",
+            "_ytdl_manifest_headers": {"Referer": response.url},
             "extension": "mp4",
             "type": "video",
             "title": text.unescape(extr("<title>", " | Sex.com<")),
@@ -166,7 +196,7 @@ class SexcomPinExtractor(SexcomExtractor):
     subcategory = "pin"
     directory_fmt = ("{category}",)
     pattern = (BASE_PATTERN +
-               r"(/(?:pin|\w\w/(?:gif|video)s)/\d+/?)(?!.*#related$)")
+               r"(/(?:\w\w/(?:pic|gif|video)s|pin)/\d+/?)(?!.*#related$)")
     example = "https://www.sex.com/pin/12345-TITLE/"
 
     def pins(self):
@@ -185,8 +215,8 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor):
         return {"original_pin": pin}
 
     def pins(self):
-        url = "{}/pin/related?pinId={}&limit=24&offset=0".format(
-            self.root, self.groups[1])
+        url = (f"{self.root}/pin/related?pinId={self.groups[1]}"
+               f"&limit=24&offset=0")
         return self._pagination(url)
 
 
@@ -201,7 +231,7 @@ class SexcomPinsExtractor(SexcomExtractor):
         return {"user": text.unquote(self.groups[0])}
 
     def pins(self):
-        url = "{}/user/{}/pins/".format(self.root, self.groups[0])
+        url = f"{self.root}/user/{self.groups[0]}/pins/"
         return self._pagination(url)
 
 
@@ -216,7 +246,7 @@ class SexcomLikesExtractor(SexcomExtractor):
         return {"user": text.unquote(self.groups[0])}
 
     def pins(self):
-        url = "{}/user/{}/likes/".format(self.root, self.groups[0])
+        url = f"{self.root}/user/{self.groups[0]}/likes/"
         return self._pagination(url)
 
 
@@ -236,33 +266,75 @@ class SexcomBoardExtractor(SexcomExtractor):
         }
 
     def pins(self):
-        url = "{}/user/{}/{}/".format(self.root, self.user, self.board)
+        url = f"{self.root}/user/{self.user}/{self.board}/"
         return self._pagination(url)
 
 
 class SexcomSearchExtractor(SexcomExtractor):
     """Extractor for search results on www.sex.com"""
     subcategory = "search"
-    directory_fmt = ("{category}", "search", "{search[query]}")
-    pattern = (BASE_PATTERN + r"/((?:"
-               r"(pic|gif|video)s/([^/?#]*)|search/(pic|gif|video)s"
-               r")/?(?:\?([^#]+))?)")
+    directory_fmt = ("{category}", "search", "{search[search]}")
+    pattern = (BASE_PATTERN + r"/(?:"
+               r"(pic|gif|video)s(?:\?(search=[^#]+)$|/([^/?#]*))"
+               r"|search/(pic|gif|video)s"
+               r")/?(?:\?([^#]+))?")
     example = "https://www.sex.com/search/pics?query=QUERY"
 
     def _init(self):
-        self.path, t1, query_alt, t2, query = self.groups
+        t1, qs1, search_alt, t2, qs2 = self.groups
 
-        self.search = text.parse_query(query)
-        self.search["type"] = t1 or t2
-        if "query" not in self.search:
-            self.search["query"] = query_alt or ""
+        self.params = params = text.parse_query(qs1 or qs2)
+        if "query" in params:
+            params["search"] = params.pop("query")
+        params.setdefault("sexual-orientation", "straight")
+        params.setdefault("order", "likeCount")
+        params.setdefault("search", search_alt or "")
 
-    def metadata(self):
-        return {"search": self.search}
+        self.kwdict["search"] = search = params.copy()
+        search["type"] = self.type = t1 or t2
 
-    def pins(self):
-        url = "{}/{}".format(self.root, self.path)
-        return self._pagination(url)
+    def items(self):
+        root = "https://imagex1.sx.cdn.live"
+        type = self.type
+        gifs = self.config("gifs", True)
+
+        url = (f"{self.root}/portal/api/"
+               f"{'picture' if type == 'pic' else type}s/search")
+        params = self.params
+        params["page"] = text.parse_int(params.get("page"), 1)
+        params["limit"] = 40
+
+        while True:
+            data = self.request_json(url, params=params)
+
+            for pin in data["data"]:
+                path = pin["uri"]
+                pin["pin_id"] = pin.pop("id")
+                text.nameext_from_url(path, pin)
+
+                parts = path.rsplit("/", 4)
+                try:
+                    pin["date_url"] = pin["date"] = datetime(
+                        int(parts[1]), int(parts[2]), int(parts[3]))
+                except Exception:
+                    pass
+
+                if type == "pic":
+                    pin["type"] = "picture"
+                else:
+                    pin["type"] = "gif"
+                    if gifs and pin["extension"] == "webp":
+                        pin["extension"] = "gif"
+                        pin["_fallback"] = (f"{root}{path}",)
+                        path = f"{path[:-4]}gif"
+
+                pin["url"] = f"{root}{path}"
+                yield Message.Directory, pin
+                yield Message.Url, pin["url"], pin
+
+            if params["page"] >= data["paging"]["numberOfPages"]:
+                break
+            params["page"] += 1
 
 
 def _check_empty(response):
author	Unit 193 <unit193@unit193.net>	2025-07-31 01:22:07 -0400
committer	Unit 193 <unit193@unit193.net>	2025-07-31 01:22:07 -0400
commit	d9539f96cc7ac112b7d8faad022190fbbc88c745 (patch)
tree	471249d60b9202c00d7d82abec8b296fc881292e /gallery_dl/extractor/sexcom.py
parent	889fc15f272118bf277737b6fac29d3faeffc641 (diff)
parent	a6e995c093de8aae2e91a0787281bb34c0b871eb (diff)