New upstream version 1.29.3. (upstream/1.29.3)

author: Unit 193 <unit193@unit193.net> 2025-03-29 07:19:58 -0400
committer: Unit 193 <unit193@unit193.net> 2025-03-29 07:19:58 -0400
commit: 662e5ac868a5c1a3e7bc95b37054b3a0ca4db74f (patch)
tree: 537d0429926fb5eb3719aa2b384048ae79bda0b8 /gallery_dl/extractor/sexcom.py
parent: 8026a3c45446030d7af524bfc487d3462c8114ef (diff)
1 file changed, 83 insertions, 38 deletions
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index 7708b5c..9e7d75d 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -10,6 +10,9 @@
 
 from .common import Extractor, Message
 from .. import text
+from datetime import datetime
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?sex\.com"
 
 
 class SexcomExtractor(Extractor):
@@ -23,8 +26,20 @@ class SexcomExtractor(Extractor):
     def items(self):
         yield Message.Directory, self.metadata()
         for pin in map(self._parse_pin, self.pins()):
-            if pin:
-                yield Message.Url, pin["url"], pin
+            if not pin:
+                continue
+
+            url = pin["url"]
+            parts = url.rsplit("/", 4)
+            try:
+                pin["date_url"] = dt = datetime(
+                    int(parts[1]), int(parts[2]), int(parts[3]))
+                if "date" not in pin:
+                    pin["date"] = dt
+            except Exception:
+                pass
+
+            yield Message.Url, url, pin
 
     def metadata(self):
         return {}
@@ -53,10 +68,18 @@ class SexcomExtractor(Extractor):
             self.log.warning('Unable to fetch %s ("%s %s")',
                              url, response.status_code, response.reason)
             return None
+
+        if "/pin/" in response.url:
+            return self._parse_pin_legacy(response)
+        if "/videos/" in response.url:
+            return self._parse_pin_video(response)
+        return self._parse_pin_gifs(response)
+
+    def _parse_pin_legacy(self, response):
         extr = text.extract_from(response.text)
         data = {}
 
-        data["_http_headers"] = {"Referer": url}
+        data["_http_headers"] = {"Referer": response.url}
         data["thumbnail"] = extr('itemprop="thumbnail" content="', '"')
         data["type"] = extr('<h1>' , '<').rstrip(" -").strip().lower()
         data["title"] = text.unescape(extr('itemprop="name">' , '<'))
@@ -82,7 +105,8 @@ class SexcomExtractor(Extractor):
                 src = (text.extr(iframe, ' src="', '"') or
                        text.extr(iframe, " src='", "'"))
                 if not src:
-                    self.log.warning("Unable to fetch media from %s", url)
+                    self.log.warning(
+                        "Unable to fetch media from %s", response.url)
                     return None
                 data["extension"] = None
                 data["url"] = "ytdl:" + src
@@ -100,27 +124,60 @@ class SexcomExtractor(Extractor):
 
         return data
 
+    def _parse_pin_gifs(self, response):
+        extr = text.extract_from(response.text)
+
+        data = {
+            "_http_headers": {"Referer": response.url},
+            "type": "gif",
+            "url": extr(' href="', '"'),
+            "title": text.unescape(extr("<title>", " Gif | Sex.com<")),
+            "pin_id": text.parse_int(extr(
+                'rel="canonical" href="', '"').rpartition("/")[2]),
+            "tags": text.split_html(extr("</h1>", "</section>")),
+        }
+
+        return text.nameext_from_url(data["url"], data)
+
+    def _parse_pin_video(self, response):
+        extr = text.extract_from(response.text)
+
+        if not self.cookies.get("CloudFront-Key-Pair-Id", domain=".sex.com"):
+            self.log.warning("CloudFront cookies required for video downloads")
+
+        data = {
+            "_ytdl_manifest": "hls",
+            "extension": "mp4",
+            "type": "video",
+            "title": text.unescape(extr("<title>", " | Sex.com<")),
+            "pin_id": text.parse_int(extr(
+                'rel="canonical" href="', '"').rpartition("/")[2]),
+            "tags": text.split_html(extr(
+                'event_name="video_tags_click"', "<div data-testid=")
+                .partition(">")[2]),
+            "url": "ytdl:" + extr('<source src="', '"'),
+        }
+
+        return data
+
 
 class SexcomPinExtractor(SexcomExtractor):
     """Extractor for a pinned image or video on www.sex.com"""
     subcategory = "pin"
     directory_fmt = ("{category}",)
-    pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+)(?!.*#related$)"
+    pattern = (BASE_PATTERN +
+               r"(/(?:pin|\w\w/(?:gif|video)s)/\d+/?)(?!.*#related$)")
     example = "https://www.sex.com/pin/12345-TITLE/"
 
-    def __init__(self, match):
-        SexcomExtractor.__init__(self, match)
-        self.pin_id = match.group(1)
-
     def pins(self):
-        return ("{}/pin/{}/".format(self.root, self.pin_id),)
+        return (self.root + self.groups[0],)
 
 
 class SexcomRelatedPinExtractor(SexcomPinExtractor):
     """Extractor for related pins on www.sex.com"""
     subcategory = "related-pin"
     directory_fmt = ("{category}", "related {original_pin[pin_id]}")
-    pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+).*#related$"
+    pattern = BASE_PATTERN + r"(/pin/(\d+)/?).*#related$"
     example = "https://www.sex.com/pin/12345#related"
 
     def metadata(self):
@@ -129,7 +186,7 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor):
 
     def pins(self):
         url = "{}/pin/related?pinId={}&limit=24&offset=0".format(
-            self.root, self.pin_id)
+            self.root, self.groups[1])
         return self._pagination(url)
 
 
@@ -137,18 +194,14 @@ class SexcomPinsExtractor(SexcomExtractor):
     """Extractor for a user's pins on www.sex.com"""
     subcategory = "pins"
     directory_fmt = ("{category}", "{user}")
-    pattern = r"(?:https?://)?(?:www\.)?sex\.com/user/([^/?#]+)/pins/"
+    pattern = BASE_PATTERN + r"/user/([^/?#]+)/pins/"
     example = "https://www.sex.com/user/USER/pins/"
 
-    def __init__(self, match):
-        SexcomExtractor.__init__(self, match)
-        self.user = match.group(1)
-
     def metadata(self):
-        return {"user": text.unquote(self.user)}
+        return {"user": text.unquote(self.groups[0])}
 
     def pins(self):
-        url = "{}/user/{}/pins/".format(self.root, self.user)
+        url = "{}/user/{}/pins/".format(self.root, self.groups[0])
         return self._pagination(url)
 
 
@@ -156,18 +209,14 @@ class SexcomLikesExtractor(SexcomExtractor):
     """Extractor for a user's liked pins on www.sex.com"""
     subcategory = "likes"
     directory_fmt = ("{category}", "{user}", "Likes")
-    pattern = r"(?:https?://)?(?:www\.)?sex\.com/user/([^/?#]+)/likes/"
+    pattern = BASE_PATTERN + r"/user/([^/?#]+)/likes/"
     example = "https://www.sex.com/user/USER/likes/"
 
-    def __init__(self, match):
-        SexcomExtractor.__init__(self, match)
-        self.user = match.group(1)
-
     def metadata(self):
-        return {"user": text.unquote(self.user)}
+        return {"user": text.unquote(self.groups[0])}
 
     def pins(self):
-        url = "{}/user/{}/likes/".format(self.root, self.user)
+        url = "{}/user/{}/likes/".format(self.root, self.groups[0])
         return self._pagination(url)
 
 
@@ -175,15 +224,12 @@ class SexcomBoardExtractor(SexcomExtractor):
     """Extractor for pins from a board on www.sex.com"""
     subcategory = "board"
     directory_fmt = ("{category}", "{user}", "{board}")
-    pattern = (r"(?:https?://)?(?:www\.)?sex\.com/user"
+    pattern = (BASE_PATTERN + r"/user"
                r"/([^/?#]+)/(?!(?:following|pins|repins|likes)/)([^/?#]+)")
     example = "https://www.sex.com/user/USER/BOARD/"
 
-    def __init__(self, match):
-        SexcomExtractor.__init__(self, match)
-        self.user, self.board = match.groups()
-
     def metadata(self):
+        self.user, self.board = self.groups
         return {
             "user" : text.unquote(self.user),
             "board": text.unquote(self.board),
@@ -198,19 +244,18 @@ class SexcomSearchExtractor(SexcomExtractor):
     """Extractor for search results on www.sex.com"""
     subcategory = "search"
     directory_fmt = ("{category}", "search", "{search[query]}")
-    pattern = (r"(?:https?://)?(?:www\.)?sex\.com/((?:"
+    pattern = (BASE_PATTERN + r"/((?:"
                r"(pic|gif|video)s/([^/?#]*)|search/(pic|gif|video)s"
                r")/?(?:\?([^#]+))?)")
     example = "https://www.sex.com/search/pics?query=QUERY"
 
-    def __init__(self, match):
-        SexcomExtractor.__init__(self, match)
-        self.path = match.group(1)
+    def _init(self):
+        self.path, t1, query_alt, t2, query = self.groups
 
-        self.search = text.parse_query(match.group(5))
-        self.search["type"] = match.group(2) or match.group(4)
+        self.search = text.parse_query(query)
+        self.search["type"] = t1 or t2
         if "query" not in self.search:
-            self.search["query"] = match.group(3) or ""
+            self.search["query"] = query_alt or ""
 
     def metadata(self):
         return {"search": self.search}
author	Unit 193 <unit193@unit193.net>	2025-03-29 07:19:58 -0400
committer	Unit 193 <unit193@unit193.net>	2025-03-29 07:19:58 -0400
commit	662e5ac868a5c1a3e7bc95b37054b3a0ca4db74f (patch)
tree	537d0429926fb5eb3719aa2b384048ae79bda0b8 /gallery_dl/extractor/sexcom.py
parent	8026a3c45446030d7af524bfc487d3462c8114ef (diff)