New upstream version 1.27.7. (tag: upstream/1.27.7)

author: Unit 193 <unit193@unit193.net> 2024-10-25 17:27:30 -0400
committer: Unit 193 <unit193@unit193.net> 2024-10-25 17:27:30 -0400
commit: fc004701f923bb954a22c7fec2ae8d607e78cb2b (patch)
tree: a5bea4ed6447ea43c099131430e3bd6182ee87d7 /gallery_dl/extractor/pinterest.py
parent: 0db541f524e1774865efebcbe5653e9ad76ea2e8 (diff)
1 file changed, 124 insertions, 47 deletions
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 8c04ed5..499c579 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -18,8 +18,8 @@ BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
 class PinterestExtractor(Extractor):
     """Base class for pinterest extractors"""
     category = "pinterest"
-    filename_fmt = "{category}_{id}{media_id:?_//}.{extension}"
-    archive_fmt = "{id}{media_id}"
+    filename_fmt = "{category}_{id}{media_id|page_id:?_//}.{extension}"
+    archive_fmt = "{id}{media_id|page_id}"
     root = "https://www.pinterest.com"
 
     def _init(self):
@@ -30,12 +30,12 @@ class PinterestExtractor(Extractor):
             self.root = text.ensure_http_scheme(domain)
 
         self.api = PinterestAPI(self)
+        self.stories = self.config("stories", True)
+        self.videos = self.config("videos", True)
 
     def items(self):
         data = self.metadata()
-        videos = self.config("videos", True)
 
-        yield Message.Directory, data
         for pin in self.pins():
 
             if isinstance(pin, tuple):
@@ -43,40 +43,35 @@ class PinterestExtractor(Extractor):
                 yield Message.Queue, url, data
                 continue
 
+            try:
+                files = self._extract_files(pin)
+            except Exception as exc:
+                self.log.debug("", exc_info=exc)
+                self.log.warning(
+                    "%s: Error when extracting download URLs (%s: %s)",
+                    pin.get("id"), exc.__class__.__name__, exc)
+                continue
+
             pin.update(data)
+            pin["count"] = len(files)
 
-            carousel_data = pin.get("carousel_data")
-            if carousel_data:
-                pin["count"] = len(carousel_data["carousel_slots"])
-                for num, slot in enumerate(carousel_data["carousel_slots"], 1):
-                    slot["media_id"] = slot.pop("id")
-                    pin.update(slot)
-                    pin["num"] = num
-                    size, image = next(iter(slot["images"].items()))
-                    url = image["url"].replace("/" + size + "/", "/originals/")
-                    yield Message.Url, url, text.nameext_from_url(url, pin)
-
-            else:
-                try:
-                    media = self._media_from_pin(pin)
-                except Exception:
-                    self.log.debug("Unable to fetch download URL for pin %s",
-                                   pin.get("id"))
-                    continue
+            yield Message.Directory, pin
+            for pin["num"], file in enumerate(files, 1):
+                url = file["url"]
+                text.nameext_from_url(url, pin)
+                pin.update(file)
 
-                if videos or media.get("duration") is None:
-                    pin.update(media)
-                    pin["num"] = pin["count"] = 1
+                if "media_id" not in file:
                     pin["media_id"] = ""
+                if "page_id" not in file:
+                    pin["page_id"] = ""
 
-                    url = media["url"]
-                    text.nameext_from_url(url, pin)
+                if pin["extension"] == "m3u8":
+                    url = "ytdl:" + url
+                    pin["_ytdl_manifest"] = "hls"
+                    pin["extension"] = "mp4"
 
-                    if pin["extension"] == "m3u8":
-                        url = "ytdl:" + url
-                        pin["extension"] = "mp4"
-
-                    yield Message.Url, url, pin
+                yield Message.Url, url, pin
 
     def metadata(self):
         """Return general metadata"""
@@ -84,26 +79,108 @@ class PinterestExtractor(Extractor):
     def pins(self):
         """Return all relevant pin objects"""
 
-    @staticmethod
-    def _media_from_pin(pin):
+    def _extract_files(self, pin):
+        story_pin_data = pin.get("story_pin_data")
+        if story_pin_data and self.stories:
+            return self._extract_story(pin, story_pin_data)
+
+        carousel_data = pin.get("carousel_data")
+        if carousel_data:
+            return self._extract_carousel(pin, carousel_data)
+
         videos = pin.get("videos")
-        if videos:
-            video_formats = videos["video_list"]
+        if videos and self.videos:
+            return (self._extract_video(videos),)
 
-            for fmt in ("V_HLSV4", "V_HLSV3_WEB", "V_HLSV3_MOBILE"):
-                if fmt in video_formats:
-                    media = video_formats[fmt]
-                    break
-            else:
-                media = max(video_formats.values(),
-                            key=lambda x: x.get("width", 0))
+        try:
+            return (pin["images"]["orig"],)
+        except Exception:
+            self.log.debug("%s: No files found", pin.get("id"))
+            return ()
+
+    def _extract_story(self, pin, story):
+        files = []
+        story_id = story.get("id")
+
+        for page in story["pages"]:
+            page_id = page.get("id")
+
+            for block in page["blocks"]:
+                type = block.get("type")
+
+                if type == "story_pin_image_block":
+                    if 1 == len(page["blocks"]) == len(story["pages"]):
+                        try:
+                            media = pin["images"]["orig"]
+                        except Exception:
+                            media = self._extract_image(page, block)
+                    else:
+                        media = self._extract_image(page, block)
+
+                elif type == "story_pin_video_block":
+                    video = block["video"]
+                    media = self._extract_video(video)
+                    media["media_id"] = video.get("id") or ""
+
+                elif type == "story_pin_paragraph_block":
+                    media = {"url": "text:" + block["text"],
+                             "extension": "txt",
+                             "media_id": block.get("id")}
+
+                else:
+                    self.log.warning("%s: Unsupported story block '%s'",
+                                     pin.get("id"), type)
+                    continue
 
-            if "V_720P" in video_formats:
-                media["_fallback"] = (video_formats["V_720P"]["url"],)
+                media["story_id"] = story_id
+                media["page_id"] = page_id
+                files.append(media)
+
+        return files
+
+    def _extract_carousel(self, pin, carousel_data):
+        files = []
+        for slot in carousel_data["carousel_slots"]:
+            size, image = next(iter(slot["images"].items()))
+            slot["media_id"] = slot.pop("id")
+            slot["url"] = image["url"].replace(
+                "/" + size + "/", "/originals/", 1)
+            files.append(slot)
+        return files
+
+    def _extract_image(self, page, block):
+        sig = block.get("image_signature") or page["image_signature"]
+        url_base = "https://i.pinimg.com/originals/{}/{}/{}/{}.".format(
+            sig[0:2], sig[2:4], sig[4:6], sig)
+        url_jpg = url_base + "jpg"
+        url_png = url_base + "png"
+        url_webp = url_base + "webp"
 
-            return media
+        try:
+            media = block["image"]["images"]["originals"]
+        except Exception:
+            media = {"url": url_jpg, "_fallback": (url_png, url_webp,)}
 
-        return pin["images"]["orig"]
+        if media["url"] == url_jpg:
+            media["_fallback"] = (url_png, url_webp,)
+        else:
+            media["_fallback"] = (url_jpg, url_png, url_webp,)
+        media["media_id"] = sig
+
+        return media
+
+    def _extract_video(self, video):
+        video_formats = video["video_list"]
+        for fmt in ("V_HLSV4", "V_HLSV3_WEB", "V_HLSV3_MOBILE"):
+            if fmt in video_formats:
+                media = video_formats[fmt]
+                break
+        else:
+            media = max(video_formats.values(),
+                        key=lambda x: x.get("width", 0))
+        if "V_720P" in video_formats:
+            media["_fallback"] = (video_formats["V_720P"]["url"],)
+        return media
 
 
 class PinterestPinExtractor(PinterestExtractor):
author	Unit 193 <unit193@unit193.net>	2024-10-25 17:27:30 -0400
committer	Unit 193 <unit193@unit193.net>	2024-10-25 17:27:30 -0400
commit	fc004701f923bb954a22c7fec2ae8d607e78cb2b (patch)
tree	a5bea4ed6447ea43c099131430e3bd6182ee87d7 /gallery_dl/extractor/pinterest.py
parent	0db541f524e1774865efebcbe5653e9ad76ea2e8 (diff)