summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/pinterest.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net> 2024-10-25 17:27:30 -0400
committer: Unit 193 <unit193@unit193.net> 2024-10-25 17:27:30 -0400
commit: fc004701f923bb954a22c7fec2ae8d607e78cb2b (patch)
tree: a5bea4ed6447ea43c099131430e3bd6182ee87d7 /gallery_dl/extractor/pinterest.py
parent: 0db541f524e1774865efebcbe5653e9ad76ea2e8 (diff)
New upstream version 1.27.7. (ref: upstream/1.27.7)
Diffstat (limited to 'gallery_dl/extractor/pinterest.py')
-rw-r--r-- gallery_dl/extractor/pinterest.py 171
1 files changed, 124 insertions, 47 deletions
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 8c04ed5..499c579 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -18,8 +18,8 @@ BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
class PinterestExtractor(Extractor):
"""Base class for pinterest extractors"""
category = "pinterest"
- filename_fmt = "{category}_{id}{media_id:?_//}.{extension}"
- archive_fmt = "{id}{media_id}"
+ filename_fmt = "{category}_{id}{media_id|page_id:?_//}.{extension}"
+ archive_fmt = "{id}{media_id|page_id}"
root = "https://www.pinterest.com"
def _init(self):
@@ -30,12 +30,12 @@ class PinterestExtractor(Extractor):
self.root = text.ensure_http_scheme(domain)
self.api = PinterestAPI(self)
+ self.stories = self.config("stories", True)
+ self.videos = self.config("videos", True)
def items(self):
data = self.metadata()
- videos = self.config("videos", True)
- yield Message.Directory, data
for pin in self.pins():
if isinstance(pin, tuple):
@@ -43,40 +43,35 @@ class PinterestExtractor(Extractor):
yield Message.Queue, url, data
continue
+ try:
+ files = self._extract_files(pin)
+ except Exception as exc:
+ self.log.debug("", exc_info=exc)
+ self.log.warning(
+ "%s: Error when extracting download URLs (%s: %s)",
+ pin.get("id"), exc.__class__.__name__, exc)
+ continue
+
pin.update(data)
+ pin["count"] = len(files)
- carousel_data = pin.get("carousel_data")
- if carousel_data:
- pin["count"] = len(carousel_data["carousel_slots"])
- for num, slot in enumerate(carousel_data["carousel_slots"], 1):
- slot["media_id"] = slot.pop("id")
- pin.update(slot)
- pin["num"] = num
- size, image = next(iter(slot["images"].items()))
- url = image["url"].replace("/" + size + "/", "/originals/")
- yield Message.Url, url, text.nameext_from_url(url, pin)
-
- else:
- try:
- media = self._media_from_pin(pin)
- except Exception:
- self.log.debug("Unable to fetch download URL for pin %s",
- pin.get("id"))
- continue
+ yield Message.Directory, pin
+ for pin["num"], file in enumerate(files, 1):
+ url = file["url"]
+ text.nameext_from_url(url, pin)
+ pin.update(file)
- if videos or media.get("duration") is None:
- pin.update(media)
- pin["num"] = pin["count"] = 1
+ if "media_id" not in file:
pin["media_id"] = ""
+ if "page_id" not in file:
+ pin["page_id"] = ""
- url = media["url"]
- text.nameext_from_url(url, pin)
+ if pin["extension"] == "m3u8":
+ url = "ytdl:" + url
+ pin["_ytdl_manifest"] = "hls"
+ pin["extension"] = "mp4"
- if pin["extension"] == "m3u8":
- url = "ytdl:" + url
- pin["extension"] = "mp4"
-
- yield Message.Url, url, pin
+ yield Message.Url, url, pin
def metadata(self):
"""Return general metadata"""
@@ -84,26 +79,108 @@ class PinterestExtractor(Extractor):
def pins(self):
"""Return all relevant pin objects"""
- @staticmethod
- def _media_from_pin(pin):
+ def _extract_files(self, pin):
+ story_pin_data = pin.get("story_pin_data")
+ if story_pin_data and self.stories:
+ return self._extract_story(pin, story_pin_data)
+
+ carousel_data = pin.get("carousel_data")
+ if carousel_data:
+ return self._extract_carousel(pin, carousel_data)
+
videos = pin.get("videos")
- if videos:
- video_formats = videos["video_list"]
+ if videos and self.videos:
+ return (self._extract_video(videos),)
- for fmt in ("V_HLSV4", "V_HLSV3_WEB", "V_HLSV3_MOBILE"):
- if fmt in video_formats:
- media = video_formats[fmt]
- break
- else:
- media = max(video_formats.values(),
- key=lambda x: x.get("width", 0))
+ try:
+ return (pin["images"]["orig"],)
+ except Exception:
+ self.log.debug("%s: No files found", pin.get("id"))
+ return ()
+
+ def _extract_story(self, pin, story):
+ files = []
+ story_id = story.get("id")
+
+ for page in story["pages"]:
+ page_id = page.get("id")
+
+ for block in page["blocks"]:
+ type = block.get("type")
+
+ if type == "story_pin_image_block":
+ if 1 == len(page["blocks"]) == len(story["pages"]):
+ try:
+ media = pin["images"]["orig"]
+ except Exception:
+ media = self._extract_image(page, block)
+ else:
+ media = self._extract_image(page, block)
+
+ elif type == "story_pin_video_block":
+ video = block["video"]
+ media = self._extract_video(video)
+ media["media_id"] = video.get("id") or ""
+
+ elif type == "story_pin_paragraph_block":
+ media = {"url": "text:" + block["text"],
+ "extension": "txt",
+ "media_id": block.get("id")}
+
+ else:
+ self.log.warning("%s: Unsupported story block '%s'",
+ pin.get("id"), type)
+ continue
- if "V_720P" in video_formats:
- media["_fallback"] = (video_formats["V_720P"]["url"],)
+ media["story_id"] = story_id
+ media["page_id"] = page_id
+ files.append(media)
+
+ return files
+
+ def _extract_carousel(self, pin, carousel_data):
+ files = []
+ for slot in carousel_data["carousel_slots"]:
+ size, image = next(iter(slot["images"].items()))
+ slot["media_id"] = slot.pop("id")
+ slot["url"] = image["url"].replace(
+ "/" + size + "/", "/originals/", 1)
+ files.append(slot)
+ return files
+
+ def _extract_image(self, page, block):
+ sig = block.get("image_signature") or page["image_signature"]
+ url_base = "https://i.pinimg.com/originals/{}/{}/{}/{}.".format(
+ sig[0:2], sig[2:4], sig[4:6], sig)
+ url_jpg = url_base + "jpg"
+ url_png = url_base + "png"
+ url_webp = url_base + "webp"
- return media
+ try:
+ media = block["image"]["images"]["originals"]
+ except Exception:
+ media = {"url": url_jpg, "_fallback": (url_png, url_webp,)}
- return pin["images"]["orig"]
+ if media["url"] == url_jpg:
+ media["_fallback"] = (url_png, url_webp,)
+ else:
+ media["_fallback"] = (url_jpg, url_png, url_webp,)
+ media["media_id"] = sig
+
+ return media
+
+ def _extract_video(self, video):
+ video_formats = video["video_list"]
+ for fmt in ("V_HLSV4", "V_HLSV3_WEB", "V_HLSV3_MOBILE"):
+ if fmt in video_formats:
+ media = video_formats[fmt]
+ break
+ else:
+ media = max(video_formats.values(),
+ key=lambda x: x.get("width", 0))
+ if "V_720P" in video_formats:
+ media["_fallback"] = (video_formats["V_720P"]["url"],)
+ return media
class PinterestPinExtractor(PinterestExtractor):