diff options
| author | 2024-10-25 17:27:30 -0400 | |
|---|---|---|
| committer | 2024-10-25 17:27:30 -0400 | |
| commit | fc004701f923bb954a22c7fec2ae8d607e78cb2b (patch) | |
| tree | a5bea4ed6447ea43c099131430e3bd6182ee87d7 /gallery_dl/extractor/pinterest.py | |
| parent | 0db541f524e1774865efebcbe5653e9ad76ea2e8 (diff) | |
New upstream version 1.27.7. (tag: upstream/1.27.7)
Diffstat (limited to 'gallery_dl/extractor/pinterest.py')
| -rw-r--r-- | gallery_dl/extractor/pinterest.py | 171 |
1 files changed, 124 insertions, 47 deletions
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 8c04ed5..499c579 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -18,8 +18,8 @@ BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+" class PinterestExtractor(Extractor): """Base class for pinterest extractors""" category = "pinterest" - filename_fmt = "{category}_{id}{media_id:?_//}.{extension}" - archive_fmt = "{id}{media_id}" + filename_fmt = "{category}_{id}{media_id|page_id:?_//}.{extension}" + archive_fmt = "{id}{media_id|page_id}" root = "https://www.pinterest.com" def _init(self): @@ -30,12 +30,12 @@ class PinterestExtractor(Extractor): self.root = text.ensure_http_scheme(domain) self.api = PinterestAPI(self) + self.stories = self.config("stories", True) + self.videos = self.config("videos", True) def items(self): data = self.metadata() - videos = self.config("videos", True) - yield Message.Directory, data for pin in self.pins(): if isinstance(pin, tuple): @@ -43,40 +43,35 @@ class PinterestExtractor(Extractor): yield Message.Queue, url, data continue + try: + files = self._extract_files(pin) + except Exception as exc: + self.log.debug("", exc_info=exc) + self.log.warning( + "%s: Error when extracting download URLs (%s: %s)", + pin.get("id"), exc.__class__.__name__, exc) + continue + pin.update(data) + pin["count"] = len(files) - carousel_data = pin.get("carousel_data") - if carousel_data: - pin["count"] = len(carousel_data["carousel_slots"]) - for num, slot in enumerate(carousel_data["carousel_slots"], 1): - slot["media_id"] = slot.pop("id") - pin.update(slot) - pin["num"] = num - size, image = next(iter(slot["images"].items())) - url = image["url"].replace("/" + size + "/", "/originals/") - yield Message.Url, url, text.nameext_from_url(url, pin) - - else: - try: - media = self._media_from_pin(pin) - except Exception: - self.log.debug("Unable to fetch download URL for pin %s", - pin.get("id")) - continue + yield 
Message.Directory, pin + for pin["num"], file in enumerate(files, 1): + url = file["url"] + text.nameext_from_url(url, pin) + pin.update(file) - if videos or media.get("duration") is None: - pin.update(media) - pin["num"] = pin["count"] = 1 + if "media_id" not in file: pin["media_id"] = "" + if "page_id" not in file: + pin["page_id"] = "" - url = media["url"] - text.nameext_from_url(url, pin) + if pin["extension"] == "m3u8": + url = "ytdl:" + url + pin["_ytdl_manifest"] = "hls" + pin["extension"] = "mp4" - if pin["extension"] == "m3u8": - url = "ytdl:" + url - pin["extension"] = "mp4" - - yield Message.Url, url, pin + yield Message.Url, url, pin def metadata(self): """Return general metadata""" @@ -84,26 +79,108 @@ class PinterestExtractor(Extractor): def pins(self): """Return all relevant pin objects""" - @staticmethod - def _media_from_pin(pin): + def _extract_files(self, pin): + story_pin_data = pin.get("story_pin_data") + if story_pin_data and self.stories: + return self._extract_story(pin, story_pin_data) + + carousel_data = pin.get("carousel_data") + if carousel_data: + return self._extract_carousel(pin, carousel_data) + videos = pin.get("videos") - if videos: - video_formats = videos["video_list"] + if videos and self.videos: + return (self._extract_video(videos),) - for fmt in ("V_HLSV4", "V_HLSV3_WEB", "V_HLSV3_MOBILE"): - if fmt in video_formats: - media = video_formats[fmt] - break - else: - media = max(video_formats.values(), - key=lambda x: x.get("width", 0)) + try: + return (pin["images"]["orig"],) + except Exception: + self.log.debug("%s: No files found", pin.get("id")) + return () + + def _extract_story(self, pin, story): + files = [] + story_id = story.get("id") + + for page in story["pages"]: + page_id = page.get("id") + + for block in page["blocks"]: + type = block.get("type") + + if type == "story_pin_image_block": + if 1 == len(page["blocks"]) == len(story["pages"]): + try: + media = pin["images"]["orig"] + except Exception: + media = 
self._extract_image(page, block) + else: + media = self._extract_image(page, block) + + elif type == "story_pin_video_block": + video = block["video"] + media = self._extract_video(video) + media["media_id"] = video.get("id") or "" + + elif type == "story_pin_paragraph_block": + media = {"url": "text:" + block["text"], + "extension": "txt", + "media_id": block.get("id")} + + else: + self.log.warning("%s: Unsupported story block '%s'", + pin.get("id"), type) + continue - if "V_720P" in video_formats: - media["_fallback"] = (video_formats["V_720P"]["url"],) + media["story_id"] = story_id + media["page_id"] = page_id + files.append(media) + + return files + + def _extract_carousel(self, pin, carousel_data): + files = [] + for slot in carousel_data["carousel_slots"]: + size, image = next(iter(slot["images"].items())) + slot["media_id"] = slot.pop("id") + slot["url"] = image["url"].replace( + "/" + size + "/", "/originals/", 1) + files.append(slot) + return files + + def _extract_image(self, page, block): + sig = block.get("image_signature") or page["image_signature"] + url_base = "https://i.pinimg.com/originals/{}/{}/{}/{}.".format( + sig[0:2], sig[2:4], sig[4:6], sig) + url_jpg = url_base + "jpg" + url_png = url_base + "png" + url_webp = url_base + "webp" - return media + try: + media = block["image"]["images"]["originals"] + except Exception: + media = {"url": url_jpg, "_fallback": (url_png, url_webp,)} - return pin["images"]["orig"] + if media["url"] == url_jpg: + media["_fallback"] = (url_png, url_webp,) + else: + media["_fallback"] = (url_jpg, url_png, url_webp,) + media["media_id"] = sig + + return media + + def _extract_video(self, video): + video_formats = video["video_list"] + for fmt in ("V_HLSV4", "V_HLSV3_WEB", "V_HLSV3_MOBILE"): + if fmt in video_formats: + media = video_formats[fmt] + break + else: + media = max(video_formats.values(), + key=lambda x: x.get("width", 0)) + if "V_720P" in video_formats: + media["_fallback"] = 
(video_formats["V_720P"]["url"],) + return media class PinterestPinExtractor(PinterestExtractor): |
