diff options
Diffstat (limited to 'gallery_dl/extractor/pinterest.py')
| -rw-r--r-- | gallery_dl/extractor/pinterest.py | 63 |
1 file changed, 38 insertions, 25 deletions
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 62fa9be..9c335ad 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2023 Mike Fährmann +# Copyright 2016-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -55,6 +55,14 @@ class PinterestExtractor(Extractor): pin.update(data) pin["count"] = len(files) + for key in ( + "description", + "closeup_description", + "closeup_unified_description", + ): + if value := pin.get(key): + pin[key] = value.strip() + yield Message.Directory, pin for pin["num"], file in enumerate(files, 1): url = file["url"] @@ -84,8 +92,7 @@ class PinterestExtractor(Extractor): if story_pin_data and self.stories: return self._extract_story(pin, story_pin_data) - carousel_data = pin.get("carousel_data") - if carousel_data: + if carousel_data := pin.get("carousel_data"): return self._extract_carousel(pin, carousel_data) videos = pin.get("videos") @@ -164,8 +171,8 @@ class PinterestExtractor(Extractor): def _extract_image(self, page, block): sig = block.get("image_signature") or page["image_signature"] - url_base = "https://i.pinimg.com/originals/{}/{}/{}/{}.".format( - sig[0:2], sig[2:4], sig[4:6], sig) + url_base = (f"https://i.pinimg.com/originals" + f"/{sig[0:2]}/{sig[2:4]}/{sig[4:6]}/{sig}.") url_jpg = url_base + "jpg" url_png = url_base + "png" url_webp = url_base + "webp" @@ -205,7 +212,7 @@ class PinterestPinExtractor(PinterestExtractor): def __init__(self, match): PinterestExtractor.__init__(self, match) - self.pin_id = match.group(1) + self.pin_id = match[1] self.pin = None def metadata(self): @@ -222,13 +229,13 @@ class PinterestBoardExtractor(PinterestExtractor): directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}") archive_fmt = "{board[id]}_{id}" pattern = (BASE_PATTERN + 
r"/(?!pin/)([^/?#]+)" - "/(?!_saved|_created|pins/)([^/?#]+)/?$") + r"/(?!_saved|_created|pins/)([^/?#]+)/?(?:$|\?|#)") example = "https://www.pinterest.com/USER/BOARD/" def __init__(self, match): PinterestExtractor.__init__(self, match) - self.user = text.unquote(match.group(1)) - self.board_name = text.unquote(match.group(2)) + self.user = text.unquote(match[1]) + self.board_name = text.unquote(match[2]) self.board = None def metadata(self): @@ -240,7 +247,7 @@ class PinterestBoardExtractor(PinterestExtractor): pins = self.api.board_pins(board["id"]) if board["section_count"] and self.config("sections", True): - base = "{}{}id:".format(self.root, board["url"]) + base = f"{self.root}{board['url']}id:" data = {"_extractor": PinterestSectionExtractor} sections = [(base + section["id"], data) for section in self.api.board_sections(board["id"])] @@ -257,12 +264,11 @@ class PinterestUserExtractor(PinterestExtractor): def __init__(self, match): PinterestExtractor.__init__(self, match) - self.user = text.unquote(match.group(1)) + self.user = text.unquote(match[1]) def items(self): for board in self.api.boards(self.user): - url = board.get("url") - if url: + if url := board.get("url"): board["_extractor"] = PinterestBoardExtractor yield Message.Queue, self.root + url, board @@ -276,7 +282,7 @@ class PinterestAllpinsExtractor(PinterestExtractor): def __init__(self, match): PinterestExtractor.__init__(self, match) - self.user = text.unquote(match.group(1)) + self.user = text.unquote(match[1]) def metadata(self): return {"user": self.user} @@ -294,7 +300,7 @@ class PinterestCreatedExtractor(PinterestExtractor): def __init__(self, match): PinterestExtractor.__init__(self, match) - self.user = text.unquote(match.group(1)) + self.user = text.unquote(match[1]) def metadata(self): return {"user": self.user} @@ -314,9 +320,9 @@ class PinterestSectionExtractor(PinterestExtractor): def __init__(self, match): PinterestExtractor.__init__(self, match) - self.user = 
text.unquote(match.group(1)) - self.board_slug = text.unquote(match.group(2)) - self.section_slug = text.unquote(match.group(3)) + self.user = text.unquote(match[1]) + self.board_slug = text.unquote(match[2]) + self.section_slug = text.unquote(match[3]) self.section = None def metadata(self): @@ -342,7 +348,7 @@ class PinterestSearchExtractor(PinterestExtractor): def __init__(self, match): PinterestExtractor.__init__(self, match) - self.search = text.unquote(match.group(1)) + self.search = text.unquote(match[1]) def metadata(self): return {"search": self.search} @@ -384,12 +390,19 @@ class PinterestPinitExtractor(PinterestExtractor): example = "https://pin.it/abcde" def items(self): - url = "https://api.pinterest.com/url_shortener/{}/redirect/".format( - self.groups[0]) + url = (f"https://api.pinterest.com/url_shortener" + f"/{self.groups[0]}/redirect/") location = self.request_location(url) - if not location or not PinterestPinExtractor.pattern.match(location): + if not location: + raise exception.NotFoundError("pin") + elif PinterestPinExtractor.pattern.match(location): + yield Message.Queue, location, { + "_extractor": PinterestPinExtractor} + elif PinterestBoardExtractor.pattern.match(location): + yield Message.Queue, location, { + "_extractor": PinterestBoardExtractor} + else: raise exception.NotFoundError("pin") - yield Message.Queue, location, {"_extractor": PinterestPinExtractor} class PinterestAPI(): @@ -511,7 +524,7 @@ class PinterestAPI(): return self._pagination("BaseSearch", options) def _call(self, resource, options): - url = "{}/resource/{}Resource/get/".format(self.root, resource) + url = f"{self.root}/resource/{resource}Resource/get/" params = { "data" : util.json_dumps({"options": options}), "source_url": "", @@ -534,7 +547,7 @@ class PinterestAPI(): resource = self.extractor.subcategory.rpartition("-")[2] raise exception.NotFoundError(resource) self.extractor.log.debug("Server response: %s", response.text) - raise exception.StopExtraction("API 
request failed") + raise exception.AbortExtraction("API request failed") def _pagination(self, resource, options): while True: |
