diff options
Diffstat (limited to 'gallery_dl/extractor/behance.py')
| -rw-r--r-- | gallery_dl/extractor/behance.py | 179 |
1 files changed, 179 insertions, 0 deletions
# -*- coding: utf-8 -*-

# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extract images from https://www.behance.net/"""

from .common import Extractor, Message
from .. import text
import json


class BehanceExtractor(Extractor):
    """Base class for behance extractors"""
    category = "behance"
    root = "https://www.behance.net"

    def items(self):
        """Yield a Queue message for every gallery from galleries()

        Each gallery dict is tagged with the extractor class that should
        handle its URL and is normalized by _update() before being sent.
        """
        yield Message.Version, 1
        for gallery in self.galleries():
            gallery["_extractor"] = BehanceGalleryExtractor
            yield Message.Queue, gallery["url"], self._update(gallery)

    def galleries(self):
        """Return an iterable of gallery metadata dicts

        Subclasses that rely on items() override this method; every
        returned dict needs at least the keys read by items() and
        _update() ("url", "fields", "owners", "id", "name").
        """

    @staticmethod
    def _update(data):
        """Simplify the gallery metadata dict 'data' in place

        Nested "fields", "owners" and (if present) "tags" objects are
        replaced by lists of their names/titles. The same dict object is
        returned for convenience.
        """
        # compress data to simple lists
        data["fields"] = [field["name"] for field in data["fields"]]
        data["owners"] = [owner["display_name"] for owner in data["owners"]]
        if "tags" in data:
            data["tags"] = [tag["title"] for tag in data["tags"]]

        # backwards compatibility
        data["gallery_id"] = data["id"]
        data["title"] = data["name"]
        data["user"] = ", ".join(data["owners"])

        return data


class BehanceGalleryExtractor(BehanceExtractor):
    """Extractor for image galleries from www.behance.net"""
    subcategory = "gallery"
    directory_fmt = ("{category}", "{owners:J, }", "{id} {name}")
    filename_fmt = "{category}_{id}_{num:>02}.{extension}"
    archive_fmt = "{id}_{num}"
    pattern = r"(?:https?://)?(?:www\.)?behance\.net/gallery/(\d+)"
    test = (
        ("https://www.behance.net/gallery/17386197/A-Short-Story", {
            "count": 2,
            "url": "ab79bd3bef8d3ae48e6ac74fd995c1dfaec1b7d2",
            "keyword": {
                "id": 17386197,
                "name": 're:"Hi". A short story about the important things ',
                "owners": ["Place Studio", "Julio César Velazquez"],
                "fields": ["Animation", "Character Design", "Directing"],
                "tags": list,
                "module": dict,
            },
        }),
        ("https://www.behance.net/gallery/21324767/Nevada-City", {
            "count": 6,
            "url": "0258fe194fe7d828d6f2c7f6086a9a0a4140db1d",
            "keyword": {"owners": ["Alex Strohl"]},
        }),
    )

    def __init__(self, match):
        BehanceExtractor.__init__(self, match)
        # numeric gallery ID captured by 'pattern'
        self.gallery_id = match.group(1)

    def items(self):
        """Yield Directory and Url messages for one gallery"""
        data = self.get_gallery_data()
        imgs = self.get_images(data)
        data["count"] = len(imgs)

        yield Message.Version, 1
        yield Message.Directory, data
        # the loop target writes the 1-based position straight into
        # data["num"]; the same dict object is reused for every message
        for data["num"], (url, module) in enumerate(imgs, 1):
            data["module"] = module
            data["extension"] = text.ext_from_url(url)
            yield Message.Url, url, data

    def get_gallery_data(self):
        """Collect gallery info dict"""
        url = "{}/gallery/{}/a".format(self.root, self.gallery_id)
        # NOTE(review): fixed cookie values sent with the page request;
        # presumably required to receive the page variant that embeds
        # the JSON state parsed below -- confirm before changing them
        cookies = {
            "_evidon_consent_cookie":
                '{"consent_date":"2019-01-31T09:41:15.132Z"}',
            "bcp": "815b5eee-8bdf-4898-ac79-33c2bcc0ed19",
            "gk_suid": "66981391",
            "gki": '{"feature_project_view":false,'
                   '"feature_discover_login_prompt":false,'
                   '"feature_project_login_prompt":false}',
            "ilo0": "true",
        }
        page = self.request(url, cookies=cookies).text

        # the project metadata is embedded as JSON in a <script> element
        data = json.loads(text.extract(
            page, 'id="beconfig-store_state">', '</script>')[0])
        return self._update(data["project"]["project"])

    @staticmethod
    def get_images(data):
        """Extract image results from an API response

        Returns a list of (url, module) tuples for all "image" and
        "embed" modules in data["modules"]. Embed URLs get a "ytdl:"
        prefix. Embed modules without markup or without a 'src'
        attribute are skipped, as are all other module types.
        """
        results = []

        for module in data["modules"]:
            mtype = module["type"]

            if mtype == "image":
                url = module["sizes"]["original"]
                results.append((url, module))

            elif mtype == "embed":
                embed = module.get("original_embed") or module.get("embed")
                if not embed:
                    # nothing to search for a source URL
                    continue
                src = text.extract(embed, 'src="', '"')[0]
                if src:
                    # only build a URL from an actual, non-empty string
                    results.append(("ytdl:" + src, module))

        return results


class BehanceUserExtractor(BehanceExtractor):
    """Extractor for a user's galleries from www.behance.net"""
    subcategory = "user"
    categorytransfer = True
    pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?&#]+)/?$"
    test = ("https://www.behance.net/alexstrohl", {
        "count": ">= 8",
        "pattern": BehanceGalleryExtractor.pattern,
    })

    def __init__(self, match):
        BehanceExtractor.__init__(self, match)
        # user name captured by 'pattern'
        self.user = match.group(1)

    def galleries(self):
        """Yield the project dicts of a user, following pagination"""
        url = "{}/{}/projects".format(self.root, self.user)
        headers = {"X-Requested-With": "XMLHttpRequest"}
        params = {"offset": 0}

        while True:
            data = self.request(url, headers=headers, params=params).json()
            work = data["profile"]["activeSection"]["work"]
            projects = work["projects"]
            yield from projects
            # an empty page cannot advance the offset; stop instead of
            # requesting the very same page over and over again
            if not projects or not work["hasMore"]:
                return
            params["offset"] += len(projects)


class BehanceCollectionExtractor(BehanceExtractor):
    """Extractor for a collection's galleries from www.behance.net"""
    subcategory = "collection"
    categorytransfer = True
    pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
    test = ("https://www.behance.net/collection/170615607/Sky", {
        "count": ">= 13",
        "pattern": BehanceGalleryExtractor.pattern,
    })

    def __init__(self, match):
        BehanceExtractor.__init__(self, match)
        # numeric collection ID captured by 'pattern'
        self.collection_id = match.group(1)

    def galleries(self):
        """Yield the project dicts of a collection, following pagination"""
        url = "{}/collection/{}/a".format(self.root, self.collection_id)
        headers = {"X-Requested-With": "XMLHttpRequest"}
        params = {}

        while True:
            data = self.request(url, headers=headers, params=params).json()
            yield from data["output"]
            # a missing or falsy "offset" in the response ends the listing
            if not data.get("offset"):
                return
            params["offset"] = data["offset"]
