summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/behance.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/behance.py')
-rw-r--r--gallery_dl/extractor/behance.py179
1 files changed, 179 insertions, 0 deletions
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
new file mode 100644
index 0000000..111d560
--- /dev/null
+++ b/gallery_dl/extractor/behance.py
@@ -0,0 +1,179 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2018-2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract images from https://www.behance.net/"""
+
+from .common import Extractor, Message
+from .. import text
+import json
+
+
+class BehanceExtractor(Extractor):
+ """Base class for behance extractors"""
+ category = "behance"
+ root = "https://www.behance.net"
+
+ def items(self):
+ yield Message.Version, 1
+ for gallery in self.galleries():
+ gallery["_extractor"] = BehanceGalleryExtractor
+ yield Message.Queue, gallery["url"], self._update(gallery)
+
+ def galleries(self):
+ """Return all relevant gallery URLs"""
+
+ @staticmethod
+ def _update(data):
+ # compress data to simple lists
+ data["fields"] = [field["name"] for field in data["fields"]]
+ data["owners"] = [owner["display_name"] for owner in data["owners"]]
+ if "tags" in data:
+ data["tags"] = [tag["title"] for tag in data["tags"]]
+
+ # backwards compatibility
+ data["gallery_id"] = data["id"]
+ data["title"] = data["name"]
+ data["user"] = ", ".join(data["owners"])
+
+ return data
+
+
+class BehanceGalleryExtractor(BehanceExtractor):
+ """Extractor for image galleries from www.behance.net"""
+ subcategory = "gallery"
+ directory_fmt = ("{category}", "{owners:J, }", "{id} {name}")
+ filename_fmt = "{category}_{id}_{num:>02}.{extension}"
+ archive_fmt = "{id}_{num}"
+ pattern = r"(?:https?://)?(?:www\.)?behance\.net/gallery/(\d+)"
+ test = (
+ ("https://www.behance.net/gallery/17386197/A-Short-Story", {
+ "count": 2,
+ "url": "ab79bd3bef8d3ae48e6ac74fd995c1dfaec1b7d2",
+ "keyword": {
+ "id": 17386197,
+ "name": 're:"Hi". A short story about the important things ',
+ "owners": ["Place Studio", "Julio César Velazquez"],
+ "fields": ["Animation", "Character Design", "Directing"],
+ "tags": list,
+ "module": dict,
+ },
+ }),
+ ("https://www.behance.net/gallery/21324767/Nevada-City", {
+ "count": 6,
+ "url": "0258fe194fe7d828d6f2c7f6086a9a0a4140db1d",
+ "keyword": {"owners": ["Alex Strohl"]},
+ }),
+ )
+
+ def __init__(self, match):
+ BehanceExtractor.__init__(self, match)
+ self.gallery_id = match.group(1)
+
+ def items(self):
+ data = self.get_gallery_data()
+ imgs = self.get_images(data)
+ data["count"] = len(imgs)
+
+ yield Message.Version, 1
+ yield Message.Directory, data
+ for data["num"], (url, module) in enumerate(imgs, 1):
+ data["module"] = module
+ data["extension"] = text.ext_from_url(url)
+ yield Message.Url, url, data
+
+ def get_gallery_data(self):
+ """Collect gallery info dict"""
+ url = "{}/gallery/{}/a".format(self.root, self.gallery_id)
+ cookies = {
+ "_evidon_consent_cookie":
+ '{"consent_date":"2019-01-31T09:41:15.132Z"}',
+ "bcp": "815b5eee-8bdf-4898-ac79-33c2bcc0ed19",
+ "gk_suid": "66981391",
+ "gki": '{"feature_project_view":false,'
+ '"feature_discover_login_prompt":false,'
+ '"feature_project_login_prompt":false}',
+ "ilo0": "true",
+ }
+ page = self.request(url, cookies=cookies).text
+
+ data = json.loads(text.extract(
+ page, 'id="beconfig-store_state">', '</script>')[0])
+ return self._update(data["project"]["project"])
+
+ @staticmethod
+ def get_images(data):
+ """Extract image results from an API response"""
+ results = []
+
+ for module in data["modules"]:
+
+ if module["type"] == "image":
+ url = module["sizes"]["original"]
+ results.append((url, module))
+
+ elif module["type"] == "embed":
+ embed = module.get("original_embed") or module.get("embed")
+ url = "ytdl:" + text.extract(embed, 'src="', '"')[0]
+ results.append((url, module))
+
+ return results
+
+
+class BehanceUserExtractor(BehanceExtractor):
+ """Extractor for a user's galleries from www.behance.net"""
+ subcategory = "user"
+ categorytransfer = True
+ pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?&#]+)/?$"
+ test = ("https://www.behance.net/alexstrohl", {
+ "count": ">= 8",
+ "pattern": BehanceGalleryExtractor.pattern,
+ })
+
+ def __init__(self, match):
+ BehanceExtractor.__init__(self, match)
+ self.user = match.group(1)
+
+ def galleries(self):
+ url = "{}/{}/projects".format(self.root, self.user)
+ headers = {"X-Requested-With": "XMLHttpRequest"}
+ params = {"offset": 0}
+
+ while True:
+ data = self.request(url, headers=headers, params=params).json()
+ work = data["profile"]["activeSection"]["work"]
+ yield from work["projects"]
+ if not work["hasMore"]:
+ return
+ params["offset"] += len(work["projects"])
+
+
+class BehanceCollectionExtractor(BehanceExtractor):
+ """Extractor for a collection's galleries from www.behance.net"""
+ subcategory = "collection"
+ categorytransfer = True
+ pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
+ test = ("https://www.behance.net/collection/170615607/Sky", {
+ "count": ">= 13",
+ "pattern": BehanceGalleryExtractor.pattern,
+ })
+
+ def __init__(self, match):
+ BehanceExtractor.__init__(self, match)
+ self.collection_id = match.group(1)
+
+ def galleries(self):
+ url = "{}/collection/{}/a".format(self.root, self.collection_id)
+ headers = {"X-Requested-With": "XMLHttpRequest"}
+ params = {}
+
+ while True:
+ data = self.request(url, headers=headers, params=params).json()
+ yield from data["output"]
+ if not data.get("offset"):
+ return
+ params["offset"] = data["offset"]