summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/behance.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/behance.py')
-rw-r--r-- gallery_dl/extractor/behance.py 362
1 files changed, 259 insertions, 103 deletions
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index d8cc51d..fc5f9ef 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.behance.net/"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text, util, exception
class BehanceExtractor(Extractor):
@@ -18,6 +18,12 @@ class BehanceExtractor(Extractor):
root = "https://www.behance.net"
request_interval = (2.0, 4.0)
+ def _init(self):
+ self._bcp = self.cookies.get("bcp", domain="www.behance.net")
+ if not self._bcp:
+ self._bcp = "4c34489d-914c-46cd-b44c-dfd0e661136d"
+ self.cookies.set("bcp", self._bcp, domain="www.behance.net")
+
def items(self):
for gallery in self.galleries():
gallery["_extractor"] = BehanceGalleryExtractor
@@ -26,14 +32,29 @@ class BehanceExtractor(Extractor):
def galleries(self):
"""Return all relevant gallery URLs"""
- @staticmethod
- def _update(data):
+ def _request_graphql(self, endpoint, variables):
+ url = self.root + "/v3/graphql"
+ headers = {
+ "Origin": self.root,
+ "X-BCP" : self._bcp,
+ "X-Requested-With": "XMLHttpRequest",
+ }
+ data = {
+ "query" : GRAPHQL_QUERIES[endpoint],
+ "variables": variables,
+ }
+
+ return self.request(url, method="POST", headers=headers,
+ json=data).json()["data"]
+
+ def _update(self, data):
# compress data to simple lists
if data["fields"] and isinstance(data["fields"][0], dict):
data["fields"] = [
field.get("name") or field.get("label")
for field in data["fields"]
]
+
data["owners"] = [
owner.get("display_name") or owner.get("displayName")
for owner in data["owners"]
@@ -44,6 +65,9 @@ class BehanceExtractor(Extractor):
tags = [tag["title"] for tag in tags]
data["tags"] = tags
+ data["date"] = text.parse_timestamp(
+ data.get("publishedOn") or data.get("conceived_on") or 0)
+
# backwards compatibility
data["gallery_id"] = data["id"]
data["title"] = data["name"]
@@ -59,38 +83,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
filename_fmt = "{category}_{id}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
pattern = r"(?:https?://)?(?:www\.)?behance\.net/gallery/(\d+)"
- test = (
- ("https://www.behance.net/gallery/17386197/A-Short-Story", {
- "count": 2,
- "url": "ab79bd3bef8d3ae48e6ac74fd995c1dfaec1b7d2",
- "keyword": {
- "id": 17386197,
- "name": 're:"Hi". A short story about the important things ',
- "owners": ["Place Studio", "Julio César Velazquez"],
- "fields": ["Animation", "Character Design", "Directing"],
- "tags": list,
- "module": dict,
- },
- }),
- ("https://www.behance.net/gallery/21324767/Nevada-City", {
- "count": 6,
- "url": "0258fe194fe7d828d6f2c7f6086a9a0a4140db1d",
- "keyword": {"owners": ["Alex Strohl"]},
- }),
- # 'media_collection' modules
- ("https://www.behance.net/gallery/88276087/Audi-R8-RWD", {
- "count": 20,
- "url": "6bebff0d37f85349f9ad28bd8b76fd66627c1e2f",
- "pattern": r"https://mir-s3-cdn-cf\.behance\.net/project_modules"
- r"/source/[0-9a-f]+.[0-9a-f]+\.jpg"
- }),
- # 'video' modules (#1282)
- ("https://www.behance.net/gallery/101185577/COLCCI", {
- "pattern": r"https://cdn-prod-ccv\.adobe\.com/\w+"
- r"/rend/\w+_720\.mp4\?",
- "count": 3,
- }),
- )
+ example = "https://www.behance.net/gallery/12345/TITLE"
def __init__(self, match):
BehanceExtractor.__init__(self, match)
@@ -111,10 +104,6 @@ class BehanceGalleryExtractor(BehanceExtractor):
"""Collect gallery info dict"""
url = "{}/gallery/{}/a".format(self.root, self.gallery_id)
cookies = {
- "_evidon_consent_cookie":
- '{"consent_date":"2019-01-31T09:41:15.132Z"}',
- "bcp": "4c34489d-914c-46cd-b44c-dfd0e661136d",
- "gk_suid": "66981391",
"gki": '{"feature_project_view":false,'
'"feature_discover_login_prompt":false,'
'"feature_project_login_prompt":false}',
@@ -128,6 +117,18 @@ class BehanceGalleryExtractor(BehanceExtractor):
def get_images(self, data):
"""Extract image results from an API response"""
+ if not data["modules"]:
+ access = data.get("matureAccess")
+ if access == "logged-out":
+ raise exception.AuthorizationError(
+ "Mature content galleries require logged-in cookies")
+ if access == "restricted-safe":
+ raise exception.AuthorizationError(
+ "Mature content blocked in account settings")
+ if access and access != "allowed":
+ raise exception.AuthorizationError()
+ return ()
+
result = []
append = result.append
@@ -139,7 +140,13 @@ class BehanceGalleryExtractor(BehanceExtractor):
append((url, module))
elif mtype == "VideoModule":
- renditions = module["videoData"]["renditions"]
+ try:
+ renditions = module["videoData"]["renditions"]
+ except Exception:
+ self.log.warning("No download URLs for video %s",
+ module.get("id") or "???")
+ continue
+
try:
url = [
r["url"] for r in renditions
@@ -148,6 +155,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
except Exception as exc:
self.log.debug("%s: %s", exc.__class__.__name__, exc)
url = "ytdl:" + renditions[-1]["url"]
+
append((url, module))
elif mtype == "MediaCollectionModule":
@@ -172,27 +180,27 @@ class BehanceUserExtractor(BehanceExtractor):
subcategory = "user"
categorytransfer = True
pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?#]+)/?$"
- test = ("https://www.behance.net/alexstrohl", {
- "count": ">= 8",
- "pattern": BehanceGalleryExtractor.pattern,
- })
+ example = "https://www.behance.net/USER"
def __init__(self, match):
BehanceExtractor.__init__(self, match)
self.user = match.group(1)
def galleries(self):
- url = "{}/{}/projects".format(self.root, self.user)
- params = {"offset": 0}
- headers = {"X-Requested-With": "XMLHttpRequest"}
+ endpoint = "GetProfileProjects"
+ variables = {
+ "username": self.user,
+ "after" : "MAo=", # "0" in base64
+ }
while True:
- data = self.request(url, params=params, headers=headers).json()
- work = data["profile"]["activeSection"]["work"]
- yield from work["projects"]
- if not work["hasMore"]:
+ data = self._request_graphql(endpoint, variables)
+ items = data["user"]["profileProjects"]
+ yield from items["nodes"]
+
+ if not items["pageInfo"]["hasNextPage"]:
return
- params["offset"] += len(work["projects"])
+ variables["after"] = items["pageInfo"]["endCursor"]
class BehanceCollectionExtractor(BehanceExtractor):
@@ -200,31 +208,193 @@ class BehanceCollectionExtractor(BehanceExtractor):
subcategory = "collection"
categorytransfer = True
pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
- test = ("https://www.behance.net/collection/71340149/inspiration", {
- "count": ">= 145",
- "pattern": BehanceGalleryExtractor.pattern,
- })
+ example = "https://www.behance.net/collection/12345/TITLE"
def __init__(self, match):
BehanceExtractor.__init__(self, match)
self.collection_id = match.group(1)
def galleries(self):
- url = self.root + "/v3/graphql"
- headers = {
- "Origin" : self.root,
- "Referer": self.root + "/collection/" + self.collection_id,
- "X-BCP" : "4c34489d-914c-46cd-b44c-dfd0e661136d",
- "X-NewRelic-ID" : "VgUFVldbGwsFU1BRDwUBVw==",
- "X-Requested-With": "XMLHttpRequest",
+ endpoint = "GetMoodboardItemsAndRecommendations"
+ variables = {
+ "afterItem": "MAo=", # "0" in base64
+ "firstItem": 40,
+ "id" : int(self.collection_id),
+ "shouldGetItems" : True,
+ "shouldGetMoodboardFields": False,
+ "shouldGetRecommendations": False,
}
- cookies = {
- "bcp" : "4c34489d-914c-46cd-b44c-dfd0e661136d",
- "gk_suid": "66981391",
- "ilo0" : "true",
+
+ while True:
+ data = self._request_graphql(endpoint, variables)
+ items = data["moodboard"]["items"]
+
+ for node in items["nodes"]:
+ yield node["entity"]
+
+ if not items["pageInfo"]["hasNextPage"]:
+ return
+ variables["afterItem"] = items["pageInfo"]["endCursor"]
+
+
+GRAPHQL_QUERIES = {
+ "GetProfileProjects": """\
+query GetProfileProjects($username: String, $after: String) {
+ user(username: $username) {
+ profileProjects(first: 12, after: $after) {
+ pageInfo {
+ endCursor
+ hasNextPage
+ }
+ nodes {
+ __typename
+ adminFlags {
+ mature_lock
+ privacy_lock
+ dmca_lock
+ flagged_lock
+ privacy_violation_lock
+ trademark_lock
+ spam_lock
+ eu_ip_lock
+ }
+ colors {
+ r
+ g
+ b
+ }
+ covers {
+ size_202 {
+ url
+ }
+ size_404 {
+ url
+ }
+ size_808 {
+ url
+ }
+ }
+ features {
+ url
+ name
+ featuredOn
+ ribbon {
+ image
+ image2x
+ image3x
+ }
}
+ fields {
+ id
+ label
+ slug
+ url
+ }
+ hasMatureContent
+ id
+ isFeatured
+ isHiddenFromWorkTab
+ isMatureReviewSubmitted
+ isOwner
+ isFounder
+ isPinnedToSubscriptionOverview
+ isPrivate
+ linkedAssets {
+ ...sourceLinkFields
+ }
+ linkedAssetsCount
+ sourceFiles {
+ ...sourceFileFields
+ }
+ matureAccess
+ modifiedOn
+ name
+ owners {
+ ...OwnerFields
+ images {
+ size_50 {
+ url
+ }
+ }
+ }
+ premium
+ publishedOn
+ stats {
+ appreciations {
+ all
+ }
+ views {
+ all
+ }
+ comments {
+ all
+ }
+ }
+ slug
+ tools {
+ id
+ title
+ category
+ categoryLabel
+ categoryId
+ approved
+ url
+ backgroundColor
+ }
+ url
+ }
+ }
+ }
+}
+
+fragment sourceFileFields on SourceFile {
+ __typename
+ sourceFileId
+ projectId
+ userId
+ title
+ assetId
+ renditionUrl
+ mimeType
+ size
+ category
+ licenseType
+ unitAmount
+ currency
+ tier
+ hidden
+ extension
+ hasUserPurchased
+}
+
+fragment sourceLinkFields on LinkedAsset {
+ __typename
+ name
+ premium
+ url
+ category
+ licenseType
+}
+
+fragment OwnerFields on User {
+ displayName
+ hasPremiumAccess
+ id
+ isFollowing
+ isProfileOwner
+ location
+ locationUrl
+ url
+ username
+ availabilityInfo {
+ availabilityTimeline
+ isAvailableFullTime
+ isAvailableFreelance
+ }
+}
+""",
- query = """
+ "GetMoodboardItemsAndRecommendations": """\
query GetMoodboardItemsAndRecommendations(
$id: Int!
$firstItem: Int!
@@ -269,13 +439,7 @@ fragment moodboardFields on Moodboard {
url
isOwner
owners {
- id
- displayName
- url
- firstName
- location
- locationUrl
- isFollowing
+ ...OwnerFields
images {
size_50 {
url
@@ -300,6 +464,7 @@ fragment moodboardFields on Moodboard {
}
fragment projectFields on Project {
+ __typename
id
isOwner
publishedOn
@@ -328,13 +493,7 @@ fragment projectFields on Project {
b
}
owners {
- url
- displayName
- id
- location
- locationUrl
- isProfileOwner
- isFollowing
+ ...OwnerFields
images {
size_50 {
url
@@ -468,26 +627,23 @@ fragment nodesFields on MoodboardItem {
}
}
}
-"""
- variables = {
- "afterItem": "MAo=",
- "firstItem": 40,
- "id" : int(self.collection_id),
- "shouldGetItems" : True,
- "shouldGetMoodboardFields": False,
- "shouldGetRecommendations": False,
- }
- data = {"query": query, "variables": variables}
-
- while True:
- items = self.request(
- url, method="POST", headers=headers,
- cookies=cookies, json=data,
- ).json()["data"]["moodboard"]["items"]
- for node in items["nodes"]:
- yield node["entity"]
+fragment OwnerFields on User {
+ displayName
+ hasPremiumAccess
+ id
+ isFollowing
+ isProfileOwner
+ location
+ locationUrl
+ url
+ username
+ availabilityInfo {
+ availabilityTimeline
+ isAvailableFullTime
+ isAvailableFreelance
+ }
+}
+""",
- if not items["pageInfo"]["hasNextPage"]:
- return
- variables["afterItem"] = items["pageInfo"]["endCursor"]
+}