diff options
Diffstat (limited to 'gallery_dl/extractor/behance.py')
| -rw-r--r-- | gallery_dl/extractor/behance.py | 362 |
1 files changed, 259 insertions, 103 deletions
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index d8cc51d..fc5f9ef 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -9,7 +9,7 @@ """Extractors for https://www.behance.net/""" from .common import Extractor, Message -from .. import text, util +from .. import text, util, exception class BehanceExtractor(Extractor): @@ -18,6 +18,12 @@ class BehanceExtractor(Extractor): root = "https://www.behance.net" request_interval = (2.0, 4.0) + def _init(self): + self._bcp = self.cookies.get("bcp", domain="www.behance.net") + if not self._bcp: + self._bcp = "4c34489d-914c-46cd-b44c-dfd0e661136d" + self.cookies.set("bcp", self._bcp, domain="www.behance.net") + def items(self): for gallery in self.galleries(): gallery["_extractor"] = BehanceGalleryExtractor @@ -26,14 +32,29 @@ class BehanceExtractor(Extractor): def galleries(self): """Return all relevant gallery URLs""" - @staticmethod - def _update(data): + def _request_graphql(self, endpoint, variables): + url = self.root + "/v3/graphql" + headers = { + "Origin": self.root, + "X-BCP" : self._bcp, + "X-Requested-With": "XMLHttpRequest", + } + data = { + "query" : GRAPHQL_QUERIES[endpoint], + "variables": variables, + } + + return self.request(url, method="POST", headers=headers, + json=data).json()["data"] + + def _update(self, data): # compress data to simple lists if data["fields"] and isinstance(data["fields"][0], dict): data["fields"] = [ field.get("name") or field.get("label") for field in data["fields"] ] + data["owners"] = [ owner.get("display_name") or owner.get("displayName") for owner in data["owners"] @@ -44,6 +65,9 @@ class BehanceExtractor(Extractor): tags = [tag["title"] for tag in tags] data["tags"] = tags + data["date"] = text.parse_timestamp( + data.get("publishedOn") or data.get("conceived_on") or 0) + # backwards compatibility data["gallery_id"] = data["id"] data["title"] = data["name"] @@ -59,38 +83,7 @@ class BehanceGalleryExtractor(BehanceExtractor): filename_fmt = "{category}_{id}_{num:>02}.{extension}" archive_fmt = "{id}_{num}" pattern = r"(?:https?://)?(?:www\.)?behance\.net/gallery/(\d+)" - test = ( - ("https://www.behance.net/gallery/17386197/A-Short-Story", { - "count": 2, - "url": "ab79bd3bef8d3ae48e6ac74fd995c1dfaec1b7d2", - "keyword": { - "id": 17386197, - "name": 're:"Hi". A short story about the important things ', - "owners": ["Place Studio", "Julio César Velazquez"], - "fields": ["Animation", "Character Design", "Directing"], - "tags": list, - "module": dict, - }, - }), - ("https://www.behance.net/gallery/21324767/Nevada-City", { - "count": 6, - "url": "0258fe194fe7d828d6f2c7f6086a9a0a4140db1d", - "keyword": {"owners": ["Alex Strohl"]}, - }), - # 'media_collection' modules - ("https://www.behance.net/gallery/88276087/Audi-R8-RWD", { - "count": 20, - "url": "6bebff0d37f85349f9ad28bd8b76fd66627c1e2f", - "pattern": r"https://mir-s3-cdn-cf\.behance\.net/project_modules" - r"/source/[0-9a-f]+.[0-9a-f]+\.jpg" - }), - # 'video' modules (#1282) - ("https://www.behance.net/gallery/101185577/COLCCI", { - "pattern": r"https://cdn-prod-ccv\.adobe\.com/\w+" - r"/rend/\w+_720\.mp4\?", - "count": 3, - }), - ) + example = "https://www.behance.net/gallery/12345/TITLE" def __init__(self, match): BehanceExtractor.__init__(self, match) @@ -111,10 +104,6 @@ class BehanceGalleryExtractor(BehanceExtractor): """Collect gallery info dict""" url = "{}/gallery/{}/a".format(self.root, self.gallery_id) cookies = { - "_evidon_consent_cookie": - '{"consent_date":"2019-01-31T09:41:15.132Z"}', - "bcp": "4c34489d-914c-46cd-b44c-dfd0e661136d", - "gk_suid": "66981391", "gki": '{"feature_project_view":false,' '"feature_discover_login_prompt":false,' '"feature_project_login_prompt":false}', @@ -128,6 +117,18 @@ class BehanceGalleryExtractor(BehanceExtractor): def get_images(self, data): """Extract image results from an API response""" + if not data["modules"]: + access = data.get("matureAccess") + if access == "logged-out": + raise exception.AuthorizationError( + "Mature content galleries require logged-in cookies") + if access == "restricted-safe": + raise exception.AuthorizationError( + "Mature content blocked in account settings") + if access and access != "allowed": + raise exception.AuthorizationError() + return () + result = [] append = result.append @@ -139,7 +140,13 @@ class BehanceGalleryExtractor(BehanceExtractor): append((url, module)) elif mtype == "VideoModule": - renditions = module["videoData"]["renditions"] + try: + renditions = module["videoData"]["renditions"] + except Exception: + self.log.warning("No download URLs for video %s", + module.get("id") or "???") + continue + try: url = [ r["url"] for r in renditions @@ -148,6 +155,7 @@ class BehanceGalleryExtractor(BehanceExtractor): except Exception as exc: self.log.debug("%s: %s", exc.__class__.__name__, exc) url = "ytdl:" + renditions[-1]["url"] + append((url, module)) elif mtype == "MediaCollectionModule": @@ -172,27 +180,27 @@ class BehanceUserExtractor(BehanceExtractor): subcategory = "user" categorytransfer = True pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?#]+)/?$" - test = ("https://www.behance.net/alexstrohl", { - "count": ">= 8", - "pattern": BehanceGalleryExtractor.pattern, - }) + example = "https://www.behance.net/USER" def __init__(self, match): BehanceExtractor.__init__(self, match) self.user = match.group(1) def galleries(self): - url = "{}/{}/projects".format(self.root, self.user) - params = {"offset": 0} - headers = {"X-Requested-With": "XMLHttpRequest"} + endpoint = "GetProfileProjects" + variables = { + "username": self.user, + "after" : "MAo=", # "0" in base64 + } while True: - data = self.request(url, params=params, headers=headers).json() - work = data["profile"]["activeSection"]["work"] - yield from work["projects"] - if not work["hasMore"]: + data = self._request_graphql(endpoint, variables) + items = data["user"]["profileProjects"] + yield from items["nodes"] + + if not items["pageInfo"]["hasNextPage"]: return - params["offset"] += len(work["projects"]) + variables["after"] = items["pageInfo"]["endCursor"] class BehanceCollectionExtractor(BehanceExtractor): @@ -200,31 +208,193 @@ class BehanceCollectionExtractor(BehanceExtractor): subcategory = "collection" categorytransfer = True pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)" - test = ("https://www.behance.net/collection/71340149/inspiration", { - "count": ">= 145", - "pattern": BehanceGalleryExtractor.pattern, - }) + example = "https://www.behance.net/collection/12345/TITLE" def __init__(self, match): BehanceExtractor.__init__(self, match) self.collection_id = match.group(1) def galleries(self): - url = self.root + "/v3/graphql" - headers = { - "Origin" : self.root, - "Referer": self.root + "/collection/" + self.collection_id, - "X-BCP" : "4c34489d-914c-46cd-b44c-dfd0e661136d", - "X-NewRelic-ID" : "VgUFVldbGwsFU1BRDwUBVw==", - "X-Requested-With": "XMLHttpRequest", + endpoint = "GetMoodboardItemsAndRecommendations" + variables = { + "afterItem": "MAo=", # "0" in base64 + "firstItem": 40, + "id" : int(self.collection_id), + "shouldGetItems" : True, + "shouldGetMoodboardFields": False, + "shouldGetRecommendations": False, } - cookies = { - "bcp" : "4c34489d-914c-46cd-b44c-dfd0e661136d", - "gk_suid": "66981391", - "ilo0" : "true", + + while True: + data = self._request_graphql(endpoint, variables) + items = data["moodboard"]["items"] + + for node in items["nodes"]: + yield node["entity"] + + if not items["pageInfo"]["hasNextPage"]: + return + variables["afterItem"] = items["pageInfo"]["endCursor"] + + +GRAPHQL_QUERIES = { + "GetProfileProjects": """\ +query GetProfileProjects($username: String, $after: String) { + user(username: $username) { + profileProjects(first: 12, after: $after) { + pageInfo { + endCursor + hasNextPage + } + nodes { + __typename + adminFlags { + mature_lock + privacy_lock + dmca_lock + flagged_lock + privacy_violation_lock + trademark_lock + spam_lock + eu_ip_lock + } + colors { + r + g + b + } + covers { + size_202 { + url + } + size_404 { + url + } + size_808 { + url + } + } + features { + url + name + featuredOn + ribbon { + image + image2x + image3x + } } + fields { + id + label + slug + url + } + hasMatureContent + id + isFeatured + isHiddenFromWorkTab + isMatureReviewSubmitted + isOwner + isFounder + isPinnedToSubscriptionOverview + isPrivate + linkedAssets { + ...sourceLinkFields + } + linkedAssetsCount + sourceFiles { + ...sourceFileFields + } + matureAccess + modifiedOn + name + owners { + ...OwnerFields + images { + size_50 { + url + } + } + } + premium + publishedOn + stats { + appreciations { + all + } + views { + all + } + comments { + all + } + } + slug + tools { + id + title + category + categoryLabel + categoryId + approved + url + backgroundColor + } + url + } + } + } +} + +fragment sourceFileFields on SourceFile { + __typename + sourceFileId + projectId + userId + title + assetId + renditionUrl + mimeType + size + category + licenseType + unitAmount + currency + tier + hidden + extension + hasUserPurchased +} + +fragment sourceLinkFields on LinkedAsset { + __typename + name + premium + url + category + licenseType +} + +fragment OwnerFields on User { + displayName + hasPremiumAccess + id + isFollowing + isProfileOwner + location + locationUrl + url + username + availabilityInfo { + availabilityTimeline + isAvailableFullTime + isAvailableFreelance + } +} +""", - query = """ + "GetMoodboardItemsAndRecommendations": """\ query GetMoodboardItemsAndRecommendations( $id: Int! $firstItem: Int! @@ -269,13 +439,7 @@ fragment moodboardFields on Moodboard { url isOwner owners { - id - displayName - url - firstName - location - locationUrl - isFollowing + ...OwnerFields images { size_50 { url @@ -300,6 +464,7 @@ fragment moodboardFields on Moodboard { } fragment projectFields on Project { + __typename id isOwner publishedOn @@ -328,13 +493,7 @@ fragment projectFields on Project { b } owners { - url - displayName - id - location - locationUrl - isProfileOwner - isFollowing + ...OwnerFields images { size_50 { url @@ -468,26 +627,23 @@ fragment nodesFields on MoodboardItem { } } } -""" - variables = { - "afterItem": "MAo=", - "firstItem": 40, - "id" : int(self.collection_id), - "shouldGetItems" : True, - "shouldGetMoodboardFields": False, - "shouldGetRecommendations": False, - } - data = {"query": query, "variables": variables} - - while True: - items = self.request( - url, method="POST", headers=headers, - cookies=cookies, json=data, - ).json()["data"]["moodboard"]["items"] - for node in items["nodes"]: - yield node["entity"] +fragment OwnerFields on User { + displayName + hasPremiumAccess + id + isFollowing + isProfileOwner + location + locationUrl + url + username + availabilityInfo { + availabilityTimeline + isAvailableFullTime + isAvailableFreelance + } +} +""", - if not items["pageInfo"]["hasNextPage"]: - return - variables["afterItem"] = items["pageInfo"]["endCursor"] +} |
