summary refs log tree commit diff stats
path: root/gallery_dl/extractor/unsplash.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/unsplash.py')
-rw-r--r-- gallery_dl/extractor/unsplash.py 208
1 files changed, 208 insertions, 0 deletions
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
new file mode 100644
index 0000000..545eb31
--- /dev/null
+++ b/gallery_dl/extractor/unsplash.py
@@ -0,0 +1,208 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://unsplash.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
# Common URL prefix of all extractor patterns in this module:
# an optional http(s) scheme followed by the unsplash.com host.
BASE_PATTERN = r"(?:https?://)?" r"unsplash\.com"
+
+
class UnsplashExtractor(Extractor):
    """Shared behaviour of all unsplash extractors

    Subclasses provide a photos() method returning an iterable of
    photo metadata dicts; items() turns those into messages.
    """
    category = "unsplash"
    root = "https://unsplash.com"
    directory_fmt = ("{category}", "{user[username]}")
    filename_fmt = "{id}.{extension}"
    archive_fmt = "{id}"
    # pagination settings: first page to request and results per page
    page_start = 1
    per_page = 20

    def __init__(self, match):
        Extractor.__init__(self, match)
        # first capture group of the subclass' URL pattern
        self.item = match.group(1)

    def items(self):
        """Yield a Directory and a Url message for each photo"""
        discard = ("current_user_collections", "related_collections")
        # key into a photo's "urls" dict; "raw" unless configured otherwise
        size = self.config("format") or "raw"

        for photo in self.photos():
            util.delete_items(photo, discard)
            url = photo["urls"][size]
            text.nameext_from_url(url, photo)

            # always report "jpg", regardless of what was derived from 'url'
            photo["extension"] = "jpg"
            photo["date"] = text.parse_datetime(photo["created_at"])
            if "tags" in photo:
                # reduce tag objects to their plain titles
                photo["tags"] = [tag["title"] for tag in photo["tags"]]

            yield Message.Directory, photo
            yield Message.Url, url, photo

    def skip(self, num):
        """Advance the start page by as many full pages as fit into 'num'

        Returns the number of photos covered by the skipped pages.
        """
        full_pages, _ = divmod(num, self.per_page)
        self.page_start += full_pages
        return full_pages * self.per_page

    def _pagination(self, url, params, results=False):
        """Yield photos from consecutive pages of 'url'

        'params' is updated in place with the paging parameters.
        If 'results' is true, the photo list is read from the
        "results" key of each response instead of the response itself.
        """
        params["per_page"] = self.per_page
        params["page"] = self.page_start

        while True:
            data = self.request(url, params=params).json()
            photos = data["results"] if results else data
            yield from photos

            # a page that is not completely filled is the last one
            if len(photos) >= self.per_page:
                params["page"] += 1
            else:
                return
+
+
class UnsplashImageExtractor(UnsplashExtractor):
    """Extractor for one individual unsplash photo"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"/photos/([^/?#]+)"
    test = ("https://unsplash.com/photos/lsoogGC_5dg", {
        "url": "00accb0a64d5a0df0db911f8b425892718dce524",
        "keyword": {
            "alt_description": "re:silhouette of trees near body of water ",
            "blur_hash": "LZP4uQS4jboe%#o0WCa}2doJNaaz",
            "categories": list,
            "color": "#f3c08c",
            "created_at": "2020-04-08T08:29:42-04:00",
            "date": "dt:2020-04-08 12:29:42",
            "description": "The Island",
            "downloads": int,
            "exif": {
                "aperture": "11",
                "exposure_time": "30",
                "focal_length": "70.0",
                "iso": 200,
                "make": "Canon",
                "model": "Canon EOS 5D Mark IV"
            },
            "extension": "jpg",
            "filename": "photo-1586348943529-beaae6c28db9",
            "height": 6272,
            "id": "lsoogGC_5dg",
            "liked_by_user": False,
            "likes": int,
            "location": {
                "city": "Beaver Dam",
                "country": "United States",
                "name": "Beaver Dam, WI 53916, USA",
                "position": {
                    "latitude": 43.457769,
                    "longitude": -88.837329
                },
                "title": "Beaver Dam, WI 53916, USA"
            },
            "promoted_at": "2020-04-08T11:12:03-04:00",
            "sponsorship": None,
            "tags": list,
            "updated_at": str,
            "user": {
                "accepted_tos": True,
                "bio": str,
                "first_name": "Dave",
                "id": "uMJXuywXLiU",
                "instagram_username": "just_midwest_rock",
                "last_name": "Hoefler",
                "location": "Madison, WI",
                "name": "Dave Hoefler",
                "portfolio_url": str,
                "total_collections": int,
                "total_likes": int,
                "total_photos": int,
                "twitter_username": None,
                "updated_at": str,
                "username": "johnwestrock"
            },
            "views": int,
            "width": 4480,
        },
    })

    def photos(self):
        """Return a 1-tuple with the metadata of the requested photo"""
        endpoint = self.root + "/napi/photos/" + self.item
        photo = self.request(endpoint).json()
        return (photo,)
+
+
class UnsplashUserExtractor(UnsplashExtractor):
    """Extractor for every photo uploaded by an unsplash user"""
    subcategory = "user"
    # anchored at the end, so sub-pages like '/@user/likes' do not match
    pattern = BASE_PATTERN + r"/@(\w+)/?$"
    test = ("https://unsplash.com/@johnwestrock", {
        "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
        "range": "1-30",
        "count": 30,
    })

    def photos(self):
        """Return a generator over the user's photos"""
        endpoint = self.root + "/napi/users/" + self.item + "/photos"
        return self._pagination(endpoint, {"order_by": "latest"})
+
+
class UnsplashFavoriteExtractor(UnsplashExtractor):
    """Extractor for every photo liked by an unsplash user"""
    subcategory = "favorite"
    pattern = BASE_PATTERN + r"/@(\w+)/likes"
    test = ("https://unsplash.com/@johnwestrock/likes", {
        "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
        "range": "1-30",
        "count": 30,
    })

    def photos(self):
        """Return a generator over the user's liked photos"""
        endpoint = self.root + "/napi/users/" + self.item + "/likes"
        return self._pagination(endpoint, {"order_by": "latest"})
+
+
class UnsplashCollectionExtractor(UnsplashExtractor):
    """Extractor for an unsplash collection"""
    subcategory = "collection"
    # Accept any single path segment as collection ID rather than digits
    # only, so collections with non-numeric IDs are recognized as well.
    # Purely numeric URLs still yield the same capture group as before.
    pattern = BASE_PATTERN + r"/collections/([^/?#]+)"
    test = ("https://unsplash.com/collections/3178572/winter", {
        "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
        "range": "1-30",
        "count": 30,
    })

    def photos(self):
        """Return a generator over the photos of the collection"""
        url = "{}/napi/collections/{}/photos".format(self.root, self.item)
        params = {"order_by": "latest"}
        return self._pagination(url, params)
+
+
class UnsplashSearchExtractor(UnsplashExtractor):
    """Extractor for unsplash search results"""
    subcategory = "search"
    pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^/?#]+))?"
    test = ("https://unsplash.com/s/photos/nature", {
        "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
        "range": "1-30",
        "count": 30,
    })

    def __init__(self, match):
        UnsplashExtractor.__init__(self, match)
        # query string following '?', or None if the URL has none
        self.query = match.group(2)

    def photos(self):
        """Return a generator over all photos matching the search term

        Parameters from the URL's query string are added to the request.
        """
        url = self.root + "/napi/search/photos"
        # Search URLs separate the words of a search term with '-'.
        # Restore the spaces before percent-decoding, so that an
        # explicitly encoded hyphen ('%2D') survives as a literal one.
        params = {"query": text.unquote(self.item.replace("-", " "))}
        if self.query:
            params.update(text.parse_query(self.query))
        return self._pagination(url, params, True)