diff options
Diffstat (limited to 'gallery_dl/extractor/deviantart.py')
| -rw-r--r-- | gallery_dl/extractor/deviantart.py | 992 |
1 files changed, 992 insertions, 0 deletions
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py new file mode 100644 index 0000000..ebab040 --- /dev/null +++ b/gallery_dl/extractor/deviantart.py @@ -0,0 +1,992 @@ +# -*- coding: utf-8 -*- + +# Copyright 2015-2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract images from https://www.deviantart.com/""" + +from .common import Extractor, Message +from .. import text, exception +from ..cache import cache, memcache +import collections +import itertools +import mimetypes +import math +import time +import re + + +BASE_PATTERN = ( + r"(?:https?://)?(?:" + r"(?:www\.)?deviantart\.com/([\w-]+)|" + r"(?!www\.)([\w-]+)\.deviantart\.com)" +) + + +class DeviantartExtractor(Extractor): + """Base class for deviantart extractors""" + category = "deviantart" + directory_fmt = ("{category}", "{author[username]!l}") + filename_fmt = "{category}_{index}_{title}.{extension}" + root = "https://www.deviantart.com" + + def __init__(self, match=None): + Extractor.__init__(self, match) + self.offset = 0 + self.flat = self.config("flat", True) + self.extra = self.config("extra", False) + self.original = self.config("original", True) + self.user = match.group(1) or match.group(2) + self.group = False + self.api = DeviantartAPI(self) + + if self.original != "image": + self._update_content = self._update_content_default + else: + self._update_content = self._update_content_image + self.original = True + + self.commit_journal = { + "html": self._commit_journal_html, + "text": self._commit_journal_text, + }.get(self.config("journals", "html")) + + def skip(self, num): + self.offset += num + return num + + def items(self): + if self.user: + self.group = not self.api.user_profile(self.user) + if self.group: + self.subcategory = "group-" + self.subcategory + + yield Message.Version, 1 + for deviation in self.deviations(): + if isinstance(deviation, tuple): + url, data = deviation + yield Message.Queue, url, data + continue + + self.prepare(deviation) + yield Message.Directory, deviation + + if "content" in deviation: + content = deviation["content"] + + if self.original and deviation["is_downloadable"] and \ + text.ext_from_url(content["src"]) != "gif": + self._update_content(deviation, content) + + if deviation["index"] <= 790677560 and \ + content["src"].startswith("https://images-wixmp-"): + # https://github.com/r888888888/danbooru/issues/4069 + content["src"] = re.sub( + r"(/f/[^/]+/[^/]+)/v\d+/.*", + r"/intermediary\1", content["src"]) + + yield self.commit(deviation, content) + + elif deviation["is_downloadable"]: + content = self.api.deviation_download(deviation["deviationid"]) + yield self.commit(deviation, content) + + if "videos" in deviation: + video = max(deviation["videos"], + key=lambda x: text.parse_int(x["quality"][:-1])) + yield self.commit(deviation, video) + + if "flash" in deviation: + yield self.commit(deviation, deviation["flash"]) + + if "excerpt" in deviation and self.commit_journal: + journal = self.api.deviation_content(deviation["deviationid"]) + yield self.commit_journal(deviation, journal) + + if self.extra: + for match in DeviantartStashExtractor.pattern.finditer( + deviation.get("description", "")): + deviation["_extractor"] = DeviantartStashExtractor + yield Message.Queue, match.group(0), deviation + + def deviations(self): + """Return an iterable containing all relevant Deviation-objects""" + + def prepare(self, deviation): + """Adjust the contents of a Deviation-object""" + try: + deviation["index"] = text.parse_int( + deviation["url"].rpartition("-")[2]) + except KeyError: + deviation["index"] = 0 + if self.user: + deviation["username"] = self.user + deviation["da_category"] = deviation["category"] + deviation["published_time"] = text.parse_int( + deviation["published_time"]) + deviation["date"] = text.parse_timestamp( + deviation["published_time"]) + + @staticmethod + def commit(deviation, target): + url = target["src"] + deviation["target"] = text.nameext_from_url(url, target.copy()) + deviation["extension"] = deviation["target"]["extension"] + return Message.Url, url, deviation + + def _commit_journal_html(self, deviation, journal): + title = text.escape(deviation["title"]) + url = deviation["url"] + thumbs = deviation["thumbs"] + html = journal["html"] + shadow = SHADOW_TEMPLATE.format_map(thumbs[0]) if thumbs else "" + + if "css" in journal: + css, cls = journal["css"], "withskin" + else: + css, cls = "", "journal-green" + + if html.find('<div class="boxtop journaltop">', 0, 250) != -1: + needle = '<div class="boxtop journaltop">' + header = HEADER_CUSTOM_TEMPLATE.format( + title=title, url=url, date=deviation["date"], + ) + else: + needle = '<div usr class="gr">' + catlist = deviation["category_path"].split("/") + categories = " / ".join( + ('<span class="crumb"><a href="{}/{}/"><span>{}</span></a>' + '</span>').format(self.root, cpath, cat.capitalize()) + for cat, cpath in zip( + catlist, + itertools.accumulate(catlist, lambda t, c: t + "/" + c) + ) + ) + username = deviation["author"]["username"] + urlname = deviation.get("username") or username.lower() + header = HEADER_TEMPLATE.format( + title=title, + url=url, + userurl="{}/{}/".format(self.root, urlname), + username=username, + date=deviation["date"], + categories=categories, + ) + + html = JOURNAL_TEMPLATE_HTML.format( + title=title, + html=html.replace(needle, header, 1), + shadow=shadow, + css=css, + cls=cls, + ) + + deviation["extension"] = "htm" + return Message.Url, html, deviation + + @staticmethod + def _commit_journal_text(deviation, journal): + content = "\n".join( + text.unescape(text.remove_html(txt)) + for txt in journal["html"].rpartition("<script")[0].split("<br />") + ) + txt = JOURNAL_TEMPLATE_TEXT.format( + title=deviation["title"], + username=deviation["author"]["username"], + date=deviation["date"], + content=content, + ) + + deviation["extension"] = "txt" + return Message.Url, txt, deviation + + @staticmethod + def _find_folder(folders, name): + pattern = re.compile( + r"[^\w]*" + name.replace("-", r"[^\w]+") + r"[^\w]*$") + for folder in folders: + if pattern.match(folder["name"]): + return folder + raise exception.NotFoundError("folder") + + def _folder_urls(self, folders, category): + url = "{}/{}/{}/0/".format(self.root, self.user, category) + return [(url + folder["name"], folder) for folder in folders] + + def _update_content_default(self, deviation, content): + content.update(self.api.deviation_download(deviation["deviationid"])) + + def _update_content_image(self, deviation, content): + data = self.api.deviation_download(deviation["deviationid"]) + url = data["src"].partition("?")[0] + mtype = mimetypes.guess_type(url, False)[0] + if mtype and mtype.startswith("image/"): + content.update(data) + + def _html_request(self, url, **kwargs): + cookies = {"userinfo": ( + '__167217c8e6aac1a3331f;{"username":"","uniqueid":"ab2e8b184471bf0' + 'e3f8ed3ee7a3220aa","vd":"Bc7vEx,BdC7Fy,A,J,A,,B,A,B,BdC7Fy,BdC7XU' + ',J,J,A,BdC7XU,13,A,B,A,,A,A,B,A,A,,A","attr":56}' + )} + return self.request(url, cookies=cookies, **kwargs) + + +class DeviantartGalleryExtractor(DeviantartExtractor): + """Extractor for all deviations from an artist's gallery""" + subcategory = "gallery" + archive_fmt = "g_{username}_{index}.{extension}" + pattern = BASE_PATTERN + r"(?:/(?:gallery/?(?:\?catpath=/)?)?)?$" + test = ( + ("https://www.deviantart.com/shimoda7/gallery/", { + "pattern": r"https://(s3.amazonaws.com/origin-(img|orig)" + r".deviantart.net/|images-wixmp-\w+.wixmp.com/)", + "count": ">= 30", + "keyword": { + "allows_comments": bool, + "author": { + "type": "regular", + "usericon": str, + "userid": "9AE51FC7-0278-806C-3FFF-F4961ABF9E2B", + "username": "shimoda7", + }, + "category_path": str, + "content": { + "filesize": int, + "height": int, + "src": str, + "transparency": bool, + "width": int, + }, + "da_category": str, + "date": "type:datetime", + "deviationid": str, + "?download_filesize": int, + "extension": str, + "index": int, + "is_deleted": bool, + "is_downloadable": bool, + "is_favourited": bool, + "is_mature": bool, + "preview": { + "height": int, + "src": str, + "transparency": bool, + "width": int, + }, + "published_time": int, + "stats": { + "comments": int, + "favourites": int, + }, + "target": dict, + "thumbs": list, + "title": str, + "url": r"re:https://www.deviantart.com/shimoda7/art/[^/]+-\d+", + "username": "shimoda7", + }, + }), + # group + ("https://www.deviantart.com/yakuzafc", { + "pattern": r"https://www.deviantart.com/yakuzafc/gallery/0/", + "count": ">= 15", + }), + # 'folders' option (#276) + ("https://www.deviantart.com/justatest235723", { + "count": 2, + "options": (("metadata", 1), ("folders", 1), ("original", 0)), + "keyword": { + "description": str, + "folders": list, + "is_watching": bool, + "license": str, + "tags": list, + }, + }), + ("https://www.deviantart.com/shimoda8/gallery/", { + "exception": exception.NotFoundError, + }), + # old-style URLs + ("https://www.deviantart.com/shimoda7/gallery/?catpath=/"), + ("https://shimoda7.deviantart.com/gallery/"), + ("https://yakuzafc.deviantart.com/"), + ("https://shimoda7.deviantart.com/gallery/?catpath=/"), + ) + + def deviations(self): + if self.flat and not self.group: + return self.api.gallery_all(self.user, self.offset) + folders = self.api.gallery_folders(self.user) + return self._folder_urls(folders, "gallery") + + +class DeviantartFolderExtractor(DeviantartExtractor): + """Extractor for deviations inside an artist's gallery folder""" + subcategory = "folder" + directory_fmt = ("{category}", "{folder[owner]}", "{folder[title]}") + archive_fmt = "F_{folder[uuid]}_{index}.{extension}" + pattern = BASE_PATTERN + r"/gallery/(\d+)/([^/?&#]+)" + test = ( + # user + ("https://www.deviantart.com/shimoda7/gallery/722019/Miscellaneous", { + "count": 5, + "options": (("original", False),), + }), + # group + ("https://www.deviantart.com/yakuzafc/gallery/37412168/Crafts", { + "count": ">= 4", + "options": (("original", False),), + }), + ("https://shimoda7.deviantart.com/gallery/722019/Miscellaneous"), + ("https://yakuzafc.deviantart.com/gallery/37412168/Crafts"), + ) + + def __init__(self, match): + DeviantartExtractor.__init__(self, match) + self.fname = match.group(4) + self.folder = {"owner": self.user, "index": match.group(3)} + + def deviations(self): + folders = self.api.gallery_folders(self.user) + folder = self._find_folder(folders, self.fname) + self.folder["title"] = folder["name"] + self.folder["uuid"] = folder["folderid"] + return self.api.gallery(self.user, folder["folderid"], self.offset) + + def prepare(self, deviation): + DeviantartExtractor.prepare(self, deviation) + deviation["folder"] = self.folder + + +class DeviantartDeviationExtractor(DeviantartExtractor): + """Extractor for single deviations""" + subcategory = "deviation" + archive_fmt = "{index}.{extension}" + pattern = BASE_PATTERN + r"/((?:art|journal)/[^/?&#]+-\d+)" + test = ( + (("https://www.deviantart.com/shimoda7/art/" + "For-the-sake-of-a-memory-10073852"), { + "options": (("original", 0),), + "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", + }), + ("https://www.deviantart.com/zzz/art/zzz-1234567890", { + "exception": exception.NotFoundError, + }), + (("https://www.deviantart.com/myria-moon/art/" + "Aime-Moi-part-en-vadrouille-261986576"), { + "pattern": (r"https?://s3\.amazonaws\.com/origin-orig\." + r"deviantart\.net/a383/f/2013/135/e/7/[^.]+\.jpg\?"), + }), + # wixmp URL rewrite + (("https://www.deviantart.com/citizenfresh/art/" + "Hverarond-14-the-beauty-of-the-earth-789295466"), { + "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" + r"/intermediary/f/[^/]+/[^.]+\.jpg$") + }), + # non-download URL for GIFs (#242) + (("https://www.deviantart.com/skatergators/art/" + "COM-Monique-Model-781571783"), { + "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" + r"/f/[^/]+/[^.]+\.gif\?token="), + }), + # external URLs from description (#302) + (("https://www.deviantart.com/uotapo/art/" + "INANAKI-Memorial-Humane7-590297498"), { + "options": (("extra", 1), ("original", 0)), + "pattern": r"https?://sta\.sh/\w+$", + "range": "2-", + "count": 4, + }), + # old-style URLs + ("https://shimoda7.deviantart.com" + "/art/For-the-sake-of-a-memory-10073852"), + ("https://myria-moon.deviantart.com" + "/art/Aime-Moi-part-en-vadrouille-261986576"), + ("https://zzz.deviantart.com/art/zzz-1234567890"), + ) + + skip = Extractor.skip + + def __init__(self, match): + DeviantartExtractor.__init__(self, match) + self.path = match.group(3) + + def deviations(self): + url = "{}/{}/{}".format(self.root, self.user, self.path) + response = self._html_request(url, expect=range(400, 500)) + deviation_id = text.extract(response.text, '//deviation/', '"')[0] + if response.status_code >= 400 or not deviation_id: + raise exception.NotFoundError("image") + return (self.api.deviation(deviation_id),) + + +class DeviantartStashExtractor(DeviantartExtractor): + """Extractor for sta.sh-ed deviations""" + subcategory = "stash" + archive_fmt = "{index}.{extension}" + pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)" + test = ( + ("https://sta.sh/022c83odnaxc", { + "pattern": r"https://s3.amazonaws.com/origin-orig.deviantart.net", + "count": 1, + }), + # multiple stash items + ("https://sta.sh/21jf51j7pzl2", { + "pattern": pattern, + "count": 4, + }), + # downloadable, but no "content" field (#307) + ("https://sta.sh/024t4coz16mi", { + "count": 1, + }), + ("https://sta.sh/abcdefghijkl", { + "exception": exception.HttpError, + }), + ) + + skip = Extractor.skip + + def __init__(self, match): + DeviantartExtractor.__init__(self, match) + self.user = None + self.stash_id = match.group(1) + + def deviations(self): + url = "https://sta.sh/" + self.stash_id + page = self.request(url).text + deviation_id = text.extract(page, '//deviation/', '"')[0] + + if deviation_id: + yield self.api.deviation(deviation_id) + else: + data = {"_extractor": DeviantartStashExtractor} + page = text.extract( + page, '<div id="stash-body"', '<div class="footer"')[0] + for url in text.extract_iter(page, '<a href="', '"'): + yield url, data + + +class DeviantartFavoriteExtractor(DeviantartExtractor): + """Extractor for an artist's favorites""" + subcategory = "favorite" + directory_fmt = ("{category}", "{username}", "Favourites") + archive_fmt = "f_{username}_{index}.{extension}" + pattern = BASE_PATTERN + r"/favourites/?(?:\?catpath=/)?$" + test = ( + ("https://www.deviantart.com/h3813067/favourites/", { + "options": (("metadata", True), ("flat", False)), # issue #271 + "count": 1, + }), + ("https://www.deviantart.com/h3813067/favourites/", { + "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", + }), + ("https://www.deviantart.com/h3813067/favourites/?catpath=/"), + ("https://h3813067.deviantart.com/favourites/"), + ("https://h3813067.deviantart.com/favourites/?catpath=/"), + ) + + def deviations(self): + folders = self.api.collections_folders(self.user) + if self.flat: + return itertools.chain.from_iterable( + self.api.collections(self.user, folder["folderid"]) + for folder in folders + ) + return self._folder_urls(folders, "favourites") + + +class DeviantartCollectionExtractor(DeviantartExtractor): + """Extractor for a single favorite collection""" + subcategory = "collection" + directory_fmt = ("{category}", "{collection[owner]}", + "Favourites", "{collection[title]}") + archive_fmt = "C_{collection[uuid]}_{index}.{extension}" + pattern = BASE_PATTERN + r"/favourites/(\d+)/([^/?&#]+)" + test = ( + (("https://www.deviantart.com/pencilshadings" + "/favourites/70595441/3D-Favorites"), { + "count": ">= 20", + "options": (("original", False),), + }), + ("https://pencilshadings.deviantart.com" + "/favourites/70595441/3D-Favorites"), + ) + + def __init__(self, match): + DeviantartExtractor.__init__(self, match) + _, _, cid, self.cname = match.groups() + self.collection = {"owner": self.user, "index": cid} + + def deviations(self): + folders = self.api.collections_folders(self.user) + folder = self._find_folder(folders, self.cname) + self.collection["title"] = folder["name"] + self.collection["uuid"] = folder["folderid"] + return self.api.collections(self.user, folder["folderid"], self.offset) + + def prepare(self, deviation): + DeviantartExtractor.prepare(self, deviation) + deviation["collection"] = self.collection + + +class DeviantartJournalExtractor(DeviantartExtractor): + """Extractor for an artist's journals""" + subcategory = "journal" + directory_fmt = ("{category}", "{username}", "Journal") + archive_fmt = "j_{username}_{index}.{extension}" + pattern = BASE_PATTERN + r"/(?:journal|blog)/?(?:\?catpath=/)?$" + test = ( + ("https://www.deviantart.com/angrywhitewanker/journal/", { + "url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44", + }), + ("https://www.deviantart.com/angrywhitewanker/journal/", { + "url": "b2a8e74d275664b1a4acee0fca0a6fd33298571e", + "options": (("journals", "text"),), + }), + ("https://www.deviantart.com/angrywhitewanker/journal/", { + "count": 0, + "options": (("journals", "none"),), + }), + ("https://www.deviantart.com/shimoda7/journal/?catpath=/"), + ("https://shimoda7.deviantart.com/journal/"), + ("https://shimoda7.deviantart.com/journal/?catpath=/"), + ) + + def deviations(self): + return self.api.browse_user_journals(self.user, self.offset) + + +class DeviantartScrapsExtractor(DeviantartExtractor): + """Extractor for an artist's scraps""" + subcategory = "scraps" + directory_fmt = ("{category}", "{username}", "Scraps") + archive_fmt = "s_{username}_{index}.{extension}" + pattern = BASE_PATTERN + r"/gallery/\?catpath=scraps\b" + test = ( + ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps", { + "count": 12, + "options": (("original", False),), + }), + ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"), + ) + + def deviations(self): + url = "{}/{}/gallery/?catpath=scraps".format(self.root, self.user) + page = self._html_request(url).text + csrf, pos = text.extract(page, '"csrf":"', '"') + iid , pos = text.extract(page, '"requestid":"', '"', pos) + + url = "https://www.deviantart.com/dapi/v1/gallery/0" + data = { + "username": self.user, + "offset": self.offset, + "limit": "24", + "catpath": "scraps", + "_csrf": csrf, + "dapiIid": iid + "-jsok7403-1.1" + } + + while True: + content = self.request( + url, method="POST", data=data).json()["content"] + + for item in content["results"]: + if item["html"].startswith('<div class="ad-container'): + continue + deviation_url = text.extract(item["html"], 'href="', '"')[0] + page = self._html_request(deviation_url).text + deviation_id = text.extract(page, '//deviation/', '"')[0] + if deviation_id: + yield self.api.deviation(deviation_id) + + if not content["has_more"]: + return + data["offset"] = content["next_offset"] + + +class DeviantartPopularExtractor(DeviantartExtractor): + """Extractor for popular deviations""" + subcategory = "popular" + directory_fmt = ("{category}", "Popular", + "{popular[range]}", "{popular[search]}") + archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}" + pattern = (r"(?:https?://)?www\.deviantart\.com" + r"((?:/\w+)*)/(?:popular-([^/?&#]+))/?(?:\?([^#]*))?") + test = ( + ("https://www.deviantart.com/popular-24-hours/?q=tree+house", { + "options": (("original", False),), + }), + ("https://www.deviantart.com/artisan/popular-all-time/?q=tree"), + ) + + def __init__(self, match): + DeviantartExtractor.__init__(self, match) + self.search_term = self.time_range = self.category_path = None + self.user = "" + + path, trange, query = match.groups() + if path: + self.category_path = path.lstrip("/") + if trange: + self.time_range = trange.replace("-", "").replace("hours", "hr") + if query: + self.search_term = text.parse_query(query).get("q") + + self.popular = { + "search": self.search_term or "", + "range": trange or "24-hours", + "path": self.category_path, + } + + def deviations(self): + return self.api.browse_popular( + self.search_term, self.time_range, self.category_path, self.offset) + + def prepare(self, deviation): + DeviantartExtractor.prepare(self, deviation) + deviation["popular"] = self.popular + + +class DeviantartAPI(): + """Minimal interface for the DeviantArt API + + Ref: https://www.deviantart.com/developers/http/v1/20160316 + """ + CLIENT_ID = "5388" + CLIENT_SECRET = "76b08c69cfb27f26d6161f9ab6d061a1" + + def __init__(self, extractor): + self.extractor = extractor + self.log = extractor.log + self.headers = {} + + delay = extractor.config("wait-min", 0) + self.delay = math.ceil(math.log2(delay)) if delay >= 1 else -1 + self.delay_min = max(2, self.delay) + + self.mature = extractor.config("mature", "true") + if not isinstance(self.mature, str): + self.mature = "true" if self.mature else "false" + + self.folders = extractor.config("folders", False) + self.metadata = extractor.extra or extractor.config("metadata", False) + + self.refresh_token = extractor.config("refresh-token") + self.client_id = extractor.config("client-id", self.CLIENT_ID) + self.client_secret = extractor.config( + "client-secret", self.CLIENT_SECRET) + + def browse_popular(self, query=None, timerange=None, + category_path=None, offset=0): + """Yield popular deviations""" + endpoint = "browse/popular" + params = {"q": query, "offset": offset, "limit": 120, + "timerange": timerange, "category_path": category_path, + "mature_content": self.mature} + return self._pagination(endpoint, params) + + def browse_user_journals(self, username, offset=0): + """Yield all journal entries of a specific user""" + endpoint = "browse/user/journals" + params = {"username": username, "offset": offset, "limit": 50, + "mature_content": self.mature, "featured": "false"} + return self._pagination(endpoint, params) + + def collections(self, username, folder_id, offset=0): + """Yield all Deviation-objects contained in a collection folder""" + endpoint = "collections/" + folder_id + params = {"username": username, "offset": offset, "limit": 24, + "mature_content": self.mature} + return self._pagination(endpoint, params) + + @memcache(keyarg=1) + def collections_folders(self, username, offset=0): + """Yield all collection folders of a specific user""" + endpoint = "collections/folders" + params = {"username": username, "offset": offset, "limit": 50, + "mature_content": self.mature} + return self._pagination_folders(endpoint, params) + + def deviation(self, deviation_id): + """Query and return info about a single Deviation""" + endpoint = "deviation/" + deviation_id + deviation = self._call(endpoint) + if self.metadata: + self._metadata((deviation,)) + if self.folders: + self._folders((deviation,)) + return deviation + + def deviation_content(self, deviation_id): + """Get extended content of a single Deviation""" + endpoint = "deviation/content" + params = {"deviationid": deviation_id} + return self._call(endpoint, params) + + def deviation_download(self, deviation_id): + """Get the original file download (if allowed)""" + endpoint = "deviation/download/" + deviation_id + params = {"mature_content": self.mature} + return self._call(endpoint, params) + + def deviation_metadata(self, deviations): + """ Fetch deviation metadata for a set of deviations""" + endpoint = "deviation/metadata?" + "&".join( + "deviationids[{}]={}".format(num, deviation["deviationid"]) + for num, deviation in enumerate(deviations) + ) + params = {"mature_content": self.mature} + return self._call(endpoint, params)["metadata"] + + def gallery(self, username, folder_id="", offset=0, extend=True): + """Yield all Deviation-objects contained in a gallery folder""" + endpoint = "gallery/" + folder_id + params = {"username": username, "offset": offset, "limit": 24, + "mature_content": self.mature, "mode": "newest"} + return self._pagination(endpoint, params, extend) + + def gallery_all(self, username, offset=0): + """Yield all Deviation-objects of a specific user""" + endpoint = "gallery/all" + params = {"username": username, "offset": offset, "limit": 24, + "mature_content": self.mature} + return self._pagination(endpoint, params) + + @memcache(keyarg=1) + def gallery_folders(self, username, offset=0): + """Yield all gallery folders of a specific user""" + endpoint = "gallery/folders" + params = {"username": username, "offset": offset, "limit": 50, + "mature_content": self.mature} + return self._pagination_folders(endpoint, params) + + @memcache(keyarg=1) + def user_profile(self, username): + """Get user profile information""" + endpoint = "user/profile/" + username + return self._call(endpoint, expect_error=True) + + def authenticate(self, refresh_token): + """Authenticate the application by requesting an access token""" + self.headers["Authorization"] = self._authenticate_impl(refresh_token) + + @cache(maxage=3600, keyarg=1) + def _authenticate_impl(self, refresh_token): + """Actual authenticate implementation""" + url = "https://www.deviantart.com/oauth2/token" + if refresh_token: + self.log.info("Refreshing private access token") + data = {"grant_type": "refresh_token", + "refresh_token": _refresh_token_cache(refresh_token)} + else: + self.log.info("Requesting public access token") + data = {"grant_type": "client_credentials"} + + auth = (self.client_id, self.client_secret) + response = self.extractor.request( + url, method="POST", data=data, auth=auth) + data = response.json() + + if response.status_code != 200: + raise exception.AuthenticationError('"{} ({})"'.format( + data.get("error_description"), data.get("error"))) + if refresh_token: + _refresh_token_cache.update(refresh_token, data["refresh_token"]) + return "Bearer " + data["access_token"] + + def _call(self, endpoint, params=None, expect_error=False, public=True): + """Call an API endpoint""" + url = "https://www.deviantart.com/api/v1/oauth2/" + endpoint + while True: + if self.delay >= 0: + time.sleep(2 ** self.delay) + + self.authenticate(None if public else self.refresh_token) + response = self.extractor.request( + url, + params=params, + headers=self.headers, + expect=range(400, 500), + ) + data = response.json() + status = response.status_code + + if 200 <= status < 400: + if self.delay > self.delay_min: + self.delay -= 1 + return data + if expect_error: + return None + if data.get("error_description") == "User not found.": + raise exception.NotFoundError("user or group") + + self.log.debug(response.text) + msg = "API responded with {} {}".format( + status, response.reason) + if status == 429: + self.delay += 1 + self.log.warning("%s. Using %ds delay.", msg, 2 ** self.delay) + else: + self.log.error(msg) + return data + + def _pagination(self, endpoint, params, extend=True): + public = True + while True: + data = self._call(endpoint, params, public=public) + if "results" not in data: + self.log.error("Unexpected API response: %s", data) + return + if (public and self.refresh_token and + len(data["results"]) < params["limit"]): + self.log.debug("Switching to private access token") + public = False + continue + + if extend: + if self.metadata: + self._metadata(data["results"]) + if self.folders: + self._folders(data["results"]) + yield from data["results"] + + if not data["has_more"]: + return + params["offset"] = data["next_offset"] + + def _pagination_folders(self, endpoint, params): + result = [] + result.extend(self._pagination(endpoint, params, False)) + return result + + def _metadata(self, deviations): + """Add extended metadata to each deviation object""" + for deviation, metadata in zip( + deviations, self.deviation_metadata(deviations)): + deviation.update(metadata) + deviation["tags"] = [t["tag_name"] for t in deviation["tags"]] + return deviations + + def _folders(self, deviations): + """Add a list of all containing folders to each deviation object""" + for deviation in deviations: + deviation["folders"] = self._folders_map( + deviation["author"]["username"])[deviation["deviationid"]] + + @memcache(keyarg=1) + def _folders_map(self, username): + """Generate a deviation_id -> folders mapping for 'username'""" + self.log.info("Collecting folder information for '%s'", username) + folders = self.gallery_folders(username) + + # add parent names to folders, but ignore "Featured" as parent + fmap = {} + featured = folders[0]["folderid"] + for folder in folders: + if folder["parent"] and folder["parent"] != featured: + folder["name"] = fmap[folder["parent"]] + "/" + folder["name"] + fmap[folder["folderid"]] = folder["name"] + + # map deviationids to folder names + dmap = collections.defaultdict(list) + for folder in folders: + for deviation in self.gallery( + username, folder["folderid"], 0, False): + dmap[deviation["deviationid"]].append(folder["name"]) + return dmap + + +@cache(maxage=10*365*24*3600, keyarg=0) +def _refresh_token_cache(original_token, new_token=None): + return new_token or original_token + + +SHADOW_TEMPLATE = """ +<span class="shadow"> + <img src="{src}" class="smshadow" width="{width}" height="{height}"> +</span> +<br><br> +""" + +HEADER_TEMPLATE = """<div usr class="gr"> +<div class="metadata"> + <h2><a href="{url}">{title}</a></h2> + <ul> + <li class="author"> + by <span class="name"><span class="username-with-symbol u"> + <a class="u regular username" href="{userurl}">{username}</a>\ +<span class="user-symbol regular"></span></span></span>, + <span>{date}</span> + </li> + <li class="category"> + {categories} + </li> + </ul> +</div> +""" + +HEADER_CUSTOM_TEMPLATE = """<div class='boxtop journaltop'> +<h2> + <img src="https://st.deviantart.net/minish/gruzecontrol/icons/journal.gif\ +?2" style="vertical-align:middle" alt=""/> + <a href="{url}">{title}</a> +</h2> +Journal Entry: <span>{date}</span> +""" + +JOURNAL_TEMPLATE_HTML = """text:<!DOCTYPE html> +<html> +<head> + <meta charset="utf-8"> + <title>{title}</title> + <link rel="stylesheet" href="https://st.deviantart.net/\ +css/deviantart-network_lc.css?3843780832"> + <link rel="stylesheet" href="https://st.deviantart.net/\ +css/group_secrets_lc.css?3250492874"> + <link rel="stylesheet" href="https://st.deviantart.net/\ +css/v6core_lc.css?4246581581"> + <link rel="stylesheet" href="https://st.deviantart.net/\ +css/sidebar_lc.css?1490570941"> + <link rel="stylesheet" href="https://st.deviantart.net/\ +css/writer_lc.css?3090682151"> + <link rel="stylesheet" href="https://st.deviantart.net/\ +css/v6loggedin_lc.css?3001430805"> + <style>{css}</style> + <link rel="stylesheet" href="https://st.deviantart.net/\ +roses/cssmin/core.css?1488405371919" > + <link rel="stylesheet" href="https://st.deviantart.net/\ +roses/cssmin/peeky.css?1487067424177" > + <link rel="stylesheet" href="https://st.deviantart.net/\ +roses/cssmin/desktop.css?1491362542749" > +</head> +<body id="deviantART-v7" class="bubble no-apps loggedout w960 deviantart"> + <div id="output"> + <div class="dev-page-container bubbleview"> + <div class="dev-page-view view-mode-normal"> + <div class="dev-view-main-content"> + <div class="dev-view-deviation"> + {shadow} + <div class="journal-wrapper tt-a"> + <div class="journal-wrapper2"> + <div class="journal {cls} journalcontrol"> + {html} + </div> + </div> + </div> + </div> + </div> + </div> + </div> + </div> +</body> +</html> +""" + +JOURNAL_TEMPLATE_TEXT = """text:{title} +by {username}, {date} + +{content} +""" |
