Diffstat (limited to 'gallery_dl/extractor')
23 files changed, 577 insertions, 185 deletions
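The common.py hunk below adds a 'write-pages' option that dumps every HTTP response to a '.dump' file named after a running index and the sanitized URL. A minimal sketch of that naming scheme, mirroring the new _dump_response() code (the helper name dump_filename is invented for illustration; in the diff the counter lives on the Extractor class and the actual file writing goes through gallery-dl's util.dump_response):

    import re

    _dump_index = 0
    _dump_sanitize = re.compile(r"[\\|/<>:\"?*&=#]+").sub

    def dump_filename(url):
        # a 2-digit running index, then the URL with runs of special
        # characters collapsed to "_", truncated to 250 characters
        global _dump_index
        _dump_index += 1
        return "{:>02}_{}".format(_dump_index, _dump_sanitize("_", url))[:250]

    print(dump_filename("https://example.org/path?key=value"))
    # 01_https_example.org_path_key_value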
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 85fbddb..561b484 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -48,6 +48,7 @@ modules = [ "hypnohub", "idolcomplex", "imagebam", + "imagechest", "imagefap", "imgbb", "imgbox", @@ -94,6 +95,7 @@ modules = [ "readcomiconline", "realbooru", "reddit", + "redgifs", "rule34", "safebooru", "sankaku", @@ -113,6 +115,7 @@ modules = [ "vsco", "wallhaven", "warosu", + "webtoons", "weibo", "wikiart", "xhamster", diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 3a282c2..dd685df 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -40,6 +40,7 @@ class Extractor(): self._cookiefile = None self._cookiejar = self.session.cookies self._parentdir = "" + self._write_pages = self.config("write-pages", False) self._retries = self.config("retries", 4) self._timeout = self.config("timeout", 30) self._verify = self.config("verify", True) @@ -91,6 +92,8 @@ class Extractor(): raise exception.HttpError(exc) else: code = response.status_code + if self._write_pages: + self._dump_response(response) if 200 <= code < 400 or fatal is None and \ (400 <= code < 500) or not fatal and \ (400 <= code < 429 or 431 <= code < 500): @@ -325,6 +328,33 @@ class Extractor(): test = (test, None) yield test + def _dump_response(self, response): + """Write the response content to a .dump file in the current directory. + + The file name is derived from the response url, + replacing special characters with "_" + """ + for resp in response.history: + self._dump_response(resp) + + if hasattr(Extractor, "_dump_index"): + Extractor._dump_index += 1 + else: + Extractor._dump_index = 1 + Extractor._dump_sanitize = re.compile(r"[\\\\|/<>:\"?*&=#]+").sub + + fname = "{:>02}_{}".format( + Extractor._dump_index, + Extractor._dump_sanitize('_', response.url) + )[:250] + + try: + with open(fname + ".dump", 'wb') as fp: + util.dump_response(response, fp) + except Exception as e: + self.log.warning("Failed to dump HTTP request (%s: %s)", + e.__class__.__name__, e) + class GalleryExtractor(Extractor): @@ -460,7 +490,7 @@ class SharedConfigMixin(): """Enable sharing of config settings based on 'basecategory'""" basecategory = "" - def config(self, key, default=None, *, sentinel=object()): + def config(self, key, default=None, *, sentinel=util.SENTINEL): value = Extractor.config(self, key, sentinel) return value if value is not sentinel else config.interpolate( ("extractor", self.basecategory, self.subcategory), key, default) diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 3a0d0ef..e0edf89 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -32,7 +32,7 @@ class DanbooruExtractor(SharedConfigMixin, Extractor): def __init__(self, match): super().__init__(match) self.root = "https://{}.donmai.us".format(match.group(1)) - self.ugoira = self.config("ugoira", True) + self.ugoira = self.config("ugoira", False) self.params = {} username, api_key = self._get_auth_info() @@ -156,8 +156,8 @@ class DanbooruPostExtractor(DanbooruExtractor): "content": "5e255713cbf0a8e0801dc423563c34d896bb9229", }), ("https://danbooru.donmai.us/posts/3613024", { - "pattern": r"https?://.+\.webm$", - "options": (("ugoira", False),) + "pattern": r"https?://.+\.zip$", + "options": (("ugoira", True),) }) ) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 2631052..cda357a 100644 --- 
a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -126,8 +126,9 @@ class DeviantartExtractor(Extractor): if self.extra: for match in DeviantartStashExtractor.pattern.finditer( deviation.get("description", "")): + url = text.ensure_http_scheme(match.group(0)) deviation["_extractor"] = DeviantartStashExtractor - yield Message.Queue, match.group(0), deviation + yield Message.Queue, url, deviation def deviations(self): """Return an iterable containing all relevant Deviation-objects""" @@ -849,9 +850,12 @@ class DeviantartOAuthAPI(): self.client_secret = extractor.config( "client-secret", self.CLIENT_SECRET) - self.refresh_token = extractor.config("refresh-token") - if self.refresh_token == "cache": - self.refresh_token = "#" + str(self.client_id) + token = extractor.config("refresh-token") + if token is None or token == "cache": + token = "#" + str(self.client_id) + if not _refresh_token_cache(token): + token = None + self.refresh_token_key = token self.log.debug( "Using %s API credentials (client-id %s)", @@ -904,7 +908,7 @@ class DeviantartOAuthAPI(): """Get extended content of a single Deviation""" endpoint = "deviation/content" params = {"deviationid": deviation_id} - return self._call(endpoint, params) + return self._call(endpoint, params, public=False) def deviation_download(self, deviation_id): """Get the original file download (if allowed)""" @@ -951,18 +955,19 @@ class DeviantartOAuthAPI(): endpoint = "user/profile/" + username return self._call(endpoint, fatal=False) - def authenticate(self, refresh_token): + def authenticate(self, refresh_token_key): """Authenticate the application by requesting an access token""" - self.headers["Authorization"] = self._authenticate_impl(refresh_token) + self.headers["Authorization"] = \ + self._authenticate_impl(refresh_token_key) @cache(maxage=3600, keyarg=1) - def _authenticate_impl(self, refresh_token): + def _authenticate_impl(self, refresh_token_key): """Actual authenticate implementation""" url = "https://www.deviantart.com/oauth2/token" - if refresh_token: + if refresh_token_key: self.log.info("Refreshing private access token") data = {"grant_type": "refresh_token", - "refresh_token": _refresh_token_cache(refresh_token)} + "refresh_token": _refresh_token_cache(refresh_token_key)} else: self.log.info("Requesting public access token") data = {"grant_type": "client_credentials"} @@ -976,8 +981,9 @@ class DeviantartOAuthAPI(): self.log.debug("Server response: %s", data) raise exception.AuthenticationError('"{}" ({})'.format( data.get("error_description"), data.get("error"))) - if refresh_token: - _refresh_token_cache.update(refresh_token, data["refresh_token"]) + if refresh_token_key: + _refresh_token_cache.update( + refresh_token_key, data["refresh_token"]) return "Bearer " + data["access_token"] def _call(self, endpoint, params=None, fatal=True, public=True): @@ -987,7 +993,7 @@ class DeviantartOAuthAPI(): if self.delay >= 0: time.sleep(2 ** self.delay) - self.authenticate(None if public else self.refresh_token) + self.authenticate(None if public else self.refresh_token_key) response = self.extractor.request( url, headers=self.headers, params=params, fatal=None) data = response.json() @@ -1023,7 +1029,7 @@ class DeviantartOAuthAPI(): if extend: if public and len(data["results"]) < params["limit"]: - if self.refresh_token: + if self.refresh_token_key: self.log.debug("Switching to private access token") public = False continue @@ -1154,9 +1160,11 @@ class DeviantartEclipseAPI(): return text.rextract(page, 
'\\"id\\":', ',', pos)[0].strip('" ') -@cache(maxage=10*365*24*3600, keyarg=0) -def _refresh_token_cache(original_token, new_token=None): - return new_token or original_token +@cache(maxage=100*365*24*3600, keyarg=0) +def _refresh_token_cache(token): + if token and token[0] == "#": + return None + return token ############################################################################### diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index 0c05a97..612c742 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2019 Mike Fährmann +# Copyright 2014-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,7 +10,7 @@ from . import booru from .common import Message -from .. import text, util +from .. import text class GelbooruExtractor(booru.XmlParserMixin, @@ -31,6 +31,7 @@ class GelbooruExtractor(booru.XmlParserMixin, else: self.items = self.items_noapi self.session.cookies["fringeBenefits"] = "yup" + self.per_page = 42 def items_noapi(self): yield Message.Version, 1 @@ -46,6 +47,19 @@ class GelbooruExtractor(booru.XmlParserMixin, def get_posts(self): """Return an iterable containing all relevant post objects""" + url = "https://gelbooru.com/index.php?page=post&s=list" + params = { + "tags": self.params["tags"], + "pid" : self.page_start * self.per_page + } + + while True: + page = self.request(url, params=params).text + ids = list(text.extract_iter(page, '<a id="p', '"')) + yield from ids + if len(ids) < self.per_page: + return + params["pid"] += self.per_page def get_post_data(self, post_id): """Extract metadata of a single post""" @@ -88,34 +102,20 @@ class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor): }), ) - def __init__(self, match): - super().__init__(match) - if not self.use_api: - self.per_page = 42 - - def get_posts(self): - url = "https://gelbooru.com/index.php?page=post&s=list" - params = {"tags": self.tags, "pid": self.page_start * self.per_page} - while True: - page = self.request(url, params=params).text - ids = list(text.extract_iter(page, '<a id="p', '"')) - yield from ids - if len(ids) < self.per_page: - return - params["pid"] += self.per_page - - -class GelbooruPoolExtractor(booru.GelbooruPoolMixin, GelbooruExtractor): +class GelbooruPoolExtractor(booru.PoolMixin, GelbooruExtractor): """Extractor for image-pools from gelbooru.com""" pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?" 
r"\?page=pool&s=show&id=(?P<pool>\d+)") - test = ("https://gelbooru.com/index.php?page=pool&s=show&id=761", { - "count": 6, - }) - - def get_posts(self): - return util.advance(self.posts, self.page_start) + test = ( + ("https://gelbooru.com/index.php?page=pool&s=show&id=761", { + "count": 6, + }), + ("https://gelbooru.com/index.php?page=pool&s=show&id=761", { + "options": (("api", False),), + "count": 6, + }), + ) class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor): diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py index ef64942..aa41836 100644 --- a/gallery_dl/extractor/hentainexus.py +++ b/gallery_dl/extractor/hentainexus.py @@ -51,20 +51,38 @@ class HentainexusGalleryExtractor(GalleryExtractor): "description": rmve(extr('viewcolumn">Description</td>', '</td>')), } data["lang"] = util.language_to_code(data["language"]) - data["type"] = "Doujinshi" if 'doujin' in data["tags"] else "Manga" - data["title_conventional"] = self.join_title( - data["event"], - data["circle"], - data["artist"], - data["title"], - data["parody"], - data["book"], - data["magazine"], - ) + if 'doujin' in data['tags']: + data['type'] = 'Doujinshi' + elif 'illustration' in data['tags']: + data['type'] = 'Illustration' + else: + data['type'] = 'Manga' + data["title_conventional"] = self._join_title(data) return data + def images(self, page): + url = "{}/read/{}".format(self.root, self.gallery_id) + extr = text.extract_from(self.request(url).text) + urls = extr("initReader(", "]") + "]" + return [(url, None) for url in json.loads(urls)] + @staticmethod - def join_title(event, circle, artist, title, parody, book, magazine): + def _join_title(data): + event = data['event'] + artist = data['artist'] + circle = data['circle'] + title = data['title'] + parody = data['parody'] + book = data['book'] + magazine = data['magazine'] + + # a few galleries have a large number of artists or parodies, + # which get replaced with "Various" in the title string + if artist.count(',') >= 3: + artist = 'Various' + if parody.count(',') >= 3: + parody = 'Various' + jt = '' if event: jt += '({}) '.format(event) @@ -81,12 +99,6 @@ class HentainexusGalleryExtractor(GalleryExtractor): jt += ' ({})'.format(magazine) return jt - def images(self, page): - url = "{}/read/{}".format(self.root, self.gallery_id) - extr = text.extract_from(self.request(url).text) - urls = extr("initReader(", "]") + "]" - return [(url, None) for url in json.loads(urls)] - class HentainexusSearchExtractor(Extractor): """Extractor for search results on hentainexus.com""" diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py index 3883445..1c53723 100644 --- a/gallery_dl/extractor/hiperdex.py +++ b/gallery_dl/extractor/hiperdex.py @@ -14,6 +14,9 @@ from ..cache import memcache import re +BASE_PATTERN = r"(?:https?://)?(?:www\.)?hiperdex\.(?:com|net|info)" + + class HiperdexBase(): """Base class for hiperdex extractors""" category = "hiperdex" @@ -61,11 +64,10 @@ class HiperdexBase(): class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): """Extractor for manga chapters from hiperdex.com""" - pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.(?:com|net)" - r"(/manga/([^/?&#]+)/([^/?&#]+))") + pattern = BASE_PATTERN + r"(/manga/([^/?&#]+)/([^/?&#]+))" test = ( ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", { - "pattern": r"https://hiperdex.com/wp-content/uploads" + "pattern": r"https://hiperdex.(com|net|info)/wp-content/uploads" r"/WP-manga/data/manga_\w+/[0-9a-f]{32}/\d+\.webp", 
"count": 9, "keyword": { @@ -82,6 +84,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): }, }), ("https://hiperdex.net/manga/domestic-na-kanojo/154-5/"), + ("https://hiperdex.info/manga/domestic-na-kanojo/154-5/"), ) def __init__(self, match): @@ -102,8 +105,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): class HiperdexMangaExtractor(HiperdexBase, MangaExtractor): """Extractor for manga from hiperdex.com""" chapterclass = HiperdexChapterExtractor - pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.(?:com|net)" - r"(/manga/([^/?&#]+))/?$") + pattern = BASE_PATTERN + r"(/manga/([^/?&#]+))/?$" test = ( ("https://hiperdex.com/manga/youre-not-that-special/", { "count": 51, @@ -123,6 +125,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor): }, }), ("https://hiperdex.net/manga/youre-not-that-special/"), + ("https://hiperdex.info/manga/youre-not-that-special/"), ) def __init__(self, match): @@ -154,11 +157,11 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor): categorytransfer = False chapterclass = HiperdexMangaExtractor reverse = False - pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.(?:com|net)" - r"(/manga-a(?:rtist|uthor)/([^/?&#]+))") + pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/([^/?&#]+))" test = ( ("https://hiperdex.com/manga-artist/beck-ho-an/"), ("https://hiperdex.net/manga-artist/beck-ho-an/"), + ("https://hiperdex.info/manga-artist/beck-ho-an/"), ("https://hiperdex.com/manga-author/viagra/", { "pattern": HiperdexMangaExtractor.pattern, "count": ">= 6", diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py new file mode 100644 index 0000000..a1ba0c3 --- /dev/null +++ b/gallery_dl/extractor/imagechest.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Leonid "Bepis" Pavel +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract images from galleries at https://imgchest.com/""" + +from .common import GalleryExtractor +from .. import text, exception + + +class ImagechestGalleryExtractor(GalleryExtractor): + """Extractor for image galleries from imgchest.com""" + category = "imagechest" + root = "https://imgchest.com" + pattern = r"(?:https?://)?(?:www\.)?imgchest\.com/p/([A-Za-z0-9]{11})" + test = ( + ("https://imgchest.com/p/3na7kr3by8d", { + "url": "f095b4f78c051e5a94e7c663814d1e8d4c93c1f7", + "content": "076959e65be30249a2c651fbe6090dc30ba85193", + "count": 3 + }), + ) + + def __init__(self, match): + self.gallery_id = match.group(1) + url = self.root + "/p/" + self.gallery_id + GalleryExtractor.__init__(self, match, url) + + def metadata(self, page): + if "Sorry, but the page you requested could not be found." 
in page: + raise exception.NotFoundError("gallery") + + return { + "gallery_id": self.gallery_id, + "title": text.unescape(text.extract( + page, 'property="og:title" content="', '"')[0].strip()) + } + + def images(self, page): + return [ + (url, None) + for url in text.extract_iter( + page, 'property="og:image" content="', '"') + ] diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 0813ea9..44fa5f2 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -34,7 +34,11 @@ class ImgurExtractor(Extractor): except KeyError: pass - url = image["mp4"] if image["animated"] and self.mp4 else image["link"] + if image["animated"] and self.mp4 and "mp4" in image: + url = image["mp4"] + else: + url = image["link"] + image["date"] = text.parse_timestamp(image["datetime"]) text.nameext_from_url(url, image) @@ -100,6 +104,9 @@ class ImgurImageExtractor(ImgurExtractor): ("https://imgur.com/HjoXJAd", { # url ends with '.jpg?1' "url": "ec2cf11a2bfb4939feff374781a6e6f3e9af8e8e", }), + ("https://imgur.com/1Nily2P", { # animated png + "pattern": "https://i.imgur.com/1Nily2P.png", + }), ("https://imgur.com/zzzzzzz", { # not found "exception": exception.HttpError, }), @@ -130,7 +137,7 @@ class ImgurAlbumExtractor(ImgurExtractor): directory_fmt = ("{category}", "{album[id]}{album[title]:? - //}") filename_fmt = "{category}_{album[id]}_{num:>03}_{id}.{extension}" archive_fmt = "{album[id]}_{id}" - pattern = BASE_PATTERN + r"/(?:a|t/unmuted)/(\w{7}|\w{5})" + pattern = BASE_PATTERN + r"/a/(\w{7}|\w{5})" test = ( ("https://imgur.com/a/TcBmP", { "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563", @@ -192,9 +199,6 @@ class ImgurAlbumExtractor(ImgurExtractor): ("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash "url": "695ef0c950023362a0163ee5041796300db76674", }), - ("https://imgur.com/t/unmuted/YMqBcua", { # unmuted URL - "url": "86b4747f8147cec7602f0214e267309af73a8655", - }), ("https://imgur.com/a/TcBmQ", { "exception": exception.HttpError, }), @@ -225,7 +229,7 @@ class ImgurAlbumExtractor(ImgurExtractor): class ImgurGalleryExtractor(ImgurExtractor): """Extractor for imgur galleries""" subcategory = "gallery" - pattern = BASE_PATTERN + r"/gallery/(\w{7}|\w{5})" + pattern = BASE_PATTERN + r"/(?:gallery|t/unmuted)/(\w{7}|\w{5})" test = ( ("https://imgur.com/gallery/zf2fIms", { # non-album gallery (#380) "pattern": "https://imgur.com/zf2fIms", @@ -233,6 +237,9 @@ class ImgurGalleryExtractor(ImgurExtractor): ("https://imgur.com/gallery/eD9CT", { "pattern": "https://imgur.com/a/eD9CT", }), + ("https://imgur.com/t/unmuted/26sEhNr", { # unmuted URL + "pattern": "https://imgur.com/26sEhNr", + }), ) def items(self): diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index ea39cab..3781711 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -14,6 +14,8 @@ from .. 
import text, exception from ..cache import cache import itertools import json +import time +import re class InstagramExtractor(Extractor): @@ -26,6 +28,10 @@ class InstagramExtractor(Extractor): cookiedomain = ".instagram.com" cookienames = ("sessionid",) + def __init__(self, match): + Extractor.__init__(self, match) + self._find_tags = re.compile(r'#\w+').findall + def get_metadata(self): return {} @@ -78,9 +84,10 @@ class InstagramExtractor(Extractor): url = self.root + "/accounts/login/ajax/" data = { "username" : username, - "password" : password, + "enc_password" : "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format( + int(time.time()), password), "queryParams" : "{}", - "optIntoOneTap": "true", + "optIntoOneTap": "false", } response = self.request(url, method="POST", headers=headers, data=data) @@ -133,12 +140,24 @@ class InstagramExtractor(Extractor): 'fullname': media['owner']['full_name'], 'post_id': media['id'], 'post_shortcode': media['shortcode'], + 'post_url': url, 'description': text.parse_unicode_escapes('\n'.join( edge['node']['text'] for edge in media['edge_media_to_caption']['edges'] )), } + tags = self._find_tags(common['description']) + if tags: + common['tags'] = sorted(set(tags)) + + location = media['location'] + if location: + common['location_id'] = location['id'] + common['location_slug'] = location['slug'] + common['location_url'] = "{}/explore/locations/{}/{}/".format( + self.root, location['id'], location['slug']) + medias = [] if media['__typename'] == 'GraphSidecar': for num, edge in enumerate( @@ -156,6 +175,7 @@ class InstagramExtractor(Extractor): 'sidecar_media_id': media['id'], 'sidecar_shortcode': media['shortcode'], } + self._extract_tagged_users(children, media_data) media_data.update(common) medias.append(media_data) @@ -169,6 +189,7 @@ class InstagramExtractor(Extractor): 'height': text.parse_int(media['dimensions']['height']), 'width': text.parse_int(media['dimensions']['width']), } + self._extract_tagged_users(media, media_data) media_data.update(common) medias.append(media_data) @@ -189,12 +210,12 @@ class InstagramExtractor(Extractor): user_id = '"{}"'.format( shared_data['entry_data']['StoriesPage'][0]['user']['id']) highlight_id = '' - query_hash = 'cda12de4f7fd3719c0569ce03589f4c4' + query_hash = '0a85e6ea60a4c99edc58ab2f3d17cfdf' variables = ( '{{' '"reel_ids":[{}],"tag_names":[],"location_ids":[],' - '"highlight_reel_ids":[{}],"precomposed_overlay":true,' + '"highlight_reel_ids":[{}],"precomposed_overlay":false,' '"show_story_viewer_list":true,' '"story_viewer_fetch_count":50,"story_viewer_cursor":"",' '"stories_video_dash_manifest":false' @@ -250,7 +271,7 @@ class InstagramExtractor(Extractor): data = self._request_graphql( variables, - 'aec5501414615eca36a9acf075655b1e', + 'ad99dd9d3646cc3c0dda65debcd266a7', shared_data['config']['csrf_token'], ) @@ -305,6 +326,18 @@ class InstagramExtractor(Extractor): variables, psdf['query_hash'], csrf, ) + def _extract_tagged_users(self, src_media, dest_dict): + edges = src_media['edge_media_to_tagged_user']['edges'] + if edges: + dest_dict['tagged_users'] = tagged_users = [] + for edge in edges: + user = edge['node']['user'] + tagged_users.append({ + 'id' : user['id'], + 'username' : user['username'], + 'full_name': user['full_name'], + }) + class InstagramImageExtractor(InstagramExtractor): """Extractor for PostPage""" @@ -321,10 +354,15 @@ class InstagramImageExtractor(InstagramExtractor): "description": str, "height": int, "likes": int, + "location_id": "214424288", + "location_slug": "hong-kong", + 
"location_url": "re:/explore/locations/214424288/hong-kong/", "media_id": "1922949326347663701", "shortcode": "BqvsDleB3lV", "post_id": "1922949326347663701", "post_shortcode": "BqvsDleB3lV", + "post_url": "https://www.instagram.com/p/BqvsDleB3lV/", + "tags": ["#WHPsquares"], "typename": "GraphImage", "username": "instagram", "width": int, @@ -339,6 +377,7 @@ class InstagramImageExtractor(InstagramExtractor): "sidecar_shortcode": "BoHk1haB5tM", "post_id": "1875629777499953996", "post_shortcode": "BoHk1haB5tM", + "post_url": "https://www.instagram.com/p/BoHk1haB5tM/", "num": int, "likes": int, "username": "instagram", @@ -354,7 +393,9 @@ class InstagramImageExtractor(InstagramExtractor): "height": int, "likes": int, "media_id": "1923502432034620000", + "post_url": "https://www.instagram.com/p/Bqxp0VSBgJg/", "shortcode": "Bqxp0VSBgJg", + "tags": ["#ASMR"], "typename": "GraphVideo", "username": "instagram", "width": int, @@ -370,6 +411,7 @@ class InstagramImageExtractor(InstagramExtractor): "height": int, "likes": int, "media_id": "1806097553666903266", + "post_url": "https://www.instagram.com/p/BkQjCfsBIzi/", "shortcode": "BkQjCfsBIzi", "typename": "GraphVideo", "username": "instagram", @@ -381,11 +423,23 @@ class InstagramImageExtractor(InstagramExtractor): ("https://www.instagram.com/p/BtOvDOfhvRr/", { "count": 2, "keyword": { + "post_url": "https://www.instagram.com/p/BtOvDOfhvRr/", "sidecar_media_id": "1967717017113261163", "sidecar_shortcode": "BtOvDOfhvRr", "video_url": str, } - }) + }), + + # GraphImage with tagged user + ("https://www.instagram.com/p/B_2lf3qAd3y/", { + "keyword": { + "tagged_users": [{ + "id": "1246468638", + "username": "kaaymbl", + "full_name": "Call Me Kay", + }] + } + }), ) def __init__(self, match): @@ -476,7 +530,7 @@ class InstagramUserExtractor(InstagramExtractor): 'node_id': 'id', 'variables_id': 'id', 'edge_to_medias': 'edge_owner_to_timeline_media', - 'query_hash': 'f2405b236d85e8296cf30347c9f08c2a', + 'query_hash': '44efc15d3c13342d02df0b5a9fa3d33f', }) if self.config('highlights'): @@ -545,5 +599,5 @@ class InstagramTagExtractor(InstagramExtractor): 'node_id': 'name', 'variables_id': 'tag_name', 'edge_to_medias': 'edge_hashtag_to_media', - 'query_hash': 'f12c9ec5e46a3173b2969c712ad84744', + 'query_hash': '7dabc71d3e758b1ec19ffb85639e427b', }) diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 38c90df..72465f7 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -51,7 +51,7 @@ class MangadexChapterExtractor(MangadexExtractor): test = ( ("https://mangadex.org/chapter/122094", { "keyword": "ef1084c2845825979e150512fed8fdc209baf05a", - "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f", + "content": "50383a4c15124682057b197d40261641a98db514", }), # oneshot ("https://mangadex.cc/chapter/138086", { diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index 4f0e38d..002c8f7 100644 --- a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -27,11 +27,9 @@ class MastodonExtractor(Extractor): Extractor.__init__(self, match) self.api = MastodonAPI(self) - def config(self, key, default=None, *, sentinel=object()): + def config(self, key, default=None, *, sentinel=util.SENTINEL): value = Extractor.config(self, key, sentinel) - if value is not sentinel: - return value - return config.interpolate( + return value if value is not sentinel else config.interpolate( ("extractor", "mastodon", self.instance, self.subcategory), key, default, ) diff --git 
a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 17fe935..84794ad 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -224,10 +224,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor): self.post_url = "https://www.newgrounds.com/art/view/{}/{}".format( self.user, match.group(3)) else: - url = match.group(0) - if not url.startswith("http"): - url = "https://" + url - self.post_url = url + self.post_url = text.ensure_http_scheme(match.group(0)) def posts(self): return (self.post_url,) @@ -414,6 +411,6 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor): @staticmethod def _extract_favorites(page): return [ - "https://" + user.rpartition('"')[2].lstrip("/:") + text.ensure_http_scheme(user.rpartition('"')[2]) for user in text.extract_iter(page, 'class="item-user', '"><img') ] diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index c06721c..c07c4b7 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -10,9 +10,8 @@ from .common import Extractor, Message from . import deviantart, flickr, reddit, smugmug, tumblr -from .. import text, oauth, config, exception +from .. import text, oauth, util, config, exception from ..cache import cache -import os import urllib.parse REDIRECT_URI_LOCALHOST = "http://localhost:6414/" @@ -27,6 +26,7 @@ class OAuthBase(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.client = None + self.cache = config.get(("extractor", self.category), "cache", True) def oauth_config(self, key, default=None): return config.interpolate( @@ -42,7 +42,7 @@ class OAuthBase(Extractor): server.listen(1) # workaround for ctrl+c not working during server.accept on Windows - if os.name == "nt": + if util.WINDOWS: server.settimeout(1.0) while True: try: @@ -87,12 +87,20 @@ class OAuthBase(Extractor): # exchange the request token for an access token data = self.session.get(access_token_url, params=data).text - data = text.parse_query(data) - self.send(OAUTH1_MSG_TEMPLATE.format( - category=self.subcategory, - token=data["oauth_token"], - token_secret=data["oauth_token_secret"], + token = data["oauth_token"] + token_secret = data["oauth_token_secret"] + + # write to cache + if self.cache: + key = (self.subcategory, self.session.auth.consumer_key) + oauth._token_cache.update(key, (token, token_secret)) + self.log.info("Writing tokens to cache") + + # display tokens + self.send(self._generate_message( + ("access-token", "access-token-secret"), + (token, token_secret), )) def _oauth2_authorization_code_grant( @@ -149,24 +157,66 @@ class OAuthBase(Extractor): self.send(data["error"]) return - # display token - part = key.partition("_")[0] - template = message_template or OAUTH2_MSG_TEMPLATE - self.send(template.format( - category=self.subcategory, - key=part, - Key=part.capitalize(), - token=data[key], - instance=getattr(self, "instance", ""), - client_id=client_id, - client_secret=client_secret, - )) - # write to cache - if cache and config.get(("extractor", self.category), "cache"): + if self.cache and cache: cache.update("#" + str(client_id), data[key]) self.log.info("Writing 'refresh-token' to cache") + # display token + if message_template: + msg = message_template.format( + category=self.subcategory, + key=key.partition("_")[0], + token=data[key], + instance=getattr(self, "instance", ""), + client_id=client_id, + client_secret=client_secret, + ) + else: + msg = self._generate_message( + ("refresh-token",), + (data[key],), + ) 
+ self.send(msg) + + def _generate_message(self, names, values): + if len(names) == 1: + _vh = "This value has" + _is = "is" + _it = "it" + _va = "this value" + else: + _vh = "These values have" + _is = "are" + _it = "them" + _va = "these values" + + msg = "\nYour {} {}\n\n{}\n\n".format( + " and ".join("'" + n + "'" for n in names), + _is, + "\n".join(values), + ) + + if self.cache: + opt = self.oauth_config(names[0]) + if opt is None or opt == "cache": + msg += _vh + " been cached and will automatically be used." + else: + msg += ( + "Set 'extractor.{}.{}' to \"cache\" to use {}.".format( + self.subcategory, names[0], _it, + ) + ) + else: + msg += "Put " + _va + " into your configuration file as \n" + msg += " and\n".join( + "'extractor." + self.subcategory + "." + n + "'" + for n in names + ) + msg += "." + + return msg + class OAuthDeviantart(OAuthBase): subcategory = "deviantart" @@ -224,6 +274,7 @@ class OAuthReddit(OAuthBase): "https://www.reddit.com/api/v1/authorize", "https://www.reddit.com/api/v1/access_token", scope="read history", + cache=reddit._refresh_token_cache, ) @@ -318,49 +369,8 @@ class OAuthMastodon(OAuthBase): return data -OAUTH1_MSG_TEMPLATE = """ -Your Access Token and Access Token Secret are - -{token} -{token_secret} - -Put these values into your configuration file as -'extractor.{category}.access-token' and -'extractor.{category}.access-token-secret'. - -Example: -{{ - "extractor": {{ - "{category}": {{ - "access-token": "{token}", - "access-token-secret": "{token_secret}" - }} - }} -}} -""" - - -OAUTH2_MSG_TEMPLATE = """ -Your {Key} Token is - -{token} - -Put this value into your configuration file as -'extractor.{category}.{key}-token'. - -Example: -{{ - "extractor": {{ - "{category}": {{ - "{key}-token": "{token}" - }} - }} -}} -""" - - MASTODON_MSG_TEMPLATE = """ -Your {Key} Token is +Your 'access-token' is {token} diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 570bd72..a14ec9c 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -98,8 +98,7 @@ class PatreonExtractor(Extractor): headers = {"Referer": self.root} while url: - if not url.startswith("http"): - url = "https://" + url.lstrip("/:") + url = text.ensure_http_scheme(url) posts = self.request(url, headers=headers).json() if "included" in posts: diff --git a/gallery_dl/extractor/recursive.py b/gallery_dl/extractor/recursive.py index 1a793a0..ead5c35 100644 --- a/gallery_dl/extractor/recursive.py +++ b/gallery_dl/extractor/recursive.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -28,6 +28,7 @@ class RecursiveExtractor(Extractor): self.session.mount("file://", FileAdapter()) page = self.request(self.url.partition(":")[2]).text + del self.session.adapters["file://"] yield Message.Version, 1 with extractor.blacklist(blist): diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index d0232cc..2e3864a 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -222,20 +222,25 @@ class RedditAPI(): self.extractor = extractor self.comments = text.parse_int(extractor.config("comments", 0)) self.morecomments = extractor.config("morecomments", False) - self.refresh_token = extractor.config("refresh-token") self.log = extractor.log client_id = extractor.config("client-id", self.CLIENT_ID) 
user_agent = extractor.config("user-agent", self.USER_AGENT) if (client_id == self.CLIENT_ID) ^ (user_agent == self.USER_AGENT): - self.client_id = None - self.log.warning( + raise exception.StopExtraction( "Conflicting values for 'client-id' and 'user-agent': " "overwrite either both or none of them.") + + self.client_id = client_id + self.headers = {"User-Agent": user_agent} + + token = extractor.config("refresh-token") + if token is None or token == "cache": + key = "#" + self.client_id + self.refresh_token = _refresh_token_cache(key) else: - self.client_id = client_id - extractor.session.headers["User-Agent"] = user_agent + self.refresh_token = token def submission(self, submission_id): """Fetch the (submission, comments)=-tuple for a submission id""" @@ -277,13 +282,15 @@ class RedditAPI(): def authenticate(self): """Authenticate the application by requesting an access token""" - access_token = self._authenticate_impl(self.refresh_token) - self.extractor.session.headers["Authorization"] = access_token + self.headers["Authorization"] = \ + self._authenticate_impl(self.refresh_token) @cache(maxage=3600, keyarg=1) def _authenticate_impl(self, refresh_token=None): """Actual authenticate implementation""" url = "https://www.reddit.com/api/v1/access_token" + self.headers["Authorization"] = None + if refresh_token: self.log.info("Refreshing private access token") data = {"grant_type": "refresh_token", @@ -294,9 +301,9 @@ class RedditAPI(): "grants/installed_client"), "device_id": "DO_NOT_TRACK_THIS_DEVICE"} - auth = (self.client_id, "") response = self.extractor.request( - url, method="POST", data=data, auth=auth, fatal=False) + url, method="POST", headers=self.headers, + data=data, auth=(self.client_id, ""), fatal=False) data = response.json() if response.status_code != 200: @@ -307,9 +314,10 @@ class RedditAPI(): def _call(self, endpoint, params): url = "https://oauth.reddit.com" + endpoint - params["raw_json"] = 1 + params["raw_json"] = "1" self.authenticate() - response = self.extractor.request(url, params=params, fatal=None) + response = self.extractor.request( + url, params=params, headers=self.headers, fatal=None) remaining = response.headers.get("x-ratelimit-remaining") if remaining and float(remaining) < 2: @@ -380,3 +388,10 @@ class RedditAPI(): @staticmethod def _decode(sid): return util.bdecode(sid, "0123456789abcdefghijklmnopqrstuvwxyz") + + +@cache(maxage=100*365*24*3600, keyarg=0) +def _refresh_token_cache(token): + if token and token[0] == "#": + return None + return token diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py new file mode 100644 index 0000000..7855eab --- /dev/null +++ b/gallery_dl/extractor/redgifs.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
+ +"""Extractors for https://redgifs.com/""" + +from .gfycat import GfycatImageExtractor +from ..cache import cache + + +class RedgifsImageExtractor(GfycatImageExtractor): + """Extractor for individual images from redgifs.com""" + category = "redgifs" + pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/watch/([A-Za-z]+)" + test = ("https://redgifs.com/watch/foolishforkedabyssiniancat", { + "pattern": r"https://\w+.redgifs.com/FoolishForkedAbyssiniancat.mp4", + "content": "f6e03f1df9a2ff2a74092f53ee7580d2fb943533", + }) + + def _get_info(self, gfycat_id): + api = RedgifsAPI(self) + return api.gfycat(gfycat_id) + + +class RedgifsAPI(): + + def __init__(self, extractor): + self.extractor = extractor + self.headers = {} + + def gfycat(self, gfycat_id): + endpoint = "v1/gfycats/" + gfycat_id + return self._call(endpoint)["gfyItem"] + + @cache(maxage=3600) + def _authenticate_impl(self): + url = "https://weblogin.redgifs.com/oauth/webtoken" + headers = { + "Referer": "https://www.redgifs.com/", + "Origin" : "https://www.redgifs.com", + } + data = { + "access_key": "dBLwVuGn9eq4dtXLs8WSfpjcYFY7bPQe" + "AqGPSFgqeW5B9uzj2cMVhF63pTFF4Rg9", + } + + response = self.extractor.request( + url, method="POST", headers=headers, json=data) + return "Bearer " + response.json()["access_token"] + + def _call(self, endpoint): + self.headers["Authorization"] = self._authenticate_impl() + url = "https://napi.redgifs.com/" + endpoint + return self.extractor.request(url, headers=self.headers).json() diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py index b21ad32..2cef430 100644 --- a/gallery_dl/extractor/sexcom.py +++ b/gallery_dl/extractor/sexcom.py @@ -121,9 +121,9 @@ class SexcomPinExtractor(SexcomExtractor): }, }), # gif - ("https://www.sex.com/pin/11465040-big-titted-hentai-gif/", { - "pattern": "https://cdn.sex.com/images/.+/2014/01/26/4829951.gif", - "content": "af6726d74d11d819e1c885fe5303f711862eae96", + ("https://www.sex.com/pin/55435122-ecchi/", { + "pattern": "https://cdn.sex.com/images/.+/2017/12/07/18760842.gif", + "content": "176cc63fa05182cb0438c648230c0f324a5965fe", }), # video ("https://www.sex.com/pin/55748341/", { diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 7e99823..3e3a5a0 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -108,11 +108,11 @@ class TumblrExtractor(Extractor): del photo["alt_sizes"] yield self._prepare_image(photo["url"], post) - url = post.get("audio_url") # type: "audio" + url = post.get("audio_url") # type "audio" if url and url.startswith("https://a.tumblr.com/"): yield self._prepare(url, post) - url = post.get("video_url") # type: "video" + url = post.get("video_url") # type "video" if url: yield self._prepare(_original_video(url), post) diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index c409f54..4c7b757 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -360,12 +360,13 @@ class TwitterTweetExtractor(TwitterExtractor): "pattern": r"ytdl:https://video.twimg.com/ext_tw_video/.*.m3u8", }), # content with emoji, newlines, hashtags (#338) - ("https://twitter.com/yumi_san0112/status/1151144618936823808", { + ("https://twitter.com/playpokemon/status/1263832915173048321", { "options": (("content", True),), "keyword": {"content": ( - "re:晴、お誕生日おめでとう🎉!\n実は下の名前が同じなので結構親近感ある" - "アイドルです✨\n今年の晴ちゃんめちゃくちゃ可愛い路線攻めてるから、そろ" - "そろまたかっこいい晴が見たいですねw\n#結城晴生誕祭2019\n#結城晴生誕祭" + r"re:Gear up for #PokemonSwordShieldEX with special Mystery " 
+ "Gifts! \n\nYou’ll be able to receive four Galarian form " + "Pokémon with Hidden Abilities, plus some very useful items. " + "It’s our \\(Mystery\\) Gift to you, Trainers! \n\n❓🎁➡️ " )}, }), # Reply to another tweet (#403) diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py new file mode 100644 index 0000000..86ada49 --- /dev/null +++ b/gallery_dl/extractor/webtoons.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Leonardo Taccari +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract images from https://www.webtoons.com/""" + +from .common import Extractor, Message +from .. import exception, text, util + + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com/((en|fr)" + + +class WebtoonsExtractor(Extractor): + category = "webtoons" + root = "https://www.webtoons.com" + cookiedomain = "www.webtoons.com" + + def __init__(self, match): + Extractor.__init__(self, match) + self.session.cookies.set("ageGatePass", "true", + domain=self.cookiedomain) + self.path, self.lang, self.genre , self.comic, self.query = \ + match.groups() + + +class WebtoonsEpisodeExtractor(WebtoonsExtractor): + """Extractor for an episode on webtoons.com""" + subcategory = "episode" + directory_fmt = ("{category}", "{comic}") + filename_fmt = "{episode}-{num:>02}.{extension}" + archive_fmt = "{title_no}_{episode}_{num}" + pattern = (BASE_PATTERN + r"/([^/?&#]+)/([^/?&#]+)/(?:[^/?&#]+))" + r"/viewer(?:\?([^#]+))") + test = ( + (("https://www.webtoons.com/en/comedy/safely-endangered" + "/ep-572-earth/viewer?title_no=352&episode_no=572"), { + "url": "11041d71a3f92728305c11a228e77cf0f7aa02ef", + "content": "4f7701a750368e377d65900e6e8f64a5f9cb9c86", + "count": 5, + }), + ) + + def __init__(self, match): + WebtoonsExtractor.__init__(self, match) + query = text.parse_query(self.query) + self.title_no = query.get("title_no") + if not self.title_no: + raise exception.NotFoundError("title_no") + self.episode = query.get("episode_no") + if not self.episode: + raise exception.NotFoundError("episode_no") + + def items(self): + url = "{}/{}/viewer?{}".format(self.root, self.path, self.query) + self.session.headers["Referer"] = url + + page = self.request(url).text + data = self.get_job_metadata(page) + imgs = self.get_image_urls(page) + data["count"] = len(imgs) + + yield Message.Version, 1 + yield Message.Directory, data + for data["num"], url in enumerate(imgs, 1): + yield Message.Url, url, text.nameext_from_url(url, data) + + def get_job_metadata(self, page): + """Collect metadata for extractor-job""" + title, pos = text.extract( + page, '<meta property="og:title" content="', '"') + descr, pos = text.extract( + page, '<meta property="og:description" content="', '"', pos) + + return { + "genre": self.genre, + "comic": self.comic, + "title_no": self.title_no, + "episode": self.episode, + "title": text.unescape(title), + "description": text.unescape(descr), + "lang": self.lang, + "language": util.code_to_language(self.lang), + } + + @staticmethod + def get_image_urls(page): + """Extract and return a list of all image urls""" + return list(text.extract_iter(page, 'class="_images" data-url="', '"')) + + +class WebtoonsComicExtractor(WebtoonsExtractor): + """Extractor for an entire comic on webtoons.com""" + subcategory = "comic" + pattern = (BASE_PATTERN + r"/([^/?&#]+)/([^/?&#]+))" + r"/list(?:\?([^#]+))") + test = ( + # english + 
(("https://www.webtoons.com/en/comedy/live-with-yourself/" + "list?title_no=919"), { + "pattern": WebtoonsEpisodeExtractor.pattern, + "range": "1-15", + "count": ">= 15", + }), + # french + (("https://www.webtoons.com/fr/romance/subzero/" + "list?title_no=1845&page=3"), { + "count": ">= 15", + }), + ) + + def __init__(self, match): + WebtoonsExtractor.__init__(self, match) + query = text.parse_query(self.query) + self.title_no = query.get("title_no") + if not self.title_no: + raise exception.NotFoundError("title_no") + self.page_no = int(query.get("page", 1)) + + def items(self): + page = None + data = {"_extractor": WebtoonsEpisodeExtractor} + + while True: + path = "/{}/list?title_no={}&page={}".format( + self.path, self.title_no, self.page_no) + + if page and path not in page: + return + + page = self.request(self.root + path).text + data["page"] = self.page_no + + for url in self.get_episode_urls(page): + yield Message.Queue, url, data + + self.page_no += 1 + + @staticmethod + def get_episode_urls(page): + """Extract and return all episode urls in 'page'""" + pos = page.find('id="_listUl"') + return text.extract_iter( + page, '<a href="', '" class="NPI=a:list', pos) diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py index b614cab..0ada118 100644 --- a/gallery_dl/extractor/wikiart.py +++ b/gallery_dl/extractor/wikiart.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -69,8 +69,8 @@ class WikiartArtistExtractor(WikiartExtractor): directory_fmt = ("{category}", "{artist[artistName]}") pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)" test = ("https://www.wikiart.org/en/thomas-cole", { - "url": "9049e52e897b9ae6586df4c2c4f827d0a19dafa3", - "keyword": "c3168b21a993707c41efb7674e8c90d53a79d483", + "url": "5ba2fbe6783fcce34e65014d16e5fbc581490c98", + "keyword": "6d92913c55675e05553f000cfee5daff0b4107cf", }) def __init__(self, match): |

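For reference, the instagram.py login hunk above replaces the plain "password" form field with Instagram's "enc_password" scheme; version 0 of that scheme just wraps the plaintext password with a Unix timestamp. A sketch showing the wrapped value (the helper name enc_password is invented here; the field name and format string are taken verbatim from the hunk):

    import time

    def enc_password(password):
        # "#PWD_INSTAGRAM_BROWSER:<version>:<unix time>:<password>";
        # version 0 means the password itself is not encrypted
        return "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format(
            int(time.time()), password)

    print(enc_password("hunter2"))
    # e.g. #PWD_INSTAGRAM_BROWSER:0:1589932800:hunter2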