diff options
Diffstat (limited to 'gallery_dl/extractor/wikiart.py')
| -rw-r--r-- | gallery_dl/extractor/wikiart.py | 48 |
1 files changed, 41 insertions, 7 deletions
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py index 9238590..428c6b5 100644 --- a/gallery_dl/extractor/wikiart.py +++ b/gallery_dl/extractor/wikiart.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -40,7 +40,7 @@ class WikiartExtractor(Extractor): def paintings(self): """Return an iterable containing all relevant 'painting' objects""" - def _pagination(self, url, extra_params=None, key="Paintings"): + def _pagination(self, url, extra_params=None, key="Paintings", stop=False): headers = { "X-Requested-With": "XMLHttpRequest", "Referer": url, @@ -60,6 +60,8 @@ class WikiartExtractor(Extractor): if not items: return yield from items + if stop: + return params["page"] += 1 @@ -67,7 +69,7 @@ class WikiartArtistExtractor(WikiartExtractor): """Extractor for an artist's paintings on wikiart.org""" subcategory = "artist" directory_fmt = ("{category}", "{artist[artistName]}") - pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)" + pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)/?$" test = ("https://www.wikiart.org/en/thomas-cole", { "url": "5ba2fbe6783fcce34e65014d16e5fbc581490c98", "keyword": "6d92913c55675e05553f000cfee5daff0b4107cf", @@ -75,18 +77,50 @@ class WikiartArtistExtractor(WikiartExtractor): def __init__(self, match): WikiartExtractor.__init__(self, match) - self.artist = match.group(2) + self.artist_name = match.group(2) + self.artist = None def metadata(self): - url = "{}/{}/{}?json=2".format(self.root, self.lang, self.artist) - return {"artist": self.request(url).json()} + url = "{}/{}/{}?json=2".format(self.root, self.lang, self.artist_name) + self.artist = self.request(url).json() + return {"artist": self.artist} def paintings(self): url = "{}/{}/{}/mode/all-paintings".format( - self.root, self.lang, self.artist) + self.root, self.lang, self.artist_name) return self._pagination(url) +class WikiartImageExtractor(WikiartArtistExtractor): + """Extractor for individual paintings on wikiart.org""" + subcategory = "image" + pattern = BASE_PATTERN + r"/(?!(?:paintings|artists)-by-)([\w-]+)/([\w-]+)" + test = ( + ("https://www.wikiart.org/en/thomas-cole/the-departure-1838", { + "url": "4d9fd87680a2620eaeaf1f13e3273475dec93231", + "keyword": "a1b083d500ce2fd364128e35b026e4ca526000cc", + }), + # no year or '-' in slug + ("https://www.wikiart.org/en/huang-shen/summer", { + "url": "d7f60118c34067b2b37d9577e412dc1477b94207", + }), + ) + + def __init__(self, match): + WikiartArtistExtractor.__init__(self, match) + self.title = match.group(3) + + def paintings(self): + title, sep, year = self.title.rpartition("-") + if not sep or not year.isdecimal(): + title = self.title + url = "{}/{}/Search/{} {}".format( + self.root, self.lang, + self.artist.get("artistName") or self.artist_name, title, + ) + return self._pagination(url, stop=True) + + class WikiartArtworksExtractor(WikiartExtractor): """Extractor for artwork collections on wikiart.org""" subcategory = "artworks" |
