diff options
Diffstat (limited to 'gallery_dl/extractor/newgrounds.py')
| -rw-r--r-- | gallery_dl/extractor/newgrounds.py | 84 |
1 file changed, 69 insertions, 15 deletions
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 54e60b0..21afeae 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2018-2019 Mike Fährmann
+# Copyright 2018-2020 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -11,6 +11,7 @@
 from .common import Extractor, Message
 from .. import text, exception
 from ..cache import cache
+import itertools
 import json
 
 
@@ -35,16 +36,17 @@ class NewgroundsExtractor(Extractor):
 
         for post_url in self.posts():
             try:
-                file = self.extract_post(post_url)
-                url = file["url"]
-            # except Exception:
+                post = self.extract_post(post_url)
+                url = post.get("url")
             except OSError:
                 url = None
-            if not url:
-                self.log.warning("Unable to get download URL for %s", post_url)
-                continue
-            yield Message.Directory, file
-            yield Message.Url, url, text.nameext_from_url(url, file)
+
+            if url:
+                yield Message.Directory, post
+                yield Message.Url, url, text.nameext_from_url(url, post)
+            else:
+                self.log.warning(
+                    "Unable to get download URL for '%s'", post_url)
 
     def posts(self):
         """Return urls of all relevant image pages"""
@@ -82,7 +84,10 @@ class NewgroundsExtractor(Extractor):
         }
 
     def extract_post(self, post_url):
-        page = self.request(post_url).text
+        response = self.request(post_url, fatal=False)
+        if response.status_code >= 400:
+            return {}
+        page = response.text
         extr = text.extract_from(page)
 
         if "/art/view/" in post_url:
@@ -97,8 +102,7 @@ class NewgroundsExtractor(Extractor):
         data["favorites"] = text.parse_int(extr(
             'id="faves_load">', '<').replace(",", ""))
         data["score"] = text.parse_float(extr('id="score_number">', '<'))
-        data["tags"] = text.split_html(extr(
-            '<dd class="tags">', '</dd>'))
+        data["tags"] = text.split_html(extr('<dd class="tags">', '</dd>'))
         data["artist"] = [
             text.extract(user, '//', '.')[0]
             for user in text.extract_iter(page, '<div class="item-user">', '>')
@@ -194,7 +198,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
             "keyword": {
                 "artist"     : ["tomfulp"],
                 "comment"    : "re:Consider this the bottom threshold for ",
-                "date"       : "type:datetime",
+                "date"       : "dt:2009-06-04 14:44:05",
                 "description": "re:Consider this the bottom threshold for ",
                 "favorites"  : int,
                 "filename"   : "94_tomfulp_ryu-is-hawt",
@@ -241,7 +245,7 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
             "keyword": {
                 "artist"     : ["psychogoldfish", "tomfulp"],
                 "comment"    : "re:People have been asking me how I like the ",
-                "date"       : "type:datetime",
+                "date"       : "dt:2012-02-08 21:40:56",
                 "description": "re:People have been asking how I like the ",
                 "favorites"  : int,
                 "filename"   : "527818_alternate_1896",
@@ -259,7 +263,7 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
             "keyword": {
                 "artist"     : ["zj", "tomfulp"],
                 "comment"    : "re:RECORDED 12-09-2014\n\nFrom The ZJ \"Late ",
-                "date"       : "type:datetime",
+                "date"       : "dt:2015-02-23 19:31:59",
                 "description": "From The ZJ Report Show!",
                 "favorites"  : int,
                 "index"      : 609768,
@@ -334,3 +338,53 @@ class NewgroundsUserExtractor(NewgroundsExtractor):
             (NewgroundsAudioExtractor , base + "audio"),
             (NewgroundsMoviesExtractor, base + "movies"),
         ), ("art",))
+
+
+class NewgroundsFavoriteExtractor(NewgroundsExtractor):
+    """Extractor for posts favorited by a newgrounds user"""
+    subcategory = "favorite"
+    directory_fmt = ("{category}", "{user}", "Favorites")
+    pattern = (r"(?:https?://)?([^.]+)\.newgrounds\.com"
+               r"/favorites(?:/(art|audio|movies))?/?")
+    test = (
+        ("https://tomfulp.newgrounds.com/favorites/art", {
+            "range": "1-10",
+            "count": ">= 10",
+        }),
+        ("https://tomfulp.newgrounds.com/favorites/audio"),
+        ("https://tomfulp.newgrounds.com/favorites/movies"),
+        ("https://tomfulp.newgrounds.com/favorites/"),
+    )
+
+    def __init__(self, match):
+        NewgroundsExtractor.__init__(self, match)
+        self.kind = match.group(2)
+
+    def posts(self):
+        if self.kind:
+            return self._pagination(self.kind)
+        return itertools.chain.from_iterable(
+            self._pagination(k) for k in ("art", "audio", "movies")
+        )
+
+    def _pagination(self, kind):
+        num = 1
+        headers = {
+            "Accept": "application/json, text/javascript, */*; q=0.01",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": self.user_root,
+        }
+
+        while True:
+            url = "{}/favorites/{}/{}".format(self.user_root, kind, num)
+            response = self.request(url, headers=headers)
+            if response.history:
+                return
+
+            favs = list(text.extract_iter(
+                response.text, 'href="//www.newgrounds.com', '"'))
+            for path in favs:
+                yield self.root + path
+            if len(favs) < 24:
+                return
+            num += 1
