diff options
| author | 2022-01-08 20:39:28 -0500 | |
|---|---|---|
| committer | 2022-01-08 20:39:28 -0500 | |
| commit | 8de58070ee3e55f29966a787fd618632dbf4309b (patch) | |
| tree | c140f8a0f97445413a1681424cc93308592070c4 /gallery_dl/extractor/newgrounds.py | |
| parent | 7bc30b43b70556630b4a93c03fefc0d888e3d19f (diff) | |
New upstream version 1.20.1. (upstream/1.20.1)
Diffstat (limited to 'gallery_dl/extractor/newgrounds.py')
| -rw-r--r-- | gallery_dl/extractor/newgrounds.py | 64 |
1 files changed, 63 insertions, 1 deletions
```diff
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 4351b3e..8bcbc20 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -38,6 +38,7 @@ class NewgroundsExtractor(Extractor):
 
     def items(self):
         self.login()
+        metadata = self.metadata()
 
         for post_url in self.posts():
             try:
@@ -48,6 +49,8 @@ class NewgroundsExtractor(Extractor):
                 url = None
 
             if url:
+                if metadata:
+                    post.update(metadata)
                 yield Message.Directory, post
                 yield Message.Url, url, text.nameext_from_url(url, post)
 
@@ -62,9 +65,12 @@ class NewgroundsExtractor(Extractor):
                     "Unable to get download URL for '%s'", post_url)
 
     def posts(self):
-        """Return urls of all relevant image pages"""
+        """Return URLs of all relevant post pages"""
         return self._pagination(self._path)
 
+    def metadata(self):
+        """Return general metadata"""
+
     def login(self):
         username, password = self._get_auth_info()
         if username:
@@ -493,3 +499,59 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
             text.ensure_http_scheme(user.rpartition('"')[2])
             for user in text.extract_iter(page, 'class="item-user', '"><img')
         ]
+
+
+class NewgroundsSearchExtractor(NewgroundsExtractor):
+    """Extractor for newgrounds.com search reesults"""
+    subcategory = "search"
+    directory_fmt = ("{category}", "search", "{search_tags}")
+    pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
+               r"/search/conduct/([^/?#]+)/?\?([^#]+)")
+    test = (
+        ("https://www.newgrounds.com/search/conduct/art?terms=tree", {
+            "pattern": NewgroundsImageExtractor.pattern,
+            "keyword": {"search_tags": "tree"},
+            "range": "1-10",
+            "count": 10,
+        }),
+        ("https://www.newgrounds.com/search/conduct/movies?terms=tree", {
+            "pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+",
+            "range": "1-10",
+            "count": 10,
+        }),
+        ("https://www.newgrounds.com/search/conduct/audio?advanced=1"
+         "&terms=tree+green+nature&match=tdtu&genre=5&suitabilities=e%2Cm"),
+    )
+
+    def __init__(self, match):
+        NewgroundsExtractor.__init__(self, match)
+        self._path, query = match.groups()
+        self.query = text.parse_query(query)
+
+    def posts(self):
+        return self._pagination("/search/conduct/" + self._path, self.query)
+
+    def metadata(self):
+        return {"search_tags": self.query.get("terms", "")}
+
+    def _pagination(self, path, params):
+        url = self.root + path
+        headers = {
+            "Accept": "application/json, text/javascript, */*; q=0.01",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": self.root,
+        }
+        params["inner"] = "1"
+        params["page"] = 1
+
+        while True:
+            data = self.request(url, params=params, headers=headers).json()
+
+            post_url = None
+            for post_url in text.extract_iter(data["content"], 'href="', '"'):
+                if not post_url.startswith("/search/"):
+                    yield post_url
+
+            if post_url is None:
+                return
+            params["page"] += 1
```
