diff options
| author | 2025-07-31 01:22:01 -0400 | |
|---|---|---|
| committer | 2025-07-31 01:22:01 -0400 | |
| commit | a6e995c093de8aae2e91a0787281bb34c0b871eb (patch) | |
| tree | 2d79821b05300d34d8871eb6c9662b359a2de85d /gallery_dl/extractor/newgrounds.py | |
| parent | 7672a750cb74bf31e21d76aad2776367fd476155 (diff) | |
New upstream version 1.30.2.upstream/1.30.2
Diffstat (limited to 'gallery_dl/extractor/newgrounds.py')
| -rw-r--r-- | gallery_dl/extractor/newgrounds.py | 71 |
1 files changed, 30 insertions, 41 deletions
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 648f7df..de96aa0 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2023 Mike Fährmann +# Copyright 2018-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,11 +8,10 @@ """Extractors for https://www.newgrounds.com/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, util, exception from ..cache import cache import itertools -import re BASE_PATTERN = r"(?:https?://)?(?:www\.)?newgrounds\.com" USER_PATTERN = r"(?:https?://)?([\w-]+)\.newgrounds\.com" @@ -31,11 +30,11 @@ class NewgroundsExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) - self.user = match.group(1) - self.user_root = "https://{}.newgrounds.com".format(self.user) + self.user = match[1] + self.user_root = f"https://{self.user}.newgrounds.com" def _init(self): - self._extract_comment_urls = re.compile( + self._extract_comment_urls = util.re( r'(?:<img |data-smartload-)src="([^"]+)').findall self.flash = self.config("flash", True) @@ -72,8 +71,7 @@ class NewgroundsExtractor(Extractor): if "_multi" in post: for data in post["_multi"]: post["num"] += 1 - post["_index"] = "{}_{:>02}".format( - post["index"], post["num"]) + post["_index"] = f"{post['index']}_{post['num']:>02}" post.update(data) url = data["image"] @@ -85,8 +83,7 @@ class NewgroundsExtractor(Extractor): for url in self._extract_comment_urls(post["_comment"]): post["num"] += 1 - post["_index"] = "{}_{:>02}".format( - post["index"], post["num"]) + post["_index"] = f"{post['index']}_{post['num']:>02}" url = text.ensure_http_scheme(url) text.nameext_from_url(url, post) yield Message.Url, url, post @@ -153,7 +150,7 @@ class NewgroundsExtractor(Extractor): data["codehint"] = " " elif result.get("requiresEmailMfa"): email = result.get("obfuscatedEmail") - prompt = "Email Verification Code ({}): ".format(email) + prompt = f"Email Verification Code ({email}): " data["code"] = self.input(prompt) data["codehint"] = " " @@ -198,7 +195,10 @@ class NewgroundsExtractor(Extractor): data["favorites"] = text.parse_int(extr( 'id="faves_load">', '<').replace(",", "")) data["score"] = text.parse_float(extr('id="score_number">', '<')) - data["tags"] = text.split_html(extr('<dd class="tags">', '</dd>')) + data["tags"] = [ + t for t in text.split_html(extr('<dd class="tags">', '</dd>')) + if "(function(" not in t + ] data["artist"] = [ text.extr(user, '//', '.') for user in text.extract_iter(page, '<div class="item-user">', '>') @@ -228,12 +228,10 @@ class NewgroundsExtractor(Extractor): data["index"] = text.parse_int(index) data["_index"] = index - image_data = extr("let imageData =", "\n];") - if image_data: + if image_data := extr("let imageData =", "\n];"): data["_multi"] = self._extract_images_multi(image_data) else: - art_images = extr('<div class="art-images', '\n\t\t</div>') - if art_images: + if art_images := extr('<div class="art-images', '\n\t\t</div>'): data["_multi"] = self._extract_images_art(art_images, data) return data @@ -258,8 +256,7 @@ class NewgroundsExtractor(Extractor): else: yield {"image": url} - @staticmethod - def _extract_audio_data(extr, url): + def _extract_audio_data(self, extr, url): index = url.split("/")[5] return { "title" : text.unescape(extr('"og:title" content="', '"')), @@ -297,7 +294,7 @@ class NewgroundsExtractor(Extractor): "Accept": "application/json, text/javascript, */*; q=0.01", "X-Requested-With": "XMLHttpRequest", } - sources = self.request(url, headers=headers).json()["sources"] + sources = self.request_json(url, headers=headers)["sources"] formats = self._video_formats(sources) src = next(formats, "") date = text.parse_timestamp(src.rpartition("?")[2]) @@ -319,7 +316,7 @@ class NewgroundsExtractor(Extractor): def _video_formats(self, sources): src = sources["360p"][0]["src"] - sub = re.compile(r"\.360p\.\w+").sub + sub = util.re(r"\.360p\.\w+").sub for fmt in self.format: try: @@ -345,7 +342,7 @@ class NewgroundsExtractor(Extractor): yield fmt[1][0]["src"] def _pagination(self, kind, pnum=1): - url = "{}/{}".format(self.user_root, kind) + url = f"{self.user_root}/{kind}" params = { "page": text.parse_int(pnum, 1), "isAjaxRequest": "1", @@ -367,7 +364,7 @@ class NewgroundsExtractor(Extractor): return if "errors" in data: msg = ", ".join(text.unescape(e) for e in data["errors"]) - raise exception.StopExtraction(msg) + raise exception.AbortExtraction(msg) items = data.get("items") if not items: @@ -396,12 +393,11 @@ class NewgroundsImageExtractor(NewgroundsExtractor): def __init__(self, match): NewgroundsExtractor.__init__(self, match) - if match.group(2): - self.user = match.group(2) - self.post_url = "https://www.newgrounds.com/art/view/{}/{}".format( - self.user, match.group(3)) + if match[2]: + self.user = match[2] + self.post_url = f"{self.root}/art/view/{self.user}/{match[3]}" else: - self.post_url = text.ensure_http_scheme(match.group(0)) + self.post_url = text.ensure_http_scheme(match[0]) def posts(self): return (self.post_url,) @@ -416,7 +412,7 @@ class NewgroundsMediaExtractor(NewgroundsExtractor): def __init__(self, match): NewgroundsExtractor.__init__(self, match) self.user = "" - self.post_url = self.root + match.group(1) + self.post_url = self.root + match[1] def posts(self): return (self.post_url,) @@ -450,15 +446,11 @@ class NewgroundsGamesExtractor(NewgroundsExtractor): example = "https://USER.newgrounds.com/games" -class NewgroundsUserExtractor(NewgroundsExtractor): +class NewgroundsUserExtractor(Dispatch, NewgroundsExtractor): """Extractor for a newgrounds user profile""" - subcategory = "user" pattern = USER_PATTERN + r"/?$" example = "https://USER.newgrounds.com" - def initialize(self): - pass - def items(self): base = self.user_root + "/" return self._dispatch_extractors(( @@ -486,7 +478,7 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor): ) def _pagination_favorites(self, kind, pnum=1): - url = "{}/favorites/{}".format(self.user_root, kind) + url = f"{self.user_root}/favorites/{kind}" params = { "page": text.parse_int(pnum, 1), "isAjaxRequest": "1", @@ -512,8 +504,7 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor): def _extract_favorites(self, page): return [ self.root + path - for path in text.extract_iter( - page, 'href="https://www.newgrounds.com', '"') + for path in text.extract_iter(page, f'href="{self.root}', '"') ] @@ -531,8 +522,7 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor): for url in self._pagination_favorites(kind, pnum): yield Message.Queue, url, data - @staticmethod - def _extract_favorites(page): + def _extract_favorites(self, page): return [ text.ensure_http_scheme(user.rpartition('"')[2]) for user in text.extract_iter(page, 'class="item-user', '"><img') @@ -552,8 +542,7 @@ class NewgroundsSearchExtractor(NewgroundsExtractor): self.query = text.parse_query(query) def posts(self): - suitabilities = self.query.get("suitabilities") - if suitabilities: + if suitabilities := self.query.get("suitabilities"): data = {"view_suitability_" + s: "on" for s in suitabilities.split(",")} self.request(self.root + "/suitabilities", @@ -574,7 +563,7 @@ class NewgroundsSearchExtractor(NewgroundsExtractor): } while True: - data = self.request(url, params=params, headers=headers).json() + data = self.request_json(url, params=params, headers=headers) post_url = None for post_url in text.extract_iter(data["content"], 'href="', '"'): |
