From 2a63a9c9b7032a76894c48ac4d9cea732fcaee49 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Sat, 20 Jul 2019 05:51:44 -0400 Subject: New upstream version 1.9.0 --- gallery_dl/extractor/35photo.py | 17 +++-- gallery_dl/extractor/500px.py | 38 ++++------- gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/artstation.py | 4 +- gallery_dl/extractor/common.py | 117 +++++++++++++++++++++------------ gallery_dl/extractor/deviantart.py | 17 ++--- gallery_dl/extractor/erolord.py | 64 ++++++++++++++++++ gallery_dl/extractor/exhentai.py | 4 +- gallery_dl/extractor/imgur.py | 15 ++--- gallery_dl/extractor/instagram.py | 46 ++++++++++++- gallery_dl/extractor/newgrounds.py | 20 +++--- gallery_dl/extractor/nijie.py | 19 ++---- gallery_dl/extractor/pinterest.py | 4 +- gallery_dl/extractor/pixiv.py | 11 ++-- gallery_dl/extractor/reddit.py | 19 +----- gallery_dl/extractor/sankakucomplex.py | 4 +- gallery_dl/extractor/seiga.py | 4 +- gallery_dl/extractor/sexcom.py | 42 ++++++++++-- gallery_dl/extractor/shopify.py | 28 ++++---- gallery_dl/extractor/simplyhentai.py | 56 ++++++++-------- gallery_dl/extractor/smugmug.py | 22 +++---- gallery_dl/extractor/tsumino.py | 4 +- gallery_dl/extractor/tumblr.py | 18 ++++- gallery_dl/extractor/twitter.py | 27 ++++++-- gallery_dl/extractor/xvideos.py | 12 +--- 25 files changed, 380 insertions(+), 233 deletions(-) create mode 100644 gallery_dl/extractor/erolord.py (limited to 'gallery_dl/extractor') diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py index 50dbfe8..d3e9276 100644 --- a/gallery_dl/extractor/35photo.py +++ b/gallery_dl/extractor/35photo.py @@ -72,7 +72,6 @@ class _35photoExtractor(Extractor): "user" : data["user_login"], "user_id" : data["user_id"], "user_name" : data["user_name"], - "other" : data["otherData"], } if "series" in data: @@ -89,6 +88,8 @@ class _35photoExtractor(Extractor): def _photo_ids(page): """Extract unique photo IDs and return them as sorted list""" # searching for photo-id="..." doesn't always work (see unit tests) + if not page: + return () return sorted( set(text.extract_iter(page, "/photo_", "/")), key=text.parse_int, @@ -100,7 +101,7 @@ class _35photoUserExtractor(_35photoExtractor): """Extractor for all images of a user on 35photo.pro""" subcategory = "user" pattern = (r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro" - r"/(?!photo_|genre_)([^/?&#]+)") + r"/(?!photo_|genre_|rating/)([^/?&#]+)") test = ( ("https://35photo.pro/liya", { "pattern": r"https://m\d+.35photo.pro/photos_(main|series)/.*.jpg", @@ -146,7 +147,14 @@ class _35photoGenreExtractor(_35photoExtractor): ("https://35photo.pro/genre_109/", { "range": "1-30", }), - ("https://35photo.pro/genre_109/new/"), + ("https://35photo.pro/genre_103/", { + "range": "1-30", + "count": 30, + }), + ("https://35photo.pro/genre_103/new/", { + "range": "1-30", + "count": 30, + }), ) def __init__(self, match): @@ -165,6 +173,8 @@ class _35photoGenreExtractor(_35photoExtractor): } def photos(self): + if not self.photo_ids: + return () return self._pagination({ "page": "genre", "community_id": self.genre_id, @@ -193,7 +203,6 @@ class _35photoImageExtractor(_35photoExtractor): "user" : "liya", "user_id" : 20415, "user_name" : "Liya Mirzaeva", - "other" : str, }, }) diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index 00b8ab5..07c2e14 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -31,8 +31,7 @@ class _500pxExtractor(Extractor): for photo in self.photos(): url = photo["images"][-1]["url"] - fmt = photo["image_format"] - photo["extension"] = "jpg" if fmt == "jpeg" else fmt + photo["extension"] = photo["image_format"] if data: photo.update(data) if first: @@ -59,7 +58,7 @@ class _500pxExtractor(Extractor): "include_releases" : "true", "liked_by" : "1", "following_sample" : "100", - "image_size" : "32768", + "image_size" : "4096", "ids" : ",".join(str(p["id"]) for p in photos), } @@ -90,7 +89,7 @@ class _500pxUserExtractor(_500pxExtractor): pattern = (r"(?:https?://)?500px\.com" r"/(?!photo/)([^/?&#]+)/?(?:$|\?|#)") test = ("https://500px.com/light_expression_photography", { - "pattern": r"https?://drscdn.500px.org/photo/\d+/m%3D5000/v2", + "pattern": r"https?://drscdn.500px.org/photo/\d+/m%3D4096/v2", "range": "1-99", "count": 99, }) @@ -124,7 +123,7 @@ class _500pxGalleryExtractor(_500pxExtractor): pattern = (r"(?:https?://)?500px\.com" r"/(?!photo/)([^/?&#]+)/galleries/([^/?&#]+)") test = ("https://500px.com/fashvamp/galleries/lera", { - "url": "8a520272ece83278166b4f8556f9c9da43c43c45", + "url": "002dc81dee5b4a655f0e31ad8349e8903b296df6", "count": 3, "keyword": { "gallery": dict, @@ -144,7 +143,7 @@ class _500pxGalleryExtractor(_500pxExtractor): page = self.request(url).text self.csrf_token, pos = text.extract(page, 'csrf-token" content="', '"') self.user_id , pos = text.extract(page, 'App.CuratorId =', '\n', pos) - self.user_id = self.user_id.strip() + self.user_id = self.user_id.strip(" '\";") # get gallery metadata; transform gallery name into id url = "https://api.500px.com/v1/users/{}/galleries/{}".format( @@ -174,37 +173,30 @@ class _500pxImageExtractor(_500pxExtractor): subcategory = "image" pattern = r"(?:https?://)?500px\.com/photo/(\d+)" test = ("https://500px.com/photo/222049255/queen-of-coasts", { - "url": "d1eda7afeaa589f71f05b9bb5c0694e3ffb357cd", + "url": "fbdf7df39325cae02f5688e9f92935b0e7113315", "count": 1, "keyword": { "camera": "Canon EOS 600D", "camera_info": dict, - "collections_count": int, "comments": list, "comments_count": int, - "converted": False, - "converted_bits": int, - "created_at": "2017-08-01T04:40:05-04:00", - "crop_version": 0, + "created_at": "2017-08-01T08:40:05+00:00", "description": str, - "editored_by": dict, + "editored_by": None, "editors_choice": False, "extension": "jpg", - "favorites_count": int, "feature": "popular", "feature_date": "2017-08-01T09:58:28+00:00", "focal_length": "208", "height": 3111, "id": 222049255, - "image_format": "jpeg", - "image_url": str, + "image_format": "jpg", + "image_url": list, "images": list, "iso": "100", "lens": "EF-S55-250mm f/4-5.6 IS II", "lens_info": dict, - "license_type": 0, - "licensed_at": None, - "liked": False, + "liked": None, "location": None, "location_details": dict, "name": "Queen Of Coasts", @@ -212,15 +204,11 @@ class _500pxImageExtractor(_500pxExtractor): "privacy": False, "profile": True, "rating": float, - "sales_count": int, "status": 1, - "store_download": False, - "store_height": 3111, - "store_width": 4637, "tags": list, - "taken_at": "2017-05-04T13:36:51-04:00", + "taken_at": "2017-05-04T17:36:51+00:00", "times_viewed": int, - "url": "/photo/222049255/queen-of-coasts-by-olesya-nabieva", + "url": "/photo/222049255/Queen-Of-Coasts-by-Olesya-Nabieva", "user": dict, "user_id": 12847235, "votes_count": int, diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 81d480e..189c163 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -24,6 +24,7 @@ modules = [ "deviantart", "dynastyscans", "e621", + "erolord", "exhentai", "fallenangels", "flickr", diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index 24197ad..f7b3bc1 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -79,9 +79,7 @@ class ArtstationExtractor(Extractor): def get_user_info(self, username): """Return metadata for a specific user""" url = "{}/users/{}/quick.json".format(self.root, username.lower()) - response = self.request(url, expect=(404,)) - if response.status_code == 404: - raise exception.NotFoundError("user") + response = self.request(url, notfound="user") return response.json() def _pagination(self, url, params=None): diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 175af63..5c40e2a 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -13,6 +13,7 @@ import time import netrc import queue import logging +import datetime import requests import threading import http.cookiejar @@ -39,10 +40,13 @@ class Extractor(): self._init_headers() self._init_cookies() self._init_proxies() - self._retries = self.config("retries", 5) + self._retries = self.config("retries", 4) self._timeout = self.config("timeout", 30) self._verify = self.config("verify", True) + if self._retries < 0: + self._retries = float("inf") + @classmethod def from_url(cls, url): if isinstance(cls.pattern, str): @@ -63,11 +67,11 @@ class Extractor(): return config.interpolate( ("extractor", self.category, self.subcategory, key), default) - def request(self, url, method="GET", *, session=None, - encoding=None, expect=(), retries=None, **kwargs): - tries = 0 - retries = retries or self._retries - session = session or self.session + def request(self, url, method="GET", *, session=None, retries=None, + encoding=None, fatal=True, notfound=None, **kwargs): + tries = 1 + retries = self._retries if retries is None else retries + session = self.session if session is None else session kwargs.setdefault("timeout", self._timeout) kwargs.setdefault("verify", self._verify) @@ -83,26 +87,37 @@ class Extractor(): raise exception.HttpError(exc) else: code = response.status_code - if 200 <= code < 400 or code in expect: + if 200 <= code < 400 or not fatal and \ + (400 <= code < 429 or 431 <= code < 500): if encoding: response.encoding = encoding return response + if notfound and code == 404: + raise exception.NotFoundError(notfound) if cloudflare.is_challenge(response): self.log.info("Solving Cloudflare challenge") url, domain, cookies = cloudflare.solve_challenge( session, response, kwargs) cloudflare.cookies.update(self.category, (domain, cookies)) continue + if cloudflare.is_captcha(response): + try: + import OpenSSL # noqa + except ImportError: + msg = " - Install 'pyOpenSSL' and try again" + else: + msg = "" + self.log.warning("Cloudflare CAPTCHA" + msg) msg = "{}: {} for url: {}".format(code, response.reason, url) - if code < 500 and code != 429: + if code < 500 and code != 429 and code != 430: break - tries += 1 - self.log.debug("%s (%d/%d)", msg, tries, retries) - if tries >= retries: + self.log.debug("%s (%s/%s)", msg, tries, retries+1) + if tries > retries: break - time.sleep(2 ** tries) + time.sleep(min(2 ** (tries-1), 1800)) + tries += 1 raise exception.HttpError(msg) @@ -130,8 +145,8 @@ class Extractor(): headers.clear() headers["User-Agent"] = self.config( - "user-agent", ("Mozilla/5.0 (X11; Linux x86_64; rv:62.0) " - "Gecko/20100101 Firefox/62.0")) + "user-agent", ("Mozilla/5.0 (X11; Linux x86_64; rv:68.0) " + "Gecko/20100101 Firefox/68.0")) headers["Accept"] = "*/*" headers["Accept-Language"] = "en-US,en;q=0.5" headers["Accept-Encoding"] = "gzip, deflate" @@ -203,6 +218,20 @@ class Extractor(): return False return True + def _get_date_min_max(self, dmin=None, dmax=None): + """Retrieve and parse 'date-min' and 'date-max' config values""" + def get(key, default): + ts = self.config(key, default) + if isinstance(ts, str): + try: + ts = int(datetime.datetime.strptime(ts, fmt).timestamp()) + except ValueError as exc: + self.log.warning("Unable to parse '%s': %s", key, exc) + ts = default + return ts + fmt = self.config("date-format", "%Y-%m-%dT%H:%M:%S") + return get("date-min", dmin), get("date-max", dmax) + @classmethod def _get_tests(cls): """Yield an extractor's test cases as (URL, RESULTS) tuples""" @@ -403,30 +432,36 @@ def generate_extractors(extractor_data, symtable, classes): http.cookiejar.MozillaCookieJar.magic_re = re.compile( "#( Netscape)? HTTP Cookie File", re.IGNORECASE) -# Update default cipher list of urllib3 -# to fix issues with Cloudflare and, by extension, Artstation (#227) -from requests.packages.urllib3.util import ssl_ # noqa -logging.getLogger("gallery-dl").debug("updating default urllib3 ciphers") - -# cipher list taken from urllib3 1.25 -# https://github.com/urllib3/urllib3/blob/1.25/src/urllib3/util/ssl_.py -# with additions from -# https://github.com/Anorov/cloudflare-scrape/pull/242 -ssl_.DEFAULT_CIPHERS = ( - "ECDHE+AESGCM:" - "ECDHE+CHACHA20:" - "DHE+AESGCM:" - "DHE+CHACHA20:" - "ECDH+AESGCM:" - "DH+AESGCM:" - "ECDH+AES:" - "DH+AES:" - "RSA+AESGCM:" - "RSA+AES:" - "!ECDHE+SHA:" - "!AES128-SHA:" - "!aNULL:" - "!eNULL:" - "!MD5:" - "!DSS" -) +# Replace default cipher list of urllib3 to avoid Cloudflare CAPTCHAs +ciphers = config.get(("ciphers",), True) +if ciphers: + logging.getLogger("gallery-dl").debug("Updating urllib3 ciphers") + + if ciphers is True: + ciphers = ( + # Firefox's list + "TLS_AES_128_GCM_SHA256:" + "TLS_CHACHA20_POLY1305_SHA256:" + "TLS_AES_256_GCM_SHA384:" + "ECDHE-ECDSA-AES128-GCM-SHA256:" + "ECDHE-RSA-AES128-GCM-SHA256:" + "ECDHE-ECDSA-CHACHA20-POLY1305:" + "ECDHE-RSA-CHACHA20-POLY1305:" + "ECDHE-ECDSA-AES256-GCM-SHA384:" + "ECDHE-RSA-AES256-GCM-SHA384:" + "ECDHE-ECDSA-AES256-SHA:" + "ECDHE-ECDSA-AES128-SHA:" + "ECDHE-RSA-AES128-SHA:" + "ECDHE-RSA-AES256-SHA:" + "DHE-RSA-AES128-SHA:" + "DHE-RSA-AES256-SHA:" + "AES128-SHA:" + "AES256-SHA:" + "DES-CBC3-SHA" + ) + elif isinstance(ciphers, list): + ciphers = ":".join(ciphers) + + from requests.packages.urllib3.util import ssl_ # noqa + ssl_.DEFAULT_CIPHERS = ciphers + del ssl_ diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index ebab040..63e2913 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -205,8 +205,7 @@ class DeviantartExtractor(Extractor): @staticmethod def _find_folder(folders, name): - pattern = re.compile( - r"[^\w]*" + name.replace("-", r"[^\w]+") + r"[^\w]*$") + pattern = re.compile(r"(?i)\W*" + name.replace("-", r"\W+") + r"\W*$") for folder in folders: if pattern.match(folder["name"]): return folder @@ -416,7 +415,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor): def deviations(self): url = "{}/{}/{}".format(self.root, self.user, self.path) - response = self._html_request(url, expect=range(400, 500)) + response = self._html_request(url, fatal=False) deviation_id = text.extract(response.text, '//deviation/', '"')[0] if response.status_code >= 400 or not deviation_id: raise exception.NotFoundError("image") @@ -767,7 +766,7 @@ class DeviantartAPI(): def user_profile(self, username): """Get user profile information""" endpoint = "user/profile/" + username - return self._call(endpoint, expect_error=True) + return self._call(endpoint, fatal=False) def authenticate(self, refresh_token): """Authenticate the application by requesting an access token""" @@ -797,7 +796,7 @@ class DeviantartAPI(): _refresh_token_cache.update(refresh_token, data["refresh_token"]) return "Bearer " + data["access_token"] - def _call(self, endpoint, params=None, expect_error=False, public=True): + def _call(self, endpoint, params=None, fatal=True, public=True): """Call an API endpoint""" url = "https://www.deviantart.com/api/v1/oauth2/" + endpoint while True: @@ -806,11 +805,7 @@ class DeviantartAPI(): self.authenticate(None if public else self.refresh_token) response = self.extractor.request( - url, - params=params, - headers=self.headers, - expect=range(400, 500), - ) + url, headers=self.headers, params=params, fatal=False) data = response.json() status = response.status_code @@ -818,7 +813,7 @@ class DeviantartAPI(): if self.delay > self.delay_min: self.delay -= 1 return data - if expect_error: + if not fatal: return None if data.get("error_description") == "User not found.": raise exception.NotFoundError("user or group") diff --git a/gallery_dl/extractor/erolord.py b/gallery_dl/extractor/erolord.py new file mode 100644 index 0000000..8628039 --- /dev/null +++ b/gallery_dl/extractor/erolord.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract images from http://erolord.com/""" + +from .common import GalleryExtractor +from .. import text, util +import json + + +class ErolordGalleryExtractor(GalleryExtractor): + """Extractor for image galleries from erolord.com""" + category = "erolord" + root = "http://erolord.com" + pattern = r"(?:https?://)?(?:www\.)?erolord.com(/doujin/(\d+)/?)" + test = ("http://erolord.com/doujin/2189055/", { + "url": "7ce6d10a3934102b95c9718a34ccd3d35f55d85f", + "keyword": { + "title" : "Amazon No Hiyaku | Amazon Elixir", + "gallery_id": 2189055, + "count" : 16, + "artist" : ["Morris"], + "group" : list, + "parody" : list, + "characters": list, + "tags" : list, + "lang" : "en", + "language" : "English", + }, + }) + + def __init__(self, match): + GalleryExtractor.__init__(self, match) + self.gallery_id = match.group(2) + + def metadata(self, page): + extr = text.extract_from(page) + split = text.split_html + title, _, language = extr('

', '

').rpartition(" ") + language = language.strip("[]") + + return { + "gallery_id": text.parse_int(self.gallery_id), + "title" : text.unescape(title), + # double quotes for anime, circle, tags + # single quotes for characters, artist + "parody" : split(extr('class="sp1">Anime:' , "\r")), + "characters": split(extr("class='sp1'>Characters:", "\r")), + "artist" : split(extr("class='sp1'>Artist:" , "\r")), + "group" : split(extr('class="sp1">Circle:' , "\r")), + "tags" : split(extr('class="sp1">Tags:' , "\r")), + "lang" : util.language_to_code(language), + "language" : language, + } + + def images(self, page): + url = self.root + text.extract(page, 'id="d1"> len(images): - url = ("https://imgur.com/ajaxalbums/getimages/" + - self.item_id + "/hit.json") + url = "{}/ajaxalbums/getimages/{}/hit.json".format( + self.root, self.item_id) images = self.request(url).json()["data"]["images"] yield Message.Version, 1 diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 871236b..475e24b 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -11,7 +11,8 @@ import hashlib import json from .common import Extractor, Message -from .. import text +from .. import text, exception +from ..cache import cache class InstagramExtractor(Extractor): @@ -21,11 +22,14 @@ class InstagramExtractor(Extractor): filename_fmt = "{sidecar_media_id:?/_/}{media_id}.{extension}" archive_fmt = "{media_id}" root = "https://www.instagram.com" + cookiedomain = ".instagram.com" + cookienames = ("sessionid",) def get_metadata(self): return {} def items(self): + self.login() yield Message.Version, 1 metadata = self.get_metadata() @@ -40,6 +44,46 @@ class InstagramExtractor(Extractor): yield Message.Url, \ 'ytdl:{}/p/{}/'.format(self.root, data['shortcode']), data + def login(self): + if self._check_cookies(self.cookienames): + return + username, password = self._get_auth_info() + if username: + self.session.cookies.set("ig_cb", "1", domain="www.instagram.com") + self._update_cookies(self._login_impl(username, password)) + + @cache(maxage=360*24*3600, keyarg=1) + def _login_impl(self, username, password): + self.log.info("Logging in as %s", username) + + page = self.request(self.root + "/accounts/login/").text + headers = { + "Referer" : self.root + "/accounts/login/", + "X-IG-App-ID" : "936619743392459", + "X-Requested-With": "XMLHttpRequest", + } + + response = self.request(self.root + "/web/__mid/", headers=headers) + headers["X-CSRFToken"] = response.cookies["csrftoken"] + headers["X-Instagram-AJAX"] = text.extract( + page, '"rollout_hash":"', '"')[0] + + url = self.root + "/accounts/login/ajax/" + data = { + "username" : username, + "password" : password, + "queryParams" : "{}", + "optIntoOneTap": "true", + } + response = self.request(url, method="POST", headers=headers, data=data) + + if not response.json().get("authenticated"): + raise exception.AuthenticationError() + return { + key: self.session.cookies.get(key) + for key in ("sessionid", "mid", "csrftoken") + } + def _extract_shared_data(self, page): return json.loads(text.extract(page, 'window._sharedData = ', ';')[0]) diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 9e0aaa3..282c389 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -48,22 +48,20 @@ class NewgroundsExtractor(Extractor): extr = text.extract_from(self.request(page_url).text) full = text.extract_from(json.loads(extr('"full_image_text":', '});'))) data = { + "title" : text.unescape(extr('"og:title" content="', '"')), "description": text.unescape(extr(':description" content="', '"')), - "date" : extr('itemprop="datePublished" content="', '"'), + "date" : text.parse_datetime(extr( + 'itemprop="datePublished" content="', '"')), "rating" : extr('class="rated-', '"'), "favorites" : text.parse_int(extr('id="faves_load">', '<')), "score" : text.parse_float(extr('id="score_number">', '<')), + "tags" : text.split_html(extr( + '
', '
')), "url" : full('src="', '"'), - "title" : text.unescape(full('alt="', '"')), "width" : text.parse_int(full('width="', '"')), "height" : text.parse_int(full('height="', '"')), } - - tags = text.split_html(extr('
', '
')) - tags.sort() - data["tags"] = tags - - data["date"] = text.parse_datetime(data["date"]) + data["tags"].sort() data["index"] = text.parse_int( data["url"].rpartition("/")[2].partition("_")[0]) return data @@ -95,7 +93,7 @@ class NewgroundsUserExtractor(NewgroundsExtractor): test = ( ("https://blitzwuff.newgrounds.com/art", { "url": "24b19c4a135a09889fac7b46a74e427e4308d02b", - "keyword": "2aab0532a894ff3cf88dd01ce5c60f114011b268", + "keyword": "98566e0c8096a8099b8d71962fea7e31c8b098d4", }), ("https://blitzwuff.newgrounds.com/"), ) @@ -140,9 +138,9 @@ class NewgroundsVideoExtractor(NewgroundsExtractor): subcategory = "video" filename_fmt = "{category}_{index}.{extension}" pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/movies/?$" - test = ("https://twistedgrim.newgrounds.com/movies", { + test = ("https://tomfulp.newgrounds.com/movies", { "pattern": r"ytdl:https?://www\.newgrounds\.com/portal/view/\d+", - "count": ">= 29", + "count": ">= 32", }) def get_page_urls(self): diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index abf1eaa..4c48d73 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -106,13 +106,8 @@ class NijieExtractor(AsynchronousMixin, Extractor): params = {"id": self.user_id, "p": 1} while True: - response = self.request(url, params=params, expect=(404,)) - if response.status_code == 404: - raise exception.NotFoundError("artist") - - page = response.text - ids = list(text.extract_iter(page, ' illust_id="', '"')) - yield from ids + page = self.request(url, params=params, notfound="artist").text + yield from text.extract_iter(page, 'illust_id="', '"') if '