diff options
Diffstat (limited to 'gallery_dl/extractor/common.py')
| -rw-r--r-- | gallery_dl/extractor/common.py | 85 |
1 files changed, 56 insertions, 29 deletions
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index d14e13a..8771261 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -14,6 +14,7 @@ import ssl import time import netrc import queue +import getpass import logging import datetime import requests @@ -21,6 +22,7 @@ import threading from requests.adapters import HTTPAdapter from .message import Message from .. import config, text, util, cache, exception +urllib3 = requests.packages.urllib3 class Extractor(): @@ -45,6 +47,8 @@ class Extractor(): def __init__(self, match): self.log = logging.getLogger(self.category) self.url = match.string + self.match = match + self.groups = match.groups() self._cfgpath = ("extractor", self.category, self.subcategory) self._parentdir = "" @@ -168,22 +172,25 @@ class Extractor(): requests.exceptions.ChunkedEncodingError, requests.exceptions.ContentDecodingError) as exc: msg = exc + code = 0 except (requests.exceptions.RequestException) as exc: raise exception.HttpError(exc) else: code = response.status_code if self._write_pages: self._dump_response(response) - if 200 <= code < 400 or fatal is None and \ - (400 <= code < 500) or not fatal and \ - (400 <= code < 429 or 431 <= code < 500): + if ( + code < 400 or + code < 500 and (not fatal and code != 429 or fatal is None) + ): if encoding: response.encoding = encoding return response if notfound and code == 404: raise exception.NotFoundError(notfound) - msg = "'{} {}' for '{}'".format(code, response.reason, url) + msg = "'{} {}' for '{}'".format( + code, response.reason, response.url) server = response.headers.get("Server") if server and server.startswith("cloudflare") and \ code in (403, 503): @@ -194,7 +201,10 @@ class Extractor(): if b'name="captcha-bypass"' in content: self.log.warning("Cloudflare CAPTCHA") break - if code not in retry_codes and code < 500: + + if code == 429 and self._interval_429: + pass + elif code not in retry_codes and code < 500: break finally: @@ -204,20 +214,24 @@ class Extractor(): if tries > retries: break + seconds = tries if self._interval: - seconds = self._interval() - if seconds < tries: - seconds = tries + s = self._interval() + if seconds < s: + seconds = s + if code == 429 and self._interval_429: + s = self._interval_429() + if seconds < s: + seconds = s + self.wait(seconds=seconds, reason="429 Too Many Requests") else: - seconds = tries - - self.sleep(seconds, "retry") + self.sleep(seconds, "retry") tries += 1 raise exception.HttpError(msg, response) def wait(self, seconds=None, until=None, adjust=1.0, - reason="rate limit reset"): + reason="rate limit"): now = time.time() if seconds: @@ -240,7 +254,7 @@ class Extractor(): if reason: t = datetime.datetime.fromtimestamp(until).time() isotime = "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second) - self.log.info("Waiting until %s for %s.", isotime, reason) + self.log.info("Waiting until %s (%s)", isotime, reason) time.sleep(seconds) def sleep(self, seconds, reason): @@ -248,6 +262,15 @@ class Extractor(): seconds, reason) time.sleep(seconds) + def input(self, prompt, echo=True): + if echo: + try: + return input(prompt) + except (EOFError, OSError): + return None + else: + return getpass.getpass(prompt) + def _get_auth_info(self): """Return authentication information as (username, password) tuple""" username = self.config("username") @@ -280,6 +303,9 @@ class Extractor(): self.config("sleep-request", self.request_interval), self.request_interval_min, ) + self._interval_429 = util.build_duration_func( + self.config("sleep-429", 60), + ) if self._retries < 0: self._retries = float("inf") @@ -439,9 +465,11 @@ class Extractor(): if not path: return + path_tmp = path + ".tmp" try: - with open(path, "w") as fp: + with open(path_tmp, "w") as fp: util.cookiestxt_store(fp, self.cookies) + os.replace(path_tmp, path) except OSError as exc: self.log.warning("cookies: %s", exc) @@ -599,7 +627,7 @@ class GalleryExtractor(Extractor): def __init__(self, match, url=None): Extractor.__init__(self, match) - self.gallery_url = self.root + match.group(1) if url is None else url + self.gallery_url = self.root + self.groups[0] if url is None else url def items(self): self.login() @@ -674,7 +702,7 @@ class MangaExtractor(Extractor): def __init__(self, match, url=None): Extractor.__init__(self, match) - self.manga_url = url or self.root + match.group(1) + self.manga_url = self.root + self.groups[0] if url is None else url if self.config("chapter-reverse", False): self.reverse = not self.reverse @@ -736,17 +764,18 @@ class BaseExtractor(Extractor): instances = () def __init__(self, match): - if not self.category: - self._init_category(match) Extractor.__init__(self, match) + if not self.category: + self._init_category() + self._cfgpath = ("extractor", self.category, self.subcategory) - def _init_category(self, match): - for index, group in enumerate(match.groups()): + def _init_category(self): + for index, group in enumerate(self.groups): if group is not None: if index: self.category, self.root, info = self.instances[index-1] if not self.root: - self.root = text.root_from_url(match.group(0)) + self.root = text.root_from_url(self.match.group(0)) self.config_instance = info.get else: self.root = group @@ -806,12 +835,12 @@ def _build_requests_adapter(ssl_options, ssl_ciphers, source_address): pass if ssl_options or ssl_ciphers: - ssl_context = ssl.create_default_context() - if ssl_options: - ssl_context.options |= ssl_options - if ssl_ciphers: - ssl_context.set_ecdh_curve("prime256v1") - ssl_context.set_ciphers(ssl_ciphers) + ssl_context = urllib3.connection.create_urllib3_context( + options=ssl_options or None, ciphers=ssl_ciphers) + if requests.__version__ > "2.31": + # https://github.com/psf/requests/pull/6731 + ssl_context.load_default_certs() + ssl_context.check_hostname = False else: ssl_context = None @@ -931,8 +960,6 @@ SSL_CIPHERS = { } -urllib3 = requests.packages.urllib3 - # detect brotli support try: BROTLI = urllib3.response.brotli is not None |
