summary refs log tree commit diff stats
path: root/gallery_dl/extractor/common.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/common.py')
-rw-r--r-- gallery_dl/extractor/common.py 85
1 files changed, 56 insertions, 29 deletions
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index d14e13a..8771261 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -14,6 +14,7 @@ import ssl
import time
import netrc
import queue
+import getpass
import logging
import datetime
import requests
@@ -21,6 +22,7 @@ import threading
from requests.adapters import HTTPAdapter
from .message import Message
from .. import config, text, util, cache, exception
+urllib3 = requests.packages.urllib3
class Extractor():
@@ -45,6 +47,8 @@ class Extractor():
def __init__(self, match):
self.log = logging.getLogger(self.category)
self.url = match.string
+ self.match = match
+ self.groups = match.groups()
self._cfgpath = ("extractor", self.category, self.subcategory)
self._parentdir = ""
@@ -168,22 +172,25 @@ class Extractor():
requests.exceptions.ChunkedEncodingError,
requests.exceptions.ContentDecodingError) as exc:
msg = exc
+ code = 0
except (requests.exceptions.RequestException) as exc:
raise exception.HttpError(exc)
else:
code = response.status_code
if self._write_pages:
self._dump_response(response)
- if 200 <= code < 400 or fatal is None and \
- (400 <= code < 500) or not fatal and \
- (400 <= code < 429 or 431 <= code < 500):
+ if (
+ code < 400 or
+ code < 500 and (not fatal and code != 429 or fatal is None)
+ ):
if encoding:
response.encoding = encoding
return response
if notfound and code == 404:
raise exception.NotFoundError(notfound)
- msg = "'{} {}' for '{}'".format(code, response.reason, url)
+ msg = "'{} {}' for '{}'".format(
+ code, response.reason, response.url)
server = response.headers.get("Server")
if server and server.startswith("cloudflare") and \
code in (403, 503):
@@ -194,7 +201,10 @@ class Extractor():
if b'name="captcha-bypass"' in content:
self.log.warning("Cloudflare CAPTCHA")
break
- if code not in retry_codes and code < 500:
+
+ if code == 429 and self._interval_429:
+ pass
+ elif code not in retry_codes and code < 500:
break
finally:
@@ -204,20 +214,24 @@ class Extractor():
if tries > retries:
break
+ seconds = tries
if self._interval:
- seconds = self._interval()
- if seconds < tries:
- seconds = tries
+ s = self._interval()
+ if seconds < s:
+ seconds = s
+ if code == 429 and self._interval_429:
+ s = self._interval_429()
+ if seconds < s:
+ seconds = s
+ self.wait(seconds=seconds, reason="429 Too Many Requests")
else:
- seconds = tries
-
- self.sleep(seconds, "retry")
+ self.sleep(seconds, "retry")
tries += 1
raise exception.HttpError(msg, response)
def wait(self, seconds=None, until=None, adjust=1.0,
- reason="rate limit reset"):
+ reason="rate limit"):
now = time.time()
if seconds:
@@ -240,7 +254,7 @@ class Extractor():
if reason:
t = datetime.datetime.fromtimestamp(until).time()
isotime = "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second)
- self.log.info("Waiting until %s for %s.", isotime, reason)
+ self.log.info("Waiting until %s (%s)", isotime, reason)
time.sleep(seconds)
def sleep(self, seconds, reason):
@@ -248,6 +262,15 @@ class Extractor():
seconds, reason)
time.sleep(seconds)
+ def input(self, prompt, echo=True):
+ if echo:
+ try:
+ return input(prompt)
+ except (EOFError, OSError):
+ return None
+ else:
+ return getpass.getpass(prompt)
+
def _get_auth_info(self):
"""Return authentication information as (username, password) tuple"""
username = self.config("username")
@@ -280,6 +303,9 @@ class Extractor():
self.config("sleep-request", self.request_interval),
self.request_interval_min,
)
+ self._interval_429 = util.build_duration_func(
+ self.config("sleep-429", 60),
+ )
if self._retries < 0:
self._retries = float("inf")
@@ -439,9 +465,11 @@ class Extractor():
if not path:
return
+ path_tmp = path + ".tmp"
try:
- with open(path, "w") as fp:
+ with open(path_tmp, "w") as fp:
util.cookiestxt_store(fp, self.cookies)
+ os.replace(path_tmp, path)
except OSError as exc:
self.log.warning("cookies: %s", exc)
@@ -599,7 +627,7 @@ class GalleryExtractor(Extractor):
def __init__(self, match, url=None):
Extractor.__init__(self, match)
- self.gallery_url = self.root + match.group(1) if url is None else url
+ self.gallery_url = self.root + self.groups[0] if url is None else url
def items(self):
self.login()
@@ -674,7 +702,7 @@ class MangaExtractor(Extractor):
def __init__(self, match, url=None):
Extractor.__init__(self, match)
- self.manga_url = url or self.root + match.group(1)
+ self.manga_url = self.root + self.groups[0] if url is None else url
if self.config("chapter-reverse", False):
self.reverse = not self.reverse
@@ -736,17 +764,18 @@ class BaseExtractor(Extractor):
instances = ()
def __init__(self, match):
- if not self.category:
- self._init_category(match)
Extractor.__init__(self, match)
+ if not self.category:
+ self._init_category()
+ self._cfgpath = ("extractor", self.category, self.subcategory)
- def _init_category(self, match):
- for index, group in enumerate(match.groups()):
+ def _init_category(self):
+ for index, group in enumerate(self.groups):
if group is not None:
if index:
self.category, self.root, info = self.instances[index-1]
if not self.root:
- self.root = text.root_from_url(match.group(0))
+ self.root = text.root_from_url(self.match.group(0))
self.config_instance = info.get
else:
self.root = group
@@ -806,12 +835,12 @@ def _build_requests_adapter(ssl_options, ssl_ciphers, source_address):
pass
if ssl_options or ssl_ciphers:
- ssl_context = ssl.create_default_context()
- if ssl_options:
- ssl_context.options |= ssl_options
- if ssl_ciphers:
- ssl_context.set_ecdh_curve("prime256v1")
- ssl_context.set_ciphers(ssl_ciphers)
+ ssl_context = urllib3.connection.create_urllib3_context(
+ options=ssl_options or None, ciphers=ssl_ciphers)
+ if requests.__version__ > "2.31":
+ # https://github.com/psf/requests/pull/6731
+ ssl_context.load_default_certs()
+ ssl_context.check_hostname = False
else:
ssl_context = None
@@ -931,8 +960,6 @@ SSL_CIPHERS = {
}
-urllib3 = requests.packages.urllib3
-
# detect brotli support
try:
BROTLI = urllib3.response.brotli is not None