diff options
Diffstat (limited to 'gallery_dl/extractor/common.py')
| -rw-r--r-- | gallery_dl/extractor/common.py | 81 |
1 files changed, 54 insertions, 27 deletions
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 0d258eb..a1a4890 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -69,7 +69,7 @@ class Extractor(): def config(self, key, default=None): return config.interpolate( - ("extractor", self.category, self.subcategory, key), default) + ("extractor", self.category, self.subcategory), key, default) def request(self, url, *, method="GET", session=None, retries=None, encoding=None, fatal=True, notfound=None, **kwargs): @@ -101,18 +101,14 @@ class Extractor(): raise exception.NotFoundError(notfound) if cloudflare.is_challenge(response): self.log.info("Solving Cloudflare challenge") - url, domain, cookies = cloudflare.solve_challenge( + response, domain, cookies = cloudflare.solve_challenge( session, response, kwargs) + if response.status_code >= 400: + continue cloudflare.cookies.update(self.category, (domain, cookies)) - continue + return response if cloudflare.is_captcha(response): - try: - import OpenSSL # noqa - except ImportError: - msg = " - Install 'pyOpenSSL' and try again" - else: - msg = "" - self.log.warning("Cloudflare CAPTCHA" + msg) + self.log.warning("Cloudflare CAPTCHA") msg = "'{} {}' for '{}'".format(code, response.reason, url) if code < 500 and code != 429 and code != 430: @@ -200,7 +196,7 @@ class Extractor(): def _store_cookies(self): """Store the session's cookiejar in a cookies.txt file""" - if self._cookiefile and self.config("cookies-update", False): + if self._cookiefile and self.config("cookies-update", True): cookiejar = http.cookiejar.MozillaCookieJar() for cookie in self._cookiejar: cookiejar.set_cookie(cookie) @@ -233,12 +229,14 @@ class Extractor(): """Check if all 'cookienames' are in the session's cookiejar""" if domain is None: domain = self.cookiedomain - try: - for name in cookienames: - self._cookiejar._find(name, domain) - except KeyError: - return False - return True + + names = set(cookienames) + for cookie in self._cookiejar: + if cookie.domain == domain: + names.discard(cookie.name) + if not names: + return True + return False def _get_date_min_max(self, dmin=None, dmax=None): """Retrieve and parse 'date-min' and 'date-max' config values""" @@ -254,6 +252,26 @@ class Extractor(): fmt = self.config("date-format", "%Y-%m-%dT%H:%M:%S") return get("date-min", dmin), get("date-max", dmax) + def _dispatch_extractors(self, extractor_data, default=()): + """ """ + extractors = { + data[0].subcategory: data + for data in extractor_data + } + + include = self.config("include", default) or () + if include == "all": + include = extractors + elif isinstance(include, str): + include = include.split(",") + + result = [(Message.Version, 1)] + for category in include: + if category in extractors: + extr, url = extractors[category] + result.append((Message.Queue, url, {"_extractor": extr})) + return iter(result) + @classmethod def _get_tests(cls): """Yield an extractor's test cases as (URL, RESULTS) tuples""" @@ -284,7 +302,7 @@ class GalleryExtractor(Extractor): def items(self): self.login() - page = self.request(self.gallery_url).text + page = self.request(self.gallery_url, notfound=self.subcategory).text data = self.metadata(page) imgs = self.images(page) @@ -402,16 +420,13 @@ class SharedConfigMixin(): def config(self, key, default=None, *, sentinel=object()): value = Extractor.config(self, key, sentinel) - if value is sentinel: - cat, self.category = self.category, self.basecategory - value = Extractor.config(self, key, default) - self.category = cat - return value + return value if value is not sentinel else config.interpolate( + ("extractor", self.basecategory, self.subcategory), key, default) def generate_extractors(extractor_data, symtable, classes): """Dynamically generate Extractor classes""" - extractors = config.get(("extractor", classes[0].basecategory)) + extractors = config.get(("extractor",), classes[0].basecategory) ckey = extractor_data.get("_ckey") prev = None @@ -456,10 +471,21 @@ def generate_extractors(extractor_data, symtable, classes): http.cookiejar.MozillaCookieJar.magic_re = re.compile( "#( Netscape)? HTTP Cookie File", re.IGNORECASE) -# Replace default cipher list of urllib3 to avoid Cloudflare CAPTCHAs -ciphers = config.get(("ciphers",), True) + +# Undo automatic pyOpenSSL injection by requests +pyopenssl = config.get((), "pyopenssl", False) +if not pyopenssl: + try: + from requests.packages.urllib3.contrib import pyopenssl # noqa + pyopenssl.extract_from_urllib3() + except ImportError: + pass +del pyopenssl + + +# Replace urllib3's default cipher list to avoid Cloudflare CAPTCHAs +ciphers = config.get((), "ciphers", True) if ciphers: - logging.getLogger("gallery-dl").debug("Updating urllib3 ciphers") if ciphers is True: ciphers = ( @@ -489,3 +515,4 @@ if ciphers: from requests.packages.urllib3.util import ssl_ # noqa ssl_.DEFAULT_CIPHERS = ciphers del ssl_ +del ciphers |
