summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/common.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@ubuntu.com>, 2019-12-25 19:40:28 -0500
committer: Unit 193 <unit193@ubuntu.com>, 2019-12-25 19:40:28 -0500
commit: f9a1a9dcb7df977eeac9544786df9c0b93795815 (patch)
tree: 8cb69cf7685da8d7e4deb7dc1d6b209098e1ddfb /gallery_dl/extractor/common.py
parent: 0c73e982fa596da07f23b377621ab894a9e64884 (diff)
New upstream version 1.12.1 [upstream/1.12.1]
Diffstat (limited to 'gallery_dl/extractor/common.py')
-rw-r--r-- gallery_dl/extractor/common.py 81
1 files changed, 54 insertions, 27 deletions
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 0d258eb..a1a4890 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -69,7 +69,7 @@ class Extractor():
def config(self, key, default=None):
return config.interpolate(
- ("extractor", self.category, self.subcategory, key), default)
+ ("extractor", self.category, self.subcategory), key, default)
def request(self, url, *, method="GET", session=None, retries=None,
encoding=None, fatal=True, notfound=None, **kwargs):
@@ -101,18 +101,14 @@ class Extractor():
raise exception.NotFoundError(notfound)
if cloudflare.is_challenge(response):
self.log.info("Solving Cloudflare challenge")
- url, domain, cookies = cloudflare.solve_challenge(
+ response, domain, cookies = cloudflare.solve_challenge(
session, response, kwargs)
+ if response.status_code >= 400:
+ continue
cloudflare.cookies.update(self.category, (domain, cookies))
- continue
+ return response
if cloudflare.is_captcha(response):
- try:
- import OpenSSL # noqa
- except ImportError:
- msg = " - Install 'pyOpenSSL' and try again"
- else:
- msg = ""
- self.log.warning("Cloudflare CAPTCHA" + msg)
+ self.log.warning("Cloudflare CAPTCHA")
msg = "'{} {}' for '{}'".format(code, response.reason, url)
if code < 500 and code != 429 and code != 430:
@@ -200,7 +196,7 @@ class Extractor():
def _store_cookies(self):
"""Store the session's cookiejar in a cookies.txt file"""
- if self._cookiefile and self.config("cookies-update", False):
+ if self._cookiefile and self.config("cookies-update", True):
cookiejar = http.cookiejar.MozillaCookieJar()
for cookie in self._cookiejar:
cookiejar.set_cookie(cookie)
@@ -233,12 +229,14 @@ class Extractor():
"""Check if all 'cookienames' are in the session's cookiejar"""
if domain is None:
domain = self.cookiedomain
- try:
- for name in cookienames:
- self._cookiejar._find(name, domain)
- except KeyError:
- return False
- return True
+
+ names = set(cookienames)
+ for cookie in self._cookiejar:
+ if cookie.domain == domain:
+ names.discard(cookie.name)
+ if not names:
+ return True
+ return False
def _get_date_min_max(self, dmin=None, dmax=None):
"""Retrieve and parse 'date-min' and 'date-max' config values"""
@@ -254,6 +252,26 @@ class Extractor():
fmt = self.config("date-format", "%Y-%m-%dT%H:%M:%S")
return get("date-min", dmin), get("date-max", dmax)
+ def _dispatch_extractors(self, extractor_data, default=()):
+ """ """
+ extractors = {
+ data[0].subcategory: data
+ for data in extractor_data
+ }
+
+ include = self.config("include", default) or ()
+ if include == "all":
+ include = extractors
+ elif isinstance(include, str):
+ include = include.split(",")
+
+ result = [(Message.Version, 1)]
+ for category in include:
+ if category in extractors:
+ extr, url = extractors[category]
+ result.append((Message.Queue, url, {"_extractor": extr}))
+ return iter(result)
+
@classmethod
def _get_tests(cls):
"""Yield an extractor's test cases as (URL, RESULTS) tuples"""
@@ -284,7 +302,7 @@ class GalleryExtractor(Extractor):
def items(self):
self.login()
- page = self.request(self.gallery_url).text
+ page = self.request(self.gallery_url, notfound=self.subcategory).text
data = self.metadata(page)
imgs = self.images(page)
@@ -402,16 +420,13 @@ class SharedConfigMixin():
def config(self, key, default=None, *, sentinel=object()):
value = Extractor.config(self, key, sentinel)
- if value is sentinel:
- cat, self.category = self.category, self.basecategory
- value = Extractor.config(self, key, default)
- self.category = cat
- return value
+ return value if value is not sentinel else config.interpolate(
+ ("extractor", self.basecategory, self.subcategory), key, default)
def generate_extractors(extractor_data, symtable, classes):
"""Dynamically generate Extractor classes"""
- extractors = config.get(("extractor", classes[0].basecategory))
+ extractors = config.get(("extractor",), classes[0].basecategory)
ckey = extractor_data.get("_ckey")
prev = None
@@ -456,10 +471,21 @@ def generate_extractors(extractor_data, symtable, classes):
http.cookiejar.MozillaCookieJar.magic_re = re.compile(
"#( Netscape)? HTTP Cookie File", re.IGNORECASE)
-# Replace default cipher list of urllib3 to avoid Cloudflare CAPTCHAs
-ciphers = config.get(("ciphers",), True)
+
+# Undo automatic pyOpenSSL injection by requests
+pyopenssl = config.get((), "pyopenssl", False)
+if not pyopenssl:
+ try:
+ from requests.packages.urllib3.contrib import pyopenssl # noqa
+ pyopenssl.extract_from_urllib3()
+ except ImportError:
+ pass
+del pyopenssl
+
+
+# Replace urllib3's default cipher list to avoid Cloudflare CAPTCHAs
+ciphers = config.get((), "ciphers", True)
if ciphers:
- logging.getLogger("gallery-dl").debug("Updating urllib3 ciphers")
if ciphers is True:
ciphers = (
@@ -489,3 +515,4 @@ if ciphers:
from requests.packages.urllib3.util import ssl_ # noqa
ssl_.DEFAULT_CIPHERS = ciphers
del ssl_
+del ciphers