diff options
Diffstat (limited to 'gallery_dl/cloudflare.py')
| -rw-r--r-- | gallery_dl/cloudflare.py | 27 |
1 files changed, 17 insertions, 10 deletions
diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py index 43ccdeb..0cf5a57 100644 --- a/gallery_dl/cloudflare.py +++ b/gallery_dl/cloudflare.py @@ -8,11 +8,11 @@ """Methods to access sites behind Cloudflare protection""" -import re import time import operator import collections import urllib.parse +from xml.etree import ElementTree from . import text from .cache import memcache @@ -41,12 +41,16 @@ def solve_challenge(session, response, kwargs): url = root + text.unescape(text.extract(page, 'action="', '"')[0]) headers["Referer"] = response.url - for inpt in text.extract_iter(page, "<input ", ">"): - name = text.extract(inpt, 'name="', '"')[0] + form = text.extract(page, 'id="challenge-form"', '</form>')[0] + for element in ElementTree.fromstring( + "<f>" + form + "</f>").findall("input"): + name = element.attrib.get("name") + if not name: + continue if name == "jschl_answer": value = solve_js_challenge(page, parsed.netloc) else: - value = text.unescape(text.extract(inpt, 'value="', '"')[0]) + value = element.attrib.get("value") params[name] = value time.sleep(4) @@ -84,6 +88,8 @@ def solve_js_challenge(page, netloc): variable = "{}.{}".format(data["var"], data["key"]) vlength = len(variable) + k = text.extract(page, "k = '", "'")[0] + # evaluate the initial expression solution = evaluate_expression(data["expr"], page, netloc) @@ -97,7 +103,7 @@ def solve_js_challenge(page, netloc): # select arithmetc function based on operator (+/-/*) func = OPERATORS[expr[vlength]] # evaluate the rest of the expression - value = evaluate_expression(expr[vlength+2:], page, netloc) + value = evaluate_expression(expr[vlength+2:], page, netloc, k) # combine expression value with our current solution solution = func(solution, value) @@ -110,17 +116,18 @@ def solve_js_challenge(page, netloc): solution = "{:.10f}".format(solution) return solution + elif expr.startswith("k+="): + k += str(evaluate_expression(expr[3:], page, netloc)) + -def evaluate_expression(expr, page, netloc, *, - split_re=re.compile(r"[(+]+([^)]*)\)")): +def evaluate_expression(expr, page, netloc, k=""): """Evaluate a single Javascript expression for the challenge""" if expr.startswith("function(p)"): # get HTML element with ID k and evaluate the expression inside # 'eval(eval("document.getElementById(k).innerHTML"))' - k, pos = text.extract(page, "k = '", "'") - e, pos = text.extract(page, 'id="'+k+'"', '<') - return evaluate_expression(e.partition(">")[2], page, netloc) + expr = text.extract(page, 'id="'+k+'"', '<')[0] + return evaluate_expression(expr.partition(">")[2], page, netloc) if "/" in expr: # split the expression in numerator and denominator subexpressions, |
