| author | 2025-07-31 01:22:01 -0400 |
|---|---|
| committer | 2025-07-31 01:22:01 -0400 |
| commit | a6e995c093de8aae2e91a0787281bb34c0b871eb (patch) |
| tree | 2d79821b05300d34d8871eb6c9662b359a2de85d /gallery_dl/extractor/common.py |
| parent | 7672a750cb74bf31e21d76aad2776367fd476155 (diff) |
New upstream version 1.30.2 (upstream/1.30.2)
Diffstat (limited to 'gallery_dl/extractor/common.py')
| -rw-r--r-- | gallery_dl/extractor/common.py | 562 |
|---|---|---|

1 file changed, 366 insertions(+), 196 deletions(-)
```diff
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index c430ec1..d46152b 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2014-2023 Mike Fährmann
+# Copyright 2014-2025 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -17,9 +17,10 @@ import queue
 import random
 import getpass
 import logging
-import datetime
 import requests
 import threading
+from datetime import datetime
+from xml.etree import ElementTree
 from requests.adapters import HTTPAdapter
 from .message import Message
 from .. import config, output, text, util, cache, exception
@@ -35,6 +36,7 @@ class Extractor():
     directory_fmt = ("{category}",)
     filename_fmt = "{filename}.{extension}"
     archive_fmt = ""
+    status = 0
     root = ""
     cookies_domain = ""
     cookies_index = 0
@@ -53,6 +55,15 @@ class Extractor():
         self.url = match.string
         self.match = match
         self.groups = match.groups()
+        self.kwdict = {}
+
+        if self.category in CATEGORY_MAP:
+            catsub = f"{self.category}:{self.subcategory}"
+            if catsub in CATEGORY_MAP:
+                self.category, self.subcategory = CATEGORY_MAP[catsub]
+            else:
+                self.category = CATEGORY_MAP[self.category]
+
         self._cfgpath = ("extractor", self.category, self.subcategory)
         self._parentdir = ""
 
@@ -125,11 +136,10 @@ class Extractor():
             if first:
                 first = False
                 values = config.accumulate(extr + path, key)
-            else:
-                conf = config.get(extr, path[0])
-                if conf:
-                    values[:0] = config.accumulate(
-                        (self.subcategory,), key, conf=conf)
+            elif conf := config.get(extr, path[0]):
+                values[:0] = config.accumulate(
+                    (self.subcategory,), key, conf=conf)
+
         return values
 
     def request(self, url, method="GET", session=None,
@@ -149,17 +159,15 @@
         kwargs["verify"] = self._verify
 
         if "json" in kwargs:
-            json = kwargs["json"]
-            if json is not None:
+            if (json := kwargs["json"]) is not None:
                 kwargs["data"] = util.json_dumps(json).encode()
                 del kwargs["json"]
-                headers = kwargs.get("headers")
-                if headers:
+                if headers := kwargs.get("headers"):
                     headers["Content-Type"] = "application/json"
                 else:
                     kwargs["headers"] = {"Content-Type": "application/json"}
 
-        response = None
+        response = challenge = None
         tries = 1
 
         if self._interval:
@@ -172,21 +180,22 @@
             try:
                 response = session.request(method, url, **kwargs)
             except requests.exceptions.ConnectionError as exc:
-                code = 0
                 try:
                     reason = exc.args[0].reason
                     cls = reason.__class__.__name__
                     pre, _, err = str(reason.args[-1]).partition(":")
-                    msg = " {}: {}".format(cls, (err or pre).lstrip())
+                    msg = f" {cls}: {(err or pre).lstrip()}"
                 except Exception:
                     msg = exc
+                code = 0
             except (requests.exceptions.Timeout,
                     requests.exceptions.ChunkedEncodingError,
                     requests.exceptions.ContentDecodingError) as exc:
                 msg = exc
                 code = 0
             except (requests.exceptions.RequestException) as exc:
-                raise exception.HttpError(exc)
+                msg = exc
+                break
             else:
                 code = response.status_code
                 if self._write_pages:
@@ -201,10 +210,10 @@
                         response.encoding = encoding
                     return response
                 if notfound and code == 404:
+                    self.status |= exception.NotFoundError.code
                     raise exception.NotFoundError(notfound)
 
-                msg = "'{} {}' for '{}'".format(
-                    code, response.reason, response.url)
+                msg = f"'{code} {response.reason}' for '{response.url}'"
 
                 challenge = util.detect_challenge(response)
                 if challenge is not None:
@@ -238,13 +247,59 @@
             self.sleep(seconds, "retry")
             tries += 1
 
-        raise exception.HttpError(msg, response)
+        if not fatal or fatal is ...:
+            self.log.warning(msg)
+            return util.NullResponse(url, msg)
+
+        if challenge is None:
+            exc = exception.HttpError(msg, response)
+        else:
+            exc = exception.ChallengeError(challenge, response)
+        self.status |= exc.code
+        raise exc
 
     def request_location(self, url, **kwargs):
         kwargs.setdefault("method", "HEAD")
         kwargs.setdefault("allow_redirects", False)
         return self.request(url, **kwargs).headers.get("location", "")
 
+    def request_json(self, url, **kwargs):
+        response = self.request(url, **kwargs)
+
+        try:
+            return util.json_loads(response.text)
+        except Exception as exc:
+            fatal = kwargs.get("fatal", True)
+            if not fatal or fatal is ...:
+                if challenge := util.detect_challenge(response):
+                    self.log.warning(challenge)
+                else:
+                    self.log.warning("%s: %s", exc.__class__.__name__, exc)
+                return {}
+            raise
+
+    def request_xml(self, url, xmlns=True, **kwargs):
+        response = self.request(url, **kwargs)
+
+        if xmlns:
+            text = response.text
+        else:
+            text = response.text.replace(" xmlns=", " ns=")
+
+        parser = ElementTree.XMLParser()
+        try:
+            parser.feed(text)
+            return parser.close()
+        except Exception as exc:
+            fatal = kwargs.get("fatal", True)
+            if not fatal or fatal is ...:
+                if challenge := util.detect_challenge(response):
+                    self.log.warning(challenge)
+                else:
+                    self.log.warning("%s: %s", exc.__class__.__name__, exc)
+                return ElementTree.Element("")
+            raise
+
     _handle_429 = util.false
 
     def wait(self, seconds=None, until=None, adjust=1.0,
@@ -255,7 +310,7 @@
             seconds = float(seconds)
             until = now + seconds
         elif until:
-            if isinstance(until, datetime.datetime):
+            if isinstance(until, datetime):
                 # convert to UTC timestamp
                 until = util.datetime_to_timestamp(until)
             else:
@@ -269,8 +324,8 @@
             return
 
         if reason:
-            t = datetime.datetime.fromtimestamp(until).time()
-            isotime = "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second)
+            t = datetime.fromtimestamp(until).time()
+            isotime = f"{t.hour:02}:{t.minute:02}:{t.second:02}"
             self.log.info("Waiting until %s (%s)", isotime, reason)
 
         time.sleep(seconds)
@@ -295,8 +350,8 @@
         if input is None:
            input = output.TTY_STDIN
         if not input:
-            raise exception.StopExtraction(
-                "User input required (%s)", prompt.strip(" :"))
+            raise exception.AbortExtraction(
+                f"User input required ({prompt.strip(' :')})")
 
     def _get_auth_info(self):
         """Return authentication information as (username, password) tuple"""
@@ -366,36 +421,31 @@
             elif platform == "linux":
                 platform = "X11; Linux x86_64"
             elif platform == "macos":
-                platform = "Macintosh; Intel Mac OS X 11.5"
+                platform = "Macintosh; Intel Mac OS X 15.5"
 
             if browser == "chrome":
                 if platform.startswith("Macintosh"):
-                    platform = platform.replace(".", "_") + "_2"
+                    platform = platform.replace(".", "_")
             else:
                 browser = "firefox"
 
-            for key, value in HTTP_HEADERS[browser]:
+            for key, value in HEADERS[browser]:
                 if value and "{}" in value:
-                    headers[key] = value.format(platform)
+                    headers[key] = value.replace("{}", platform)
                 else:
                     headers[key] = value
 
             ssl_options |= (ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 |
                             ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1)
-            ssl_ciphers = SSL_CIPHERS[browser]
+            ssl_ciphers = CIPHERS[browser]
         else:
-            useragent = self.config("user-agent")
-            if useragent is None or useragent == "auto":
-                useragent = self.useragent
-            elif useragent == "browser":
-                useragent = _browser_useragent()
-            elif self.useragent is not Extractor.useragent and \
-                    useragent is config.get(("extractor",), "user-agent"):
-                useragent = self.useragent
-            headers["User-Agent"] = useragent
+            headers["User-Agent"] = self.useragent
             headers["Accept"] = "*/*"
             headers["Accept-Language"] = "en-US,en;q=0.5"
+            ssl_ciphers = self.ciphers
+            if ssl_ciphers is not None and ssl_ciphers in CIPHERS:
+                ssl_ciphers = CIPHERS[ssl_ciphers]
 
         if BROTLI:
             headers["Accept-Encoding"] = "gzip, deflate, br"
@@ -404,26 +454,40 @@
         if ZSTD:
             headers["Accept-Encoding"] += ", zstd"
 
-        referer = self.config("referer", self.referer)
-        if referer:
+        if referer := self.config("referer", self.referer):
             if isinstance(referer, str):
                 headers["Referer"] = referer
             elif self.root:
                 headers["Referer"] = self.root + "/"
 
-        custom_headers = self.config("headers")
-        if custom_headers:
+        custom_ua = self.config("user-agent")
+        if custom_ua is None or custom_ua == "auto":
+            pass
+        elif custom_ua == "browser":
+            headers["User-Agent"] = _browser_useragent()
+        elif self.useragent is Extractor.useragent and not self.browser or \
+                custom_ua is not config.get(("extractor",), "user-agent"):
+            headers["User-Agent"] = custom_ua
+
+        if custom_headers := self.config("headers"):
+            if isinstance(custom_headers, str):
+                if custom_headers in HEADERS:
+                    custom_headers = HEADERS[custom_headers]
+                else:
+                    self.log.error("Invalid 'headers' value '%s'",
+                                   custom_headers)
+                    custom_headers = ()
             headers.update(custom_headers)
 
-        custom_ciphers = self.config("ciphers")
-        if custom_ciphers:
+        if custom_ciphers := self.config("ciphers"):
             if isinstance(custom_ciphers, list):
                 ssl_ciphers = ":".join(custom_ciphers)
+            elif custom_ciphers in CIPHERS:
+                ssl_ciphers = CIPHERS[custom_ciphers]
             else:
                 ssl_ciphers = custom_ciphers
 
-        source_address = self.config("source-address")
-        if source_address:
+        if source_address := self.config("source-address"):
             if isinstance(source_address, str):
                 source_address = (source_address, 0)
             else:
@@ -436,8 +500,17 @@
             ssl_options |= ssl.OP_NO_TLSv1_2
             self.log.debug("TLS 1.2 disabled.")
 
+        if self.config("truststore"):
+            try:
+                from truststore import SSLContext as ssl_ctx
+            except ImportError as exc:
+                self.log.error("%s: %s", exc.__class__.__name__, exc)
+                ssl_ctx = None
+        else:
+            ssl_ctx = None
+
         adapter = _build_requests_adapter(
-            ssl_options, ssl_ciphers, source_address)
+            ssl_options, ssl_ciphers, ssl_ctx, source_address)
         session.mount("https://", adapter)
         session.mount("http://", adapter)
 
@@ -448,10 +521,8 @@
         if self.cookies_domain is None:
             return
 
-        cookies = self.config("cookies")
-        if cookies:
-            select = self.config("cookies-select")
-            if select:
+        if cookies := self.config("cookies"):
+            if select := self.config("cookies-select"):
                 if select == "rotate":
                     cookies = cookies[self.cookies_index % len(cookies)]
                     Extractor.cookies_index += 1
@@ -469,9 +540,11 @@
                 with open(path) as fp:
                     cookies = util.cookiestxt_load(fp)
             except Exception as exc:
-                self.log.warning("cookies: %s", exc)
+                self.log.warning("cookies: Failed to load '%s' (%s: %s)",
+                                 cookies_source, exc.__class__.__name__, exc)
             else:
-                self.log.debug("Loading cookies from '%s'", cookies_source)
+                self.log.debug("cookies: Loading cookies from '%s'",
+                               cookies_source)
                 set_cookie = self.cookies.set_cookie
                 for cookie in cookies:
                     set_cookie(cookie)
@@ -479,7 +552,7 @@
 
         elif isinstance(cookies_source, (list, tuple)):
             key = tuple(cookies_source)
-            cookies = _browser_cookies.get(key)
+            cookies = CACHE_COOKIES.get(key)
 
             if cookies is None:
                 from ..cookies import load_cookies
@@ -489,18 +562,18 @@
                     self.log.warning("cookies: %s", exc)
                     cookies = ()
                 else:
-                    _browser_cookies[key] = cookies
+                    CACHE_COOKIES[key] = cookies
             else:
-                self.log.debug("Using cached cookies from %s", key)
+                self.log.debug("cookies: Using cached cookies from %s", key)
 
             set_cookie = self.cookies.set_cookie
             for cookie in cookies:
                 set_cookie(cookie)
 
         else:
-            self.log.warning(
-                "Expected 'dict', 'list', or 'str' value for 'cookies' "
-                "option, got '%s' (%s)",
+            self.log.error(
+                "cookies: Expected 'dict', 'list', or 'str' value for "
+                "'cookies' option, got '%s' instead (%r)",
                 cookies_source.__class__.__name__, cookies_source)
 
     def cookies_store(self):
@@ -522,7 +595,8 @@
                 util.cookiestxt_store(fp, self.cookies)
             os.replace(path_tmp, path)
         except OSError as exc:
-            self.log.warning("cookies: %s", exc)
+            self.log.error("cookies: Failed to write to '%s' "
+                           "(%s: %s)", path, exc.__class__.__name__, exc)
 
     def cookies_update(self, cookies, domain=""):
         """Update the session's cookiejar with 'cookies'"""
@@ -568,14 +642,17 @@
                 if diff <= 0:
                     self.log.warning(
-                        "Cookie '%s' has expired", cookie.name)
+                        "cookies: %s/%s expired at %s",
+                        cookie.domain.lstrip("."), cookie.name,
+                        datetime.fromtimestamp(cookie.expires))
                     continue
 
                 elif diff <= 86400:
                     hours = diff // 3600
                     self.log.warning(
-                        "Cookie '%s' will expire in less than %s hour%s",
-                        cookie.name, hours + 1, "s" if hours else "")
+                        "cookies: %s/%s will expire in less than %s hour%s",
+                        cookie.domain.lstrip("."), cookie.name,
+                        hours + 1, "s" if hours else "")
 
             names.discard(cookie.name)
 
         if not names:
@@ -590,11 +667,6 @@
         return util.json_loads(text.extr(
             page, ' id="__NEXT_DATA__" type="application/json">', "</script>"))
 
-    def _prepare_ddosguard_cookies(self):
-        if not self.cookies.get("__ddg2", domain=self.cookies_domain):
-            self.cookies.set(
-                "__ddg2", util.generate_token(), domain=self.cookies_domain)
-
     def _cache(self, func, maxage, keyarg=None):
         # return cache.DatabaseCacheDecorator(func, maxage, keyarg)
         return cache.DatabaseCacheDecorator(func, keyarg, maxage)
@@ -608,7 +680,7 @@
             ts = self.config(key, default)
             if isinstance(ts, str):
                 try:
-                    ts = int(datetime.datetime.strptime(ts, fmt).timestamp())
+                    ts = int(datetime.strptime(ts, fmt).timestamp())
                 except ValueError as exc:
                     self.log.warning("Unable to parse '%s': %s", key, exc)
                     ts = default
@@ -616,35 +688,12 @@
         fmt = self.config("date-format", "%Y-%m-%dT%H:%M:%S")
         return get("date-min", dmin), get("date-max", dmax)
 
-    def _dispatch_extractors(self, extractor_data, default=()):
-        """ """
-        extractors = {
-            data[0].subcategory: data
-            for data in extractor_data
-        }
-
-        include = self.config("include", default) or ()
-        if include == "all":
-            include = extractors
-        elif isinstance(include, str):
-            include = include.replace(" ", "").split(",")
-
-        result = [(Message.Version, 1)]
-        for category in include:
-            try:
-                extr, url = extractors[category]
-            except KeyError:
-                self.log.warning("Invalid include '%s'", category)
-            else:
-                result.append((Message.Queue, url, {"_extractor": extr}))
-        return iter(result)
-
     @classmethod
     def _dump(cls, obj):
         util.dump_json(obj, ensure_ascii=False, indent=2)
 
     def _dump_response(self, response, history=True):
-        """Write the response content to a .dump file in the current directory.
+        """Write the response content to a .txt file in the current directory.
 
         The file name is derived from the response url,
         replacing special characters with "_"
@@ -657,12 +706,11 @@
             Extractor._dump_index += 1
         else:
             Extractor._dump_index = 1
-            Extractor._dump_sanitize = re.compile(r"[\\|/<>:\"?*&=#]+").sub
+            Extractor._dump_sanitize = util.re_compile(
+                r"[\\|/<>:\"?*&=#]+").sub
 
-        fname = "{:>02}_{}".format(
-            Extractor._dump_index,
-            Extractor._dump_sanitize('_', response.url),
-        )
+        fname = (f"{Extractor._dump_index:>02}_"
+                 f"{Extractor._dump_sanitize('_', response.url)}")
 
         if util.WINDOWS:
             path = os.path.abspath(fname)[:255]
@@ -693,19 +741,24 @@ class GalleryExtractor(Extractor):
 
     def __init__(self, match, url=None):
         Extractor.__init__(self, match)
-        self.gallery_url = self.root + self.groups[0] if url is None else url
+
+        if url is None and (path := self.groups[0]) and path[0] == "/":
+            self.page_url = f"{self.root}{path}"
+        else:
+            self.page_url = url
 
     def items(self):
         self.login()
 
-        if self.gallery_url:
+        if self.page_url:
             page = self.request(
-                self.gallery_url, notfound=self.subcategory).text
+                self.page_url, notfound=self.subcategory).text
         else:
            page = None
 
         data = self.metadata(page)
         imgs = self.images(page)
+        assets = self.assets(page)
 
         if "count" in data:
             if self.config("page-reverse"):
@@ -727,7 +780,18 @@
         images = enum(imgs, 1)
 
         yield Message.Directory, data
-        for data[self.enum], (url, imgdata) in images:
+        enum_key = self.enum
+
+        if assets:
+            for asset in assets:
+                url = asset["url"]
+                asset.update(data)
+                asset[enum_key] = 0
+                if "extension" not in asset:
+                    text.nameext_from_url(url, asset)
+                yield Message.Url, url, asset
+
+        for data[enum_key], (url, imgdata) in images:
             if imgdata:
                 data.update(imgdata)
                 if "extension" not in imgdata:
@@ -743,7 +807,13 @@
         """Return a dict with general metadata"""
 
     def images(self, page):
-        """Return a list of all (image-url, metadata)-tuples"""
+        """Return a list or iterable of all (image-url, metadata)-tuples"""
+
+    def assets(self, page):
+        """Return an iterable of additional gallery assets
+
+        Each asset must be a 'dict' containing at least 'url' and 'type'
+        """
 
 
 class ChapterExtractor(GalleryExtractor):
@@ -768,7 +838,11 @@ class MangaExtractor(Extractor):
 
     def __init__(self, match, url=None):
         Extractor.__init__(self, match)
-        self.manga_url = self.root + self.groups[0] if url is None else url
+
+        if url is None and (path := self.groups[0]) and path[0] == "/":
+            self.page_url = f"{self.root}{path}"
+        else:
+            self.page_url = url
 
         if self.config("chapter-reverse", False):
             self.reverse = not self.reverse
@@ -776,8 +850,8 @@
     def items(self):
         self.login()
 
-        if self.manga_url:
-            page = self.request(self.manga_url, notfound=self.subcategory).text
+        if self.page_url:
+            page = self.request(self.page_url, notfound=self.subcategory).text
         else:
             page = None
@@ -796,6 +870,45 @@
         """Return a list of all (chapter-url, metadata)-tuples"""
 
 
+class Dispatch():
+    subcategory = "user"
+    cookies_domain = None
+    finalize = Extractor.finalize
+    skip = Extractor.skip
+
+    def __iter__(self):
+        return self.items()
+
+    def initialize(self):
+        pass
+
+    def _dispatch_extractors(self, extractor_data, default=(), alt=None):
+        extractors = {
+            data[0].subcategory: data
+            for data in extractor_data
+        }
+
+        if alt is not None:
+            for sub, sub_alt in alt:
+                extractors[sub_alt] = extractors[sub]
+
+        include = self.config("include", default) or ()
+        if include == "all":
+            include = extractors
+        elif isinstance(include, str):
+            include = include.replace(" ", "").split(",")
+
+        results = [(Message.Version, 1)]
+        for category in include:
+            try:
+                extr, url = extractors[category]
+            except KeyError:
+                self.log.warning("Invalid include '%s'", category)
+            else:
+                results.append((Message.Queue, url, {"_extractor": extr}))
+        return iter(results)
+
+
 class AsynchronousMixin():
     """Run info extraction in a separate thread"""
 
@@ -846,7 +959,7 @@ class BaseExtractor(Extractor):
         if index:
             self.category, self.root, info = self.instances[index-1]
             if not self.root:
-                self.root = text.root_from_url(self.match.group(0))
+                self.root = text.root_from_url(self.match[0])
             self.config_instance = info.get
         else:
             self.root = group
@@ -855,8 +968,7 @@
     @classmethod
     def update(cls, instances):
-        extra_instances = config.get(("extractor",), cls.basecategory)
-        if extra_instances:
+        if extra_instances := config.get(("extractor",), cls.basecategory):
             for category, info in extra_instances.items():
                 if isinstance(info, dict) and "root" in info:
                     instances[category] = info
@@ -864,8 +976,7 @@
         pattern_list = []
         instance_list = cls.instances = []
         for category, info in instances.items():
-            root = info["root"]
-            if root:
+            if root := info["root"]:
                 root = root.rstrip("/")
             instance_list.append((category, root, info))
 
@@ -898,24 +1009,35 @@ class RequestsAdapter(HTTPAdapter):
         return HTTPAdapter.proxy_manager_for(self, *args, **kwargs)
 
 
-def _build_requests_adapter(ssl_options, ssl_ciphers, source_address):
-    key = (ssl_options, ssl_ciphers, source_address)
+def _build_requests_adapter(
+        ssl_options, ssl_ciphers, ssl_ctx, source_address):
+
+    key = (ssl_options, ssl_ciphers, ssl_ctx, source_address)
     try:
-        return _adapter_cache[key]
+        return CACHE_ADAPTERS[key]
     except KeyError:
         pass
 
-    if ssl_options or ssl_ciphers:
-        ssl_context = urllib3.connection.create_urllib3_context(
-            options=ssl_options or None, ciphers=ssl_ciphers)
-        if not requests.__version__ < "2.32":
-            # https://github.com/psf/requests/pull/6731
-            ssl_context.load_verify_locations(requests.certs.where())
+    if ssl_options or ssl_ciphers or ssl_ctx:
+        if ssl_ctx is None:
+            ssl_context = urllib3.connection.create_urllib3_context(
+                options=ssl_options or None, ciphers=ssl_ciphers)
+            if not requests.__version__ < "2.32":
+                # https://github.com/psf/requests/pull/6731
+                ssl_context.load_verify_locations(requests.certs.where())
+        else:
+            ssl_ctx_orig = urllib3.util.ssl_.SSLContext
+            try:
+                urllib3.util.ssl_.SSLContext = ssl_ctx
+                ssl_context = urllib3.connection.create_urllib3_context(
+                    options=ssl_options or None, ciphers=ssl_ciphers)
+            finally:
+                urllib3.util.ssl_.SSLContext = ssl_ctx_orig
         ssl_context.check_hostname = False
     else:
         ssl_context = None
 
-    adapter = _adapter_cache[key] = RequestsAdapter(
+    adapter = CACHE_ADAPTERS[key] = RequestsAdapter(
         ssl_context, source_address)
     return adapter
 
@@ -932,7 +1054,7 @@ def _browser_useragent():
     server.listen(1)
     host, port = server.getsockname()
 
-    webbrowser.open("http://{}:{}/user-agent".format(host, port))
+    webbrowser.open(f"http://{host}:{port}/user-agent")
 
     client = server.accept()[0]
     server.close()
@@ -951,83 +1073,131 @@
     return useragent.decode()
 
 
-_adapter_cache = {}
-_browser_cookies = {}
-
-
-HTTP_HEADERS = {
-    "firefox": (
-        ("User-Agent", "Mozilla/5.0 ({}; "
-                       "rv:128.0) Gecko/20100101 Firefox/128.0"),
-        ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
-                   "image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8"),
-        ("Accept-Language", "en-US,en;q=0.5"),
-        ("Accept-Encoding", None),
-        ("Referer", None),
-        ("Connection", "keep-alive"),
-        ("Upgrade-Insecure-Requests", "1"),
-        ("Cookie", None),
-        ("Sec-Fetch-Dest", "empty"),
-        ("Sec-Fetch-Mode", "no-cors"),
-        ("Sec-Fetch-Site", "same-origin"),
-        ("TE", "trailers"),
-    ),
-    "chrome": (
-        ("Connection", "keep-alive"),
-        ("Upgrade-Insecure-Requests", "1"),
-        ("User-Agent", "Mozilla/5.0 ({}) AppleWebKit/537.36 (KHTML, "
-                       "like Gecko) Chrome/111.0.0.0 Safari/537.36"),
-        ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
-                   "image/avif,image/webp,image/apng,*/*;q=0.8,"
-                   "application/signed-exchange;v=b3;q=0.7"),
-        ("Referer", None),
-        ("Sec-Fetch-Site", "same-origin"),
-        ("Sec-Fetch-Mode", "no-cors"),
-        ("Sec-Fetch-Dest", "empty"),
-        ("Accept-Encoding", None),
-        ("Accept-Language", "en-US,en;q=0.9"),
-        ("cookie", None),
-        ("content-length", None),
-    ),
+CACHE_ADAPTERS = {}
+CACHE_COOKIES = {}
+CATEGORY_MAP = ()
+
+
+HEADERS_FIREFOX_140 = (
+    ("User-Agent", "Mozilla/5.0 ({}; rv:140.0) Gecko/20100101 Firefox/140.0"),
+    ("Accept", "text/html,application/xhtml+xml,"
+               "application/xml;q=0.9,*/*;q=0.8"),
+    ("Accept-Language", "en-US,en;q=0.5"),
+    ("Accept-Encoding", None),
+    ("Connection", "keep-alive"),
+    ("Content-Type", None),
+    ("Content-Length", None),
+    ("Referer", None),
+    ("Origin", None),
+    ("Cookie", None),
+    ("Sec-Fetch-Dest", "empty"),
+    ("Sec-Fetch-Mode", "cors"),
+    ("Sec-Fetch-Site", "same-origin"),
+    ("TE", "trailers"),
+)
+HEADERS_FIREFOX_128 = (
+    ("User-Agent", "Mozilla/5.0 ({}; rv:128.0) Gecko/20100101 Firefox/128.0"),
+    ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
+               "image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8"),
+    ("Accept-Language", "en-US,en;q=0.5"),
+    ("Accept-Encoding", None),
+    ("Referer", None),
+    ("Connection", "keep-alive"),
+    ("Upgrade-Insecure-Requests", "1"),
+    ("Cookie", None),
+    ("Sec-Fetch-Dest", "empty"),
+    ("Sec-Fetch-Mode", "no-cors"),
+    ("Sec-Fetch-Site", "same-origin"),
+    ("TE", "trailers"),
+)
+HEADERS_CHROMIUM_138 = (
+    ("Connection", "keep-alive"),
+    ("sec-ch-ua", '"Not)A;Brand";v="8", "Chromium";v="138"'),
+    ("sec-ch-ua-mobile", "?0"),
+    ("sec-ch-ua-platform", '"Linux"'),
+    ("Upgrade-Insecure-Requests", "1"),
+    ("User-Agent", "Mozilla/5.0 ({}) AppleWebKit/537.36 (KHTML, "
+                   "like Gecko) Chrome/138.0.0.0 Safari/537.36"),
+    ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
+               "image/avif,image/webp,image/apng,*/*;q=0.8,"
+               "application/signed-exchange;v=b3;q=0.7"),
+    ("Referer", None),
+    ("Sec-Fetch-Site", "same-origin"),
+    ("Sec-Fetch-Mode", "no-cors"),
+    # ("Sec-Fetch-User", "?1"),
+    ("Sec-Fetch-Dest", "empty"),
+    ("Accept-Encoding", None),
+    ("Accept-Language", "en-US,en;q=0.9"),
+)
+HEADERS_CHROMIUM_111 = (
+    ("Connection", "keep-alive"),
+    ("Upgrade-Insecure-Requests", "1"),
+    ("User-Agent", "Mozilla/5.0 ({}) AppleWebKit/537.36 (KHTML, "
+                   "like Gecko) Chrome/111.0.0.0 Safari/537.36"),
+    ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
+               "image/avif,image/webp,image/apng,*/*;q=0.8,"
+               "application/signed-exchange;v=b3;q=0.7"),
+    ("Referer", None),
+    ("Sec-Fetch-Site", "same-origin"),
+    ("Sec-Fetch-Mode", "no-cors"),
+    ("Sec-Fetch-Dest", "empty"),
+    ("Accept-Encoding", None),
+    ("Accept-Language", "en-US,en;q=0.9"),
+    ("cookie", None),
+    ("content-length", None),
+)
+HEADERS = {
+    "firefox"    : HEADERS_FIREFOX_140,
+    "firefox/140": HEADERS_FIREFOX_140,
+    "firefox/128": HEADERS_FIREFOX_128,
+    "chrome"     : HEADERS_CHROMIUM_138,
+    "chrome/138" : HEADERS_CHROMIUM_138,
+    "chrome/111" : HEADERS_CHROMIUM_111,
 }
 
-SSL_CIPHERS = {
-    "firefox": (
-        "TLS_AES_128_GCM_SHA256:"
-        "TLS_CHACHA20_POLY1305_SHA256:"
-        "TLS_AES_256_GCM_SHA384:"
-        "ECDHE-ECDSA-AES128-GCM-SHA256:"
-        "ECDHE-RSA-AES128-GCM-SHA256:"
-        "ECDHE-ECDSA-CHACHA20-POLY1305:"
-        "ECDHE-RSA-CHACHA20-POLY1305:"
-        "ECDHE-ECDSA-AES256-GCM-SHA384:"
-        "ECDHE-RSA-AES256-GCM-SHA384:"
-        "ECDHE-ECDSA-AES256-SHA:"
-        "ECDHE-ECDSA-AES128-SHA:"
-        "ECDHE-RSA-AES128-SHA:"
-        "ECDHE-RSA-AES256-SHA:"
-        "AES128-GCM-SHA256:"
-        "AES256-GCM-SHA384:"
-        "AES128-SHA:"
-        "AES256-SHA"
-    ),
-    "chrome": (
-        "TLS_AES_128_GCM_SHA256:"
-        "TLS_AES_256_GCM_SHA384:"
-        "TLS_CHACHA20_POLY1305_SHA256:"
-        "ECDHE-ECDSA-AES128-GCM-SHA256:"
-        "ECDHE-RSA-AES128-GCM-SHA256:"
-        "ECDHE-ECDSA-AES256-GCM-SHA384:"
-        "ECDHE-RSA-AES256-GCM-SHA384:"
-        "ECDHE-ECDSA-CHACHA20-POLY1305:"
-        "ECDHE-RSA-CHACHA20-POLY1305:"
-        "ECDHE-RSA-AES128-SHA:"
-        "ECDHE-RSA-AES256-SHA:"
-        "AES128-GCM-SHA256:"
-        "AES256-GCM-SHA384:"
-        "AES128-SHA:"
-        "AES256-SHA"
-    ),
+CIPHERS_FIREFOX = (
+    "TLS_AES_128_GCM_SHA256:"
+    "TLS_CHACHA20_POLY1305_SHA256:"
+    "TLS_AES_256_GCM_SHA384:"
+    "ECDHE-ECDSA-AES128-GCM-SHA256:"
+    "ECDHE-RSA-AES128-GCM-SHA256:"
+    "ECDHE-ECDSA-CHACHA20-POLY1305:"
+    "ECDHE-RSA-CHACHA20-POLY1305:"
+    "ECDHE-ECDSA-AES256-GCM-SHA384:"
+    "ECDHE-RSA-AES256-GCM-SHA384:"
+    "ECDHE-ECDSA-AES256-SHA:"
+    "ECDHE-ECDSA-AES128-SHA:"
+    "ECDHE-RSA-AES128-SHA:"
+    "ECDHE-RSA-AES256-SHA:"
+    "AES128-GCM-SHA256:"
+    "AES256-GCM-SHA384:"
+    "AES128-SHA:"
+    "AES256-SHA"
+)
+CIPHERS_CHROMIUM = (
+    "TLS_AES_128_GCM_SHA256:"
+    "TLS_AES_256_GCM_SHA384:"
+    "TLS_CHACHA20_POLY1305_SHA256:"
+    "ECDHE-ECDSA-AES128-GCM-SHA256:"
+    "ECDHE-RSA-AES128-GCM-SHA256:"
+    "ECDHE-ECDSA-AES256-GCM-SHA384:"
+    "ECDHE-RSA-AES256-GCM-SHA384:"
+    "ECDHE-ECDSA-CHACHA20-POLY1305:"
+    "ECDHE-RSA-CHACHA20-POLY1305:"
+    "ECDHE-RSA-AES128-SHA:"
+    "ECDHE-RSA-AES256-SHA:"
+    "AES128-GCM-SHA256:"
+    "AES256-GCM-SHA384:"
+    "AES128-SHA:"
+    "AES256-SHA"
+)
+CIPHERS = {
+    "firefox"    : CIPHERS_FIREFOX,
+    "firefox/140": CIPHERS_FIREFOX,
+    "firefox/128": CIPHERS_FIREFOX,
+    "chrome"     : CIPHERS_CHROMIUM,
+    "chrome/138" : CIPHERS_CHROMIUM,
+    "chrome/111" : CIPHERS_CHROMIUM,
 }
```
