diff options
Diffstat (limited to 'gallery_dl/extractor/readcomiconline.py')
| -rw-r--r-- | gallery_dl/extractor/readcomiconline.py | 45 |
1 files changed, 20 insertions, 25 deletions
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py index 2f2daca..24a0171 100644 --- a/gallery_dl/extractor/readcomiconline.py +++ b/gallery_dl/extractor/readcomiconline.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2023 Mike Fährmann +# Copyright 2016-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,7 +11,6 @@ from .common import Extractor, ChapterExtractor, MangaExtractor from .. import text, exception import binascii -import re BASE_PATTERN = r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.(?:li|to)" @@ -37,9 +36,9 @@ class ReadcomiconlineBase(): "the CAPTCHA, and press ENTER to continue", response.url) self.input() else: - raise exception.StopExtraction( - "Redirect to \n%s\nVisit this URL in your browser and " - "solve the CAPTCHA to continue", response.url) + raise exception.AbortExtraction( + f"Redirect to \n{response.url}\nVisit this URL in your " + f"browser and solve the CAPTCHA to continue") class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): @@ -48,12 +47,8 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): pattern = BASE_PATTERN + r"(/Comic/[^/?#]+/[^/?#]+\?)([^#]+)" example = "https://readcomiconline.li/Comic/TITLE/Issue-123?id=12345" - def __init__(self, match): - ChapterExtractor.__init__(self, match) - self.params = match.group(2) - def _init(self): - params = text.parse_query(self.params) + params = text.parse_query(self.groups[1]) quality = self.config("quality") if quality is None or quality == "auto": @@ -61,17 +56,18 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): params["quality"] = "hq" else: params["quality"] = str(quality) + params["readType"] = "0" # force "One page" Reading mode (#7890) - self.gallery_url += "&".join(k + "=" + v for k, v in params.items()) + self.page_url += "&".join(f"{k}={v}" for k, v in params.items()) self.issue_id = params.get("id") def metadata(self, page): comic, pos = text.extract(page, " - Read\r\n ", "\r\n") iinfo, pos = text.extract(page, " ", "\r\n", pos) - match = re.match(r"(?:Issue )?#(\d+)|(.+)", iinfo) + match = text.re(r"(?:Issue )?#(\d+)|(.+)").match(iinfo) return { "comic": comic, - "issue": match.group(1) or match.group(2), + "issue": match[1] or match[2], "issue_id": text.parse_int(self.issue_id), "lang": "en", "language": "English", @@ -79,22 +75,21 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): def images(self, page): results = [] - referer = {"_http_headers": {"Referer": self.gallery_url}} - root = text.extr(page, "return baeu(l, '", "'") + referer = {"_http_headers": {"Referer": self.page_url}} + root, pos = text.extract(page, "return baeu(l, '", "'") + _ , pos = text.extract(page, "var pth = '", "", pos) + var , pos = text.extract(page, "var ", "= '", pos) - replacements = re.findall( - r"l = l\.replace\(/([^/]+)/g, [\"']([^\"']*)", page) + replacements = text.re( + r"l = l\.replace\(/([^/]+)/g, [\"']([^\"']*)").findall(page) - for block in page.split("\t\tpht = '")[1:]: - pth = text.extr(block, "", "'") + for path in page.split(var)[2:]: + path = text.extr(path, "= '", "'") - for needle, repl in re.findall( - r"pth = pth\.replace\(/([^/]+)/g, [\"']([^\"']*)", block): - pth = pth.replace(needle, repl) for needle, repl in replacements: - pth = pth.replace(needle, repl) + path = path.replace(needle, repl) - results.append((baeu(pth, root), referer)) + results.append((baeu(path, root), referer)) return results @@ -112,7 +107,7 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor): page , pos = text.extract(page, ' class="listing">', '</table>', pos) comic = comic.rpartition("information")[0].strip() - needle = ' title="Read {} '.format(comic) + needle = f' title="Read {comic} ' comic = text.unescape(comic) for item in text.extract_iter(page, ' href="', ' comic online '): |
