# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://bato.to/"""

from .common import Extractor, ChapterExtractor, MangaExtractor
from .. import text, util
from ..cache import memcache

BASE_PATTERN = (r"(?:https?://)?("
                r"(?:ba|d|f|h|j|m|w)to\.to|"
                r"(?:(?:manga|read)toto|batocomic|[xz]bato)\.(?:com|net|org)|"
                r"comiko\.(?:net|org)|"
                r"bat(?:otoo|o?two)\.com)")

# https://rentry.co/batoto
DOMAINS = {
    "dto.to",
    "fto.to",
    "hto.to",
    "jto.to",
    "mto.to",
    "wto.to",
    "xbato.com",
    "xbato.net",
    "xbato.org",
    "zbato.com",
    "zbato.net",
    "zbato.org",
    "readtoto.com",
    "readtoto.net",
    "readtoto.org",
    "batocomic.com",
    "batocomic.net",
    "batocomic.org",
    "batotoo.com",
    "batotwo.com",
    "comiko.net",
    "comiko.org",
    "battwo.com",
}
LEGACY_DOMAINS = {
    "bato.to",
    "mangatoto.com",
    "mangatoto.net",
    "mangatoto.org",
}


class BatotoBase:
    """Shared behaviour for all batoto extractors.

    Supplies the category name, a default root URL, root selection based
    on the "domain" config option, and a request() wrapper that pins the
    response encoding.
    """
    category = "batoto"
    root = "https://xbato.org"
    # Flipped to False on BatotoBase itself after the first legacy-domain
    # warning, so instances reading the flag afterwards skip the warning.
    _warn_legacy = True

    def _init_root(self):
        """Set self.root according to the "domain" config option.

        Recognised option values:

        * None, "auto", "url" -- use self.groups[0] (presumably the domain
          captured by the first group of BASE_PATTERN) and log a warning
          if it is listed in LEGACY_DOMAINS and the flag is still set.
        * "nolegacy" -- use self.groups[0], unless it is listed in
          LEGACY_DOMAINS, in which case "xbato.org" is used instead.
        * "nowarn" -- use self.groups[0] without any legacy check.
        * anything else -- the option value itself is taken as the domain.
        """
        option = self.config("domain")

        if option is None or option in {"auto", "url"}:
            host = self.groups[0]
            if host in LEGACY_DOMAINS and self._warn_legacy:
                # assign on the class, not the instance, to share the flag
                BatotoBase._warn_legacy = False
                self.log.warning("Legacy domain '%s'", host)
        elif option == "nolegacy":
            url_host = self.groups[0]
            host = "xbato.org" if url_host in LEGACY_DOMAINS else url_host
        elif option == "nowarn":
            host = self.groups[0]
        else:
            # user-specified domain
            host = option

        self.root = "https://" + host

    def request(self, url, **kwargs):
        """Delegate to Extractor.request() with encoding forced to "utf-8".

        Any "encoding" keyword supplied by the caller is overwritten.
        """
        kwargs.update(encoding="utf-8")
        return Extractor.request(self, url, **kwargs)


class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
    """Extractor for batoto manga chapters"""
    archive_fmt = "{chapter_id}_{page}"
    pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)"
    example = "https://xbato.org/title/12345-MANGA/54321"

    def __init__(self, match):
        ChapterExtractor.__init__(self, match, False)
        self._init_root()
self.chapter_id = self.groups[1] self.page_url = f"{self.root}/title/0/{self.chapter_id}" def metadata(self, page): extr = text.extract_from(page) try: manga, info, _ = extr("