diff options
Diffstat (limited to 'gallery_dl/extractor/batoto.py')
| -rw-r--r-- | gallery_dl/extractor/batoto.py | 15 |
1 files changed, 10 insertions, 5 deletions
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index cd6302e..e82cd09 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -10,8 +10,11 @@ from .common import Extractor, ChapterExtractor, MangaExtractor
 from .. import text, exception
 import re
 
-BASE_PATTERN = (r"(?:https?://)?"
-                r"(?:(?:ba|d|w)to\.to|\.to|(?:batotoo|mangatoto)\.com)")
+BASE_PATTERN = (r"(?:https?://)?(?:"
+                r"(?:ba|d|h|m|w)to\.to|"
+                r"(?:(?:manga|read)toto|batocomic|[xz]bato)\.(?:com|net|org)|"
+                r"comiko\.(?:net|org)|"
+                r"bat(?:otoo|o?two)\.com)")
 
 
 class BatotoBase():
@@ -38,7 +41,8 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
     def metadata(self, page):
         extr = text.extract_from(page)
         manga, info, _ = extr("<title>", "<").rsplit(" - ", 3)
-        manga_id = extr("/title/", "/")
+        manga_id = text.extr(
+            extr('rel="canonical" href="', '"'), "/title/", "/")
 
         match = re.match(
             r"(?:Volume\s+(\d+) )?"
@@ -76,12 +80,13 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
     """Extractor for bato.to manga"""
     reverse = False
     chapterclass = BatotoChapterExtractor
-    pattern = BASE_PATTERN + r"/(?:title|series)/(\d+)[^/?#]*/?$"
+    pattern = (BASE_PATTERN +
+               r"/(?:title/(\d+)[^/?#]*|series/(\d+)(?:/[^/?#]*)?)/?$")
     example = "https://bato.to/title/12345-MANGA/"
 
     def __init__(self, match):
         self.root = text.root_from_url(match.group(0))
-        self.manga_id = match.group(1)
+        self.manga_id = match.group(1) or match.group(2)
         url = "{}/title/{}".format(self.root, self.manga_id)
         MangaExtractor.__init__(self, match, url)
 
