New upstream version 1.26.7. [upstream/1.26.7]

author: Unit 193 <unit193@unit193.net> 2024-01-23 23:35:00 -0500
committer: Unit 193 <unit193@unit193.net> 2024-01-23 23:35:00 -0500
commit: 12e23f1195164dcb740d6d4a4287e762c9e5e534 (patch)
tree: e6b13483475c510ea2f685c21363271f23745c56 /gallery_dl/extractor/batoto.py
parent: e949aaf6f6ac93896947d5b736e48e7911926efb (diff)
1 files changed, 10 insertions, 5 deletions
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index cd6302e..e82cd09 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -10,8 +10,11 @@ from .common import Extractor, ChapterExtractor, MangaExtractor
 from .. import text, exception
 import re
 
-BASE_PATTERN = (r"(?:https?://)?"
-                r"(?:(?:ba|d|w)to\.to|\.to|(?:batotoo|mangatoto)\.com)")
+BASE_PATTERN = (r"(?:https?://)?(?:"
+                r"(?:ba|d|h|m|w)to\.to|"
+                r"(?:(?:manga|read)toto|batocomic|[xz]bato)\.(?:com|net|org)|"
+                r"comiko\.(?:net|org)|"
+                r"bat(?:otoo|o?two)\.com)")
 
 
 class BatotoBase():
@@ -38,7 +41,8 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
     def metadata(self, page):
         extr = text.extract_from(page)
         manga, info, _ = extr("<title>", "<").rsplit(" - ", 3)
-        manga_id = extr("/title/", "/")
+        manga_id = text.extr(
+            extr('rel="canonical" href="', '"'), "/title/", "/")
 
         match = re.match(
             r"(?:Volume\s+(\d+) )?"
@@ -76,12 +80,13 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
     """Extractor for bato.to manga"""
     reverse = False
     chapterclass = BatotoChapterExtractor
-    pattern = BASE_PATTERN + r"/(?:title|series)/(\d+)[^/?#]*/?$"
+    pattern = (BASE_PATTERN +
+               r"/(?:title/(\d+)[^/?#]*|series/(\d+)(?:/[^/?#]*)?)/?$")
     example = "https://bato.to/title/12345-MANGA/"
 
     def __init__(self, match):
         self.root = text.root_from_url(match.group(0))
-        self.manga_id = match.group(1)
+        self.manga_id = match.group(1) or match.group(2)
         url = "{}/title/{}".format(self.root, self.manga_id)
         MangaExtractor.__init__(self, match, url)
author	Unit 193 <unit193@unit193.net>	2024-01-23 23:35:00 -0500
committer	Unit 193 <unit193@unit193.net>	2024-01-23 23:35:00 -0500
commit	12e23f1195164dcb740d6d4a4287e762c9e5e534 (patch)
tree	e6b13483475c510ea2f685c21363271f23745c56 /gallery_dl/extractor/batoto.py
parent	e949aaf6f6ac93896947d5b736e48e7911926efb (diff)