diff options
Diffstat (limited to 'gallery_dl/extractor/mangafox.py')
| -rw-r--r-- | gallery_dl/extractor/mangafox.py | 67 |
1 files changed, 61 insertions, 6 deletions
diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py
index a9d504e..f6514ca 100644
--- a/gallery_dl/extractor/mangafox.py
+++ b/gallery_dl/extractor/mangafox.py
@@ -6,17 +6,21 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extractors for from https://fanfox.net/"""
+"""Extractors for https://fanfox.net/"""
 
-from .common import ChapterExtractor
+from .common import ChapterExtractor, MangaExtractor
 from .. import text
+import re
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.|m\.)?(?:fanfox\.net|mangafox\.me)"
 
 
 class MangafoxChapterExtractor(ChapterExtractor):
-    """Extractor for manga-chapters from fanfox.net"""
+    """Extractor for manga chapters from fanfox.net"""
     category = "mangafox"
-    pattern = (r"(?:https?://)?(?:www\.|m\.)?(?:fanfox\.net|mangafox\.me)"
-               r"(/manga/[^/]+/((?:v([^/?#]+)/)?c(\d+)([^/?#]*)))")
+    root = "https://m.fanfox.net"
+    pattern = BASE_PATTERN + \
+        r"(/manga/[^/?#]+/((?:v([^/?#]+)/)?c(\d+)([^/?#]*)))"
     test = (
         ("http://fanfox.net/manga/kidou_keisatsu_patlabor/v05/c006.2/1.html", {
             "keyword": "5661dab258d42d09d98f194f7172fb9851a49766",
@@ -25,7 +29,6 @@ class MangafoxChapterExtractor(ChapterExtractor):
         ("http://mangafox.me/manga/kidou_keisatsu_patlabor/v05/c006.2/"),
         ("http://fanfox.net/manga/black_clover/vTBD/c295/1.html"),
     )
-    root = "https://m.fanfox.net"
 
     def __init__(self, match):
         base, self.cstr, self.volume, self.chapter, self.minor = match.groups()
@@ -60,3 +63,55 @@ class MangafoxChapterExtractor(ChapterExtractor):
 
             pnum += 2
             page = self.request("{}/{}.html".format(self.urlbase, pnum)).text
+
+
+class MangafoxMangaExtractor(MangaExtractor):
+    """Extractor for manga from fanfox.net"""
+    category = "mangafox"
+    root = "https://m.fanfox.net"
+    chapterclass = MangafoxChapterExtractor
+    pattern = BASE_PATTERN + r"(/manga/[^/?#]+)/?$"
+    test = (
+        ("https://fanfox.net/manga/kanojo_mo_kanojo", {
+            "pattern": MangafoxChapterExtractor.pattern,
+            "count": ">=60",
+        }),
+        ("https://mangafox.me/manga/shangri_la_frontier", {
+            "pattern": MangafoxChapterExtractor.pattern,
+            "count": ">=45",
+        }),
+        ("https://m.fanfox.net/manga/sentai_daishikkaku"),
+    )
+
+    def chapters(self, page):
+        match_info = re.compile(r"Ch (\d+)(\S*)(?: (.*))?").match
+        manga, pos = text.extract(page, '<p class="title">', '</p>')
+        author, pos = text.extract(page, '<p>Author(s):', '</p>', pos)
+        data = {
+            "manga"   : text.unescape(manga),
+            "author"  : text.remove_html(author),
+            "lang"    : "en",
+            "language": "English",
+        }
+
+        results = []
+        pos = page.index('<dd class="chlist">')
+        while True:
+            url, pos = text.extract(page, '<a href="//', '"', pos)
+            if url == 'mangafox.la?f=mobile':
+                return results
+            info, pos = text.extract(page, '>', '<span', pos)
+            date, pos = text.extract(page, 'right">', '</span>', pos)
+
+            match = match_info(text.unescape(info))
+            if match:
+                chapter, minor, title = match.groups()
+                chapter_minor = minor
+            else:
+                chapter, _, minor = url[:-7].rpartition("/c")[2].partition(".")
+                chapter_minor = "." + minor
+
+            data["chapter"] = text.parse_int(chapter)
+            data["chapter_minor"] = chapter_minor if minor else ""
+            data["date"] = date
+            results.append(("https://" + url, data.copy()))
|
