diff options
| author | 2023-06-19 01:14:28 -0400 | |
|---|---|---|
| committer | 2023-06-19 01:14:28 -0400 | |
| commit | 9fb906aeb3816abb42f459d1b67e35024e6f2348 (patch) | |
| tree | 30b039301c783475c0f4d46b0e0c5ec9851b2567 /gallery_dl/extractor/jschan.py | |
| parent | 8950c0f2ef55ec2ed36b3fccc9fd85b64b877c3b (diff) | |
New upstream version 1.25.6. [upstream/1.25.6]
Diffstat (limited to 'gallery_dl/extractor/jschan.py')
| -rw-r--r-- | gallery_dl/extractor/jschan.py | 94 |
1 files changed, 94 insertions, 0 deletions
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

# Origin: gallery_dl/extractor/jschan.py
# (new file, mode 100644, index 0000000..fe758fa)

"""Extractors for jschan Imageboards"""

from .common import BaseExtractor, Message
from .. import text
import itertools


class JschanExtractor(BaseExtractor):
    """Base class for jschan extractors"""
    basecategory = "jschan"


# NOTE(review): BaseExtractor.update() is defined outside this file; its
# return value is used below as the leading part of every URL pattern.
BASE_PATTERN = JschanExtractor.update({
    "94chan": {
        "root": "https://94chan.org",
        "pattern": r"94chan\.org"
    }
})


class JschanThreadExtractor(JschanExtractor):
    """Extractor for jschan threads"""
    subcategory = "thread"
    directory_fmt = ("{category}", "{board}",
                     "{threadId} {subject|nomarkup[:50]}")
    filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
    archive_fmt = "{board}_{postId}_{num}"
    pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)\.html"
    # The trailing comma keeps this a tuple of test cases, the same shape
    # JschanBoardExtractor.test uses, instead of a flat (url, dict) pair.
    test = (
        ("https://94chan.org/art/thread/25.html", {
            "pattern": r"https://94chan.org/file/[0-9a-f]{64}(\.\w+)?",
            "count": ">= 15"
        }),
    )

    def __init__(self, match):
        """Store the board name and thread number captured by 'pattern'"""
        JschanExtractor.__init__(self, match)
        # BASE_PATTERN may contribute capture groups of its own, so the
        # two groups appended by 'pattern' are addressed from the end.
        index = match.lastindex
        self.board = match.group(index-1)
        self.thread = match.group(index)

    def items(self):
        """Yield a Directory message, then one Url message per file

        Fetches '<root>/<board>/thread/<thread>.json' and walks the
        opening post followed by every entry of its 'replies' list.
        Posts without files produce no messages.
        """
        url = "{}/{}/thread/{}.json".format(
            self.root, self.board, self.thread)
        thread = self.request(url).json()
        thread["threadId"] = thread["postId"]
        posts = thread.pop("replies", ())

        yield Message.Directory, thread
        # The thread object doubles as the opening post, so it is
        # processed first and the replies after it.
        for post in itertools.chain((thread,), posts):
            files = post.pop("files", ())
            if not files:
                continue

            # 'thread' serves as the shared metadata dict: the current
            # post's fields are merged over it before being copied into
            # each of that post's files.
            thread.update(post)
            thread["count"] = len(files)
            for num, file in enumerate(files):
                url = self.root + "/file/" + file["filename"]
                file.update(thread)
                file["num"] = num
                # Keep the server-side name under a separate key.
                # NOTE(review): nameext_from_url() presumably fills in
                # 'filename'/'extension' from the original upload name;
                # confirm in gallery_dl.text.
                file["siteFilename"] = file["filename"]
                text.nameext_from_url(file["originalFilename"], file)
                yield Message.Url, url, file


class JschanBoardExtractor(JschanExtractor):
    """Extractor for jschan boards"""
    subcategory = "board"
    pattern = (BASE_PATTERN + r"/([^/?#]+)"
               r"(?:/index\.html|/catalog\.html|/\d+\.html|/?$)")
    test = (
        ("https://94chan.org/art/", {
            "pattern": JschanThreadExtractor.pattern,
            "count": ">= 30"
        }),
        ("https://94chan.org/art/2.html"),
        ("https://94chan.org/art/catalog.html"),
        ("https://94chan.org/art/index.html"),
    )

    def __init__(self, match):
        """Store the board name captured by 'pattern'"""
        JschanExtractor.__init__(self, match)
        # The board name is the last capturing group; the page suffix
        # alternatives that follow it are non-capturing.
        self.board = match.group(match.lastindex)

    def items(self):
        """Yield one Queue message per thread listed in the catalog

        Fetches '<root>/<board>/catalog.json' and hands every listed
        thread to JschanThreadExtractor via its '.html' thread URL.
        """
        catalog_url = "{}/{}/catalog.json".format(self.root, self.board)
        for thread in self.request(catalog_url).json():
            thread_url = "{}/{}/thread/{}.html".format(
                self.root, self.board, thread["postId"])
            thread["_extractor"] = JschanThreadExtractor
            yield Message.Queue, thread_url, thread
