diff options
Diffstat (limited to 'gallery_dl/extractor/2chen.py')
| -rw-r--r-- | gallery_dl/extractor/2chen.py | 16 |
1 files changed, 9 insertions, 7 deletions
diff --git a/gallery_dl/extractor/2chen.py b/gallery_dl/extractor/2chen.py
index 8fffeb0..76a085a 100644
--- a/gallery_dl/extractor/2chen.py
+++ b/gallery_dl/extractor/2chen.py
@@ -16,13 +16,15 @@ class _2chenThreadExtractor(Extractor):
     subcategory = "thread"
     directory_fmt = ("{category}", "{board}", "{thread} {title}")
     filename_fmt = "{time} {filename}.{extension}"
-    archive_fmt = "{board}_{thread}_{hash}"
+    archive_fmt = "{board}_{thread}_{hash}_{time}"
     root = "https://2chen.moe"
     pattern = r"(?:https?://)?2chen\.moe/([^/?#]+)/(\d+)"
     test = (
-        ("https://2chen.moe/jp/303786", {
-            "count": ">= 10",
+        ("https://2chen.moe/tv/496715", {
+            "count": ">= 179",
         }),
+        # 404
+        ("https://2chen.moe/jp/303786"),
     )
 
     def __init__(self, match):
@@ -31,7 +33,7 @@ class _2chenThreadExtractor(Extractor):
 
     def items(self):
         url = "{}/{}/{}".format(self.root, self.board, self.thread)
-        page = self.request(url, encoding="utf-8").text
+        page = self.request(url, encoding="utf-8", notfound="thread").text
         data = self.metadata(page)
         yield Message.Directory, data
         for post in self.posts(page):
@@ -66,7 +68,7 @@ class _2chenThreadExtractor(Extractor):
                 "%d %b %Y (%a) %H:%M:%S"
             ),
             "no"      : extr('href="#p', '"'),
-            "url"     : extr('</span><a href="', '"'),
+            "url"     : extr('</a><a href="', '"'),
             "filename": text.unescape(extr('download="', '"')),
             "hash"    : extr('data-hash="', '"'),
         }
@@ -77,7 +79,7 @@ class _2chenBoardExtractor(Extractor):
     category = "2chen"
     subcategory = "board"
     root = "https://2chen.moe"
-    pattern = r"(?:https?://)?2chen\.moe/([^/?#]+)(?:/catalog)?/?$"
+    pattern = r"(?:https?://)?2chen\.moe/([^/?#]+)(?:/catalog|/?$)"
     test = (
         ("https://2chen.moe/co/", {
             "pattern": _2chenThreadExtractor.pattern
@@ -92,7 +94,7 @@ class _2chenBoardExtractor(Extractor):
 
     def items(self):
         url = "{}/{}/catalog".format(self.root, self.board)
-        page = self.request(url).text
+        page = self.request(url, notfound="board").text
         data = {"_extractor": _2chenThreadExtractor}
         for thread in text.extract_iter(
                 page, '<figure><a href="', '"'):
