diff options
Diffstat (limited to 'gallery_dl/extractor/4chan.py')
| -rw-r--r-- | gallery_dl/extractor/4chan.py | 33 |
1 files changed, 22 insertions, 11 deletions
diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py index 2db6042..d81f305 100644 --- a/gallery_dl/extractor/4chan.py +++ b/gallery_dl/extractor/4chan.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2023 Mike Fährmann +# Copyright 2015-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -28,9 +28,8 @@ class _4chanThreadExtractor(Extractor): self.board, self.thread = match.groups() def items(self): - url = "https://a.4cdn.org/{}/thread/{}.json".format( - self.board, self.thread) - posts = self.request(url).json()["posts"] + url = f"https://a.4cdn.org/{self.board}/thread/{self.thread}.json" + posts = self.request_json(url)["posts"] title = posts[0].get("sub") or text.remove_html(posts[0]["com"]) data = { @@ -45,11 +44,23 @@ class _4chanThreadExtractor(Extractor): post.update(data) post["extension"] = post["ext"][1:] post["filename"] = text.unescape(post["filename"]) - url = "https://i.4cdn.org/{}/{}{}".format( - post["board"], post["tim"], post["ext"]) + post["_http_signature"] = _detect_null_byte + url = (f"https://i.4cdn.org" + f"/{post['board']}/{post['tim']}{post['ext']}") yield Message.Url, url, post +def _detect_null_byte(signature): + """Return False if all file signature bytes are null""" + if signature: + if signature[0]: + return True + for byte in signature: + if byte: + return True + return "File data consists of null bytes" + + class _4chanBoardExtractor(Extractor): """Extractor for 4chan boards""" category = "4chan" @@ -59,16 +70,16 @@ class _4chanBoardExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) - self.board = match.group(1) + self.board = match[1] def items(self): - url = "https://a.4cdn.org/{}/threads.json".format(self.board) - threads = self.request(url).json() + url = f"https://a.4cdn.org/{self.board}/threads.json" + threads = self.request_json(url) for page in threads: for thread in page["threads"]: - url = "https://boards.4chan.org/{}/thread/{}/".format( - self.board, thread["no"]) + url = (f"https://boards.4chan.org" + f"/{self.board}/thread/{thread['no']}/") thread["page"] = page["page"] thread["_extractor"] = _4chanThreadExtractor yield Message.Queue, url, thread |
