aboutsummaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/4chan.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/4chan.py')
-rw-r--r--gallery_dl/extractor/4chan.py33
1 files changed, 22 insertions, 11 deletions
diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py
index 2db6042..d81f305 100644
--- a/gallery_dl/extractor/4chan.py
+++ b/gallery_dl/extractor/4chan.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2023 Mike Fährmann
+# Copyright 2015-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -28,9 +28,8 @@ class _4chanThreadExtractor(Extractor):
self.board, self.thread = match.groups()
def items(self):
- url = "https://a.4cdn.org/{}/thread/{}.json".format(
- self.board, self.thread)
- posts = self.request(url).json()["posts"]
+ url = f"https://a.4cdn.org/{self.board}/thread/{self.thread}.json"
+ posts = self.request_json(url)["posts"]
title = posts[0].get("sub") or text.remove_html(posts[0]["com"])
data = {
@@ -45,11 +44,23 @@ class _4chanThreadExtractor(Extractor):
post.update(data)
post["extension"] = post["ext"][1:]
post["filename"] = text.unescape(post["filename"])
- url = "https://i.4cdn.org/{}/{}{}".format(
- post["board"], post["tim"], post["ext"])
+ post["_http_signature"] = _detect_null_byte
+ url = (f"https://i.4cdn.org"
+ f"/{post['board']}/{post['tim']}{post['ext']}")
yield Message.Url, url, post
+def _detect_null_byte(signature):
+ """Return False if all file signature bytes are null"""
+ if signature:
+ if signature[0]:
+ return True
+ for byte in signature:
+ if byte:
+ return True
+ return "File data consists of null bytes"
+
+
class _4chanBoardExtractor(Extractor):
"""Extractor for 4chan boards"""
category = "4chan"
@@ -59,16 +70,16 @@ class _4chanBoardExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.board = match.group(1)
+ self.board = match[1]
def items(self):
- url = "https://a.4cdn.org/{}/threads.json".format(self.board)
- threads = self.request(url).json()
+ url = f"https://a.4cdn.org/{self.board}/threads.json"
+ threads = self.request_json(url)
for page in threads:
for thread in page["threads"]:
- url = "https://boards.4chan.org/{}/thread/{}/".format(
- self.board, thread["no"])
+ url = (f"https://boards.4chan.org"
+ f"/{self.board}/thread/{thread['no']}/")
thread["page"] = page["page"]
thread["_extractor"] = _4chanThreadExtractor
yield Message.Queue, url, thread