about | summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/8chan.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/8chan.py')
-rw-r--r-- gallery_dl/extractor/8chan.py 53
1 files changed, 29 insertions, 24 deletions
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index f81d2a1..ce1c52a 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -27,12 +27,22 @@ class _8chanExtractor(Extractor):
Extractor.__init__(self, match)
def _init(self):
- now = util.datetime_utcnow()
- domain = self.root.rpartition("/")[2]
- self.cookies.set(
- now.strftime("TOS%Y%m%d"), "1", domain=domain)
- self.cookies.set(
- (now - timedelta(1)).strftime("TOS%Y%m%d"), "1", domain=domain)
+ tos = self.cookies_tos_name()
+ self.cookies.set(tos, "1", domain=self.root[8:])
+
+ @memcache()
+ def cookies_tos_name(self):
+ url = self.root + "/.static/pages/confirmed.html"
+ headers = {"Referer": self.root + "/.static/pages/disclaimer.html"}
+ response = self.request(url, headers=headers, allow_redirects=False)
+
+ for cookie in response.cookies:
+ if cookie.name.lower().startswith("tos"):
+ self.log.debug("TOS cookie name: %s", cookie.name)
+ return cookie.name
+
+ self.log.error("Unable to determin TOS cookie name")
+ return "TOS20241009"
@memcache()
def cookies_prepare(self):
@@ -64,16 +74,14 @@ class _8chanThreadExtractor(_8chanExtractor):
"{threadId} {subject[:50]}")
filename_fmt = "{postId}{num:?-//} {filename[:200]}.{extension}"
archive_fmt = "{boardUri}_{postId}_{num}"
- pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/(?:res|last)/(\d+)"
example = "https://8chan.moe/a/res/12345.html"
- def __init__(self, match):
- _8chanExtractor.__init__(self, match)
- _, self.board, self.thread = match.groups()
-
def items(self):
+ _, board, thread = self.groups
+
# fetch thread data
- url = "{}/{}/res/{}.".format(self.root, self.board, self.thread)
+ url = "{}/{}/res/{}.".format(self.root, board, thread)
self.session.headers["Referer"] = url + "html"
thread = self.request(url + "json").json()
thread["postId"] = thread["threadId"]
@@ -106,25 +114,22 @@ class _8chanBoardExtractor(_8chanExtractor):
pattern = BASE_PATTERN + r"/([^/?#]+)/(?:(\d+)\.html)?$"
example = "https://8chan.moe/a/"
- def __init__(self, match):
- _8chanExtractor.__init__(self, match)
- _, self.board, self.page = match.groups()
-
def items(self):
- page = text.parse_int(self.page, 1)
- url = "{}/{}/{}.json".format(self.root, self.board, page)
- board = self.request(url).json()
- threads = board["threads"]
+ _, board, pnum = self.groups
+ pnum = text.parse_int(pnum, 1)
+ url = "{}/{}/{}.json".format(self.root, board, pnum)
+ data = self.request(url).json()
+ threads = data["threads"]
while True:
for thread in threads:
thread["_extractor"] = _8chanThreadExtractor
url = "{}/{}/res/{}.html".format(
- self.root, self.board, thread["threadId"])
+ self.root, board, thread["threadId"])
yield Message.Queue, url, thread
- page += 1
- if page > board["pageCount"]:
+ pnum += 1
+ if pnum > data["pageCount"]:
return
- url = "{}/{}/{}.json".format(self.root, self.board, page)
+ url = "{}/{}/{}.json".format(self.root, board, pnum)
threads = self.request(url).json()["threads"]