diff options
Diffstat (limited to 'gallery_dl/extractor/8chan.py')
| -rw-r--r-- | gallery_dl/extractor/8chan.py | 53 |
1 files changed, 29 insertions, 24 deletions
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py index f81d2a1..ce1c52a 100644 --- a/gallery_dl/extractor/8chan.py +++ b/gallery_dl/extractor/8chan.py @@ -27,12 +27,22 @@ class _8chanExtractor(Extractor): Extractor.__init__(self, match) def _init(self): - now = util.datetime_utcnow() - domain = self.root.rpartition("/")[2] - self.cookies.set( - now.strftime("TOS%Y%m%d"), "1", domain=domain) - self.cookies.set( - (now - timedelta(1)).strftime("TOS%Y%m%d"), "1", domain=domain) + tos = self.cookies_tos_name() + self.cookies.set(tos, "1", domain=self.root[8:]) + + @memcache() + def cookies_tos_name(self): + url = self.root + "/.static/pages/confirmed.html" + headers = {"Referer": self.root + "/.static/pages/disclaimer.html"} + response = self.request(url, headers=headers, allow_redirects=False) + + for cookie in response.cookies: + if cookie.name.lower().startswith("tos"): + self.log.debug("TOS cookie name: %s", cookie.name) + return cookie.name + + self.log.error("Unable to determin TOS cookie name") + return "TOS20241009" @memcache() def cookies_prepare(self): @@ -64,16 +74,14 @@ class _8chanThreadExtractor(_8chanExtractor): "{threadId} {subject[:50]}") filename_fmt = "{postId}{num:?-//} {filename[:200]}.{extension}" archive_fmt = "{boardUri}_{postId}_{num}" - pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)" + pattern = BASE_PATTERN + r"/([^/?#]+)/(?:res|last)/(\d+)" example = "https://8chan.moe/a/res/12345.html" - def __init__(self, match): - _8chanExtractor.__init__(self, match) - _, self.board, self.thread = match.groups() - def items(self): + _, board, thread = self.groups + # fetch thread data - url = "{}/{}/res/{}.".format(self.root, self.board, self.thread) + url = "{}/{}/res/{}.".format(self.root, board, thread) self.session.headers["Referer"] = url + "html" thread = self.request(url + "json").json() thread["postId"] = thread["threadId"] @@ -106,25 +114,22 @@ class _8chanBoardExtractor(_8chanExtractor): pattern = BASE_PATTERN + r"/([^/?#]+)/(?:(\d+)\.html)?$" example = "https://8chan.moe/a/" - def __init__(self, match): - _8chanExtractor.__init__(self, match) - _, self.board, self.page = match.groups() - def items(self): - page = text.parse_int(self.page, 1) - url = "{}/{}/{}.json".format(self.root, self.board, page) - board = self.request(url).json() - threads = board["threads"] + _, board, pnum = self.groups + pnum = text.parse_int(pnum, 1) + url = "{}/{}/{}.json".format(self.root, board, pnum) + data = self.request(url).json() + threads = data["threads"] while True: for thread in threads: thread["_extractor"] = _8chanThreadExtractor url = "{}/{}/res/{}.html".format( - self.root, self.board, thread["threadId"]) + self.root, board, thread["threadId"]) yield Message.Queue, url, thread - page += 1 - if page > board["pageCount"]: + pnum += 1 + if pnum > data["pageCount"]: return - url = "{}/{}/{}.json".format(self.root, self.board, page) + url = "{}/{}/{}.json".format(self.root, board, pnum) threads = self.request(url).json()["threads"] |
