blob: e387b33184513781d056fd6f3588a2d523f66790 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
# -*- coding: utf-8 -*-
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract images and videos from https://www.4chan.org/"""
from . import chan
from .. import text
class FourchanThreadExtractor(chan.ChanThreadExtractor):
    """Extractor for images posted in threads on 4chan.org"""

    category = "4chan"

    # Accepts thread URLs on boards.4chan.org as well as boards.4channel.org,
    # with or without a scheme. Group 1 is the path segment before
    # "/thread/" and group 2 is the numeric segment after it.
    pattern = (
        r"(?:https?://)?boards\.4chan(?:nel)?\.org"
        r"/([^/]+)/thread/(\d+)"
    )

    # URL templates with {board}/{thread} and {board}/{tim}/{ext}
    # placeholders.
    # NOTE(review): presumably formatted by chan.ChanThreadExtractor, which
    # is not visible in this file -- confirm there.
    api_url = "https://a.4cdn.org/{board}/thread/{thread}.json"
    file_url = "https://i.4cdn.org/{board}/{tim}{ext}"

    # Sample URLs paired with expected-result hashes.
    # NOTE(review): the exact meaning of "url", "keyword" and "content" is
    # defined by the project's test harness -- not visible here.
    test = (
        ("https://boards.4chan.org/tg/thread/15396072/", {
            "url": "39082ad166161966d7ba8e37f2173a824eb540f0",
            "keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a",
            "content": "20b7b51afa51c9c31a0020a0737b889532c8d7ec",
        }),
        ("https://boards.4channel.org/tg/thread/15396072/", {
            "url": "39082ad166161966d7ba8e37f2173a824eb540f0",
            "keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a",
        }),
    )

    def update(self, post, data=None):
        """Run the inherited update on 'post', then unescape its filename.

        The "filename" entry of 'post' is replaced in place by the result
        of text.unescape(); 'post' must therefore contain that key once
        the inherited update has run.
        """
        super().update(post, data)
        escaped_name = post["filename"]
        post["filename"] = text.unescape(escaped_name)
|