diff options
| author | 2019-11-10 22:14:10 -0500 | |
|---|---|---|
| committer | 2019-11-10 22:14:10 -0500 | |
| commit | 0c73e982fa596da07f23b377621ab894a9e64884 (patch) | |
| tree | 96f6a40a5656c15a2ec7217a8a1efcff5827bcbb /gallery_dl/extractor/4chan.py | |
| parent | 40f5fe6edef268632d3bc484e85e5b37bad67bff (diff) | |
New upstream version 1.11.1 (upstream/1.11.1)
Diffstat (limited to 'gallery_dl/extractor/4chan.py')
| -rw-r--r-- | gallery_dl/extractor/4chan.py | 43 |
1 file changed, 34 insertions, 9 deletions
```diff
diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py
index e387b33..36a0573 100644
--- a/gallery_dl/extractor/4chan.py
+++ b/gallery_dl/extractor/4chan.py
@@ -6,15 +6,19 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract images and videos from https://www.4chan.org/"""
+"""Extractors for https://www.4chan.org/"""
 
-from . import chan
+from .common import Extractor, Message
 from .. import text
 
 
-class FourchanThreadExtractor(chan.ChanThreadExtractor):
-    """Extractor for images from threads from 4chan.org"""
+class _4chanThreadExtractor(Extractor):
+    """Extractor for 4chan threads"""
     category = "4chan"
+    subcategory = "thread"
+    directory_fmt = ("{category}", "{board}", "{thread} {title}")
+    filename_fmt = "{tim} {filename}.{extension}"
+    archive_fmt = "{board}_{thread}_{tim}"
     pattern = (r"(?:https?://)?boards\.4chan(?:nel)?\.org"
                r"/([^/]+)/thread/(\d+)")
     test = (
@@ -28,9 +32,30 @@ class FourchanThreadExtractor(chan.ChanThreadExtractor):
             "keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a",
         }),
     )
-    api_url = "https://a.4cdn.org/{board}/thread/{thread}.json"
-    file_url = "https://i.4cdn.org/{board}/{tim}{ext}"
 
-    def update(self, post, data=None):
-        chan.ChanThreadExtractor.update(self, post, data)
-        post["filename"] = text.unescape(post["filename"])
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.board, self.thread = match.groups()
+
+    def items(self):
+        url = "https://a.4cdn.org/{}/thread/{}.json".format(
+            self.board, self.thread)
+        posts = self.request(url).json()["posts"]
+        title = posts[0].get("sub") or text.remove_html(posts[0]["com"])
+
+        data = {
+            "board" : self.board,
+            "thread": self.thread,
+            "title" : text.unescape(title)[:50],
+        }
+
+        yield Message.Version, 1
+        yield Message.Directory, data
+        for post in posts:
+            if "filename" in post:
+                post.update(data)
+                post["extension"] = post["ext"][1:]
+                post["filename"] = text.unescape(post["filename"])
+                url = "https://i.4cdn.org/{}/{}{}".format(
+                    post["board"], post["tim"], post["ext"])
+                yield Message.Url, url, post
```
