summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/4chan.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@ubuntu.com>, 2019-11-10 22:14:10 -0500
committer: Unit 193 <unit193@ubuntu.com>, 2019-11-10 22:14:10 -0500
commit: 0c73e982fa596da07f23b377621ab894a9e64884 (patch)
tree: 96f6a40a5656c15a2ec7217a8a1efcff5827bcbb /gallery_dl/extractor/4chan.py
parent: 40f5fe6edef268632d3bc484e85e5b37bad67bff (diff)
New upstream version 1.11.1 [upstream/1.11.1]
Diffstat (limited to 'gallery_dl/extractor/4chan.py')
-rw-r--r-- gallery_dl/extractor/4chan.py | 43
1 files changed, 34 insertions, 9 deletions
diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py
index e387b33..36a0573 100644
--- a/gallery_dl/extractor/4chan.py
+++ b/gallery_dl/extractor/4chan.py
@@ -6,15 +6,19 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images and videos from https://www.4chan.org/"""
+"""Extractors for https://www.4chan.org/"""
-from . import chan
+from .common import Extractor, Message
from .. import text
-class FourchanThreadExtractor(chan.ChanThreadExtractor):
- """Extractor for images from threads from 4chan.org"""
+class _4chanThreadExtractor(Extractor):
+ """Extractor for 4chan threads"""
category = "4chan"
+ subcategory = "thread"
+ directory_fmt = ("{category}", "{board}", "{thread} {title}")
+ filename_fmt = "{tim} {filename}.{extension}"
+ archive_fmt = "{board}_{thread}_{tim}"
pattern = (r"(?:https?://)?boards\.4chan(?:nel)?\.org"
r"/([^/]+)/thread/(\d+)")
test = (
@@ -28,9 +32,30 @@ class FourchanThreadExtractor(chan.ChanThreadExtractor):
"keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a",
}),
)
- api_url = "https://a.4cdn.org/{board}/thread/{thread}.json"
- file_url = "https://i.4cdn.org/{board}/{tim}{ext}"
- def update(self, post, data=None):
- chan.ChanThreadExtractor.update(self, post, data)
- post["filename"] = text.unescape(post["filename"])
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board, self.thread = match.groups()
+
+ def items(self):
+ url = "https://a.4cdn.org/{}/thread/{}.json".format(
+ self.board, self.thread)
+ posts = self.request(url).json()["posts"]
+ title = posts[0].get("sub") or text.remove_html(posts[0]["com"])
+
+ data = {
+ "board" : self.board,
+ "thread": self.thread,
+ "title" : text.unescape(title)[:50],
+ }
+
+ yield Message.Version, 1
+ yield Message.Directory, data
+ for post in posts:
+ if "filename" in post:
+ post.update(data)
+ post["extension"] = post["ext"][1:]
+ post["filename"] = text.unescape(post["filename"])
+ url = "https://i.4cdn.org/{}/{}{}".format(
+ post["board"], post["tim"], post["ext"])
+ yield Message.Url, url, post