summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/foolfuuka.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net> 2021-01-11 03:25:41 -0500
committer: Unit 193 <unit193@unit193.net> 2021-01-11 03:25:41 -0500
commit: 6335711bbe769b6b9301a88d88790d7a2f8aa82e (patch)
tree: 9122f4da2bcce66fbee1a2f21260a2de53dc4cc7 /gallery_dl/extractor/foolfuuka.py
parent: 87a5aa088ce33a1196ff409b76a9ea8233bdc634 (diff)
New upstream version 1.16.3. upstream/1.16.3
Diffstat (limited to 'gallery_dl/extractor/foolfuuka.py')
-rw-r--r-- gallery_dl/extractor/foolfuuka.py 167
1 file changed, 141 insertions, 26 deletions
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 8a03dc9..81f2bc2 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,37 +11,26 @@
from .common import Extractor, Message, generate_extractors
from .. import text
import itertools
-import operator
-class FoolfuukaThreadExtractor(Extractor):
+class FoolfuukaExtractor(Extractor):
"""Base extractor for FoolFuuka based boards/archives"""
basecategory = "foolfuuka"
- subcategory = "thread"
- directory_fmt = ("{category}", "{board[shortname]}",
- "{thread_num}{title:? - //}")
archive_fmt = "{board[shortname]}_{num}_{timestamp}"
- pattern_fmt = r"/([^/]+)/thread/(\d+)"
external = "default"
def __init__(self, match):
Extractor.__init__(self, match)
- self.board, self.thread = match.groups()
self.session.headers["Referer"] = self.root
if self.external == "direct":
self.remote = self._remote_direct
def items(self):
- op = True
- yield Message.Version, 1
+ yield Message.Directory, self.metadata()
for post in self.posts():
- if op:
- yield Message.Directory, post
- op = False
- if not post["media"]:
- continue
-
media = post["media"]
+ if not media:
+ continue
url = media["media_link"]
if not url and "remote_media_link" in media:
@@ -53,17 +42,11 @@ class FoolfuukaThreadExtractor(Extractor):
media["media"].rpartition(".")
yield Message.Url, url, post
- def posts(self):
- """Return an iterable with all posts in this thread"""
- url = self.root + "/_/api/chan/thread/"
- params = {"board": self.board, "num": self.thread}
- data = self.request(url, params=params).json()[self.thread]
-
- # sort post-objects by key
- posts = sorted(data.get("posts", {}).items())
- posts = map(operator.itemgetter(1), posts)
+ def metadata(self):
+ """ """
- return itertools.chain((data["op"],), posts)
+ def posts(self):
+ """Return an iterable with all relevant posts"""
def remote(self, media):
"""Resolve a remote media link"""
@@ -76,6 +59,117 @@ class FoolfuukaThreadExtractor(Extractor):
return media["remote_media_link"]
+class FoolfuukaThreadExtractor(FoolfuukaExtractor):
+ """Base extractor for threads on FoolFuuka based boards/archives"""
+ subcategory = "thread"
+ directory_fmt = ("{category}", "{board[shortname]}",
+ "{thread_num}{title:? - //}")
+ pattern_fmt = r"/([^/?#]+)/thread/(\d+)"
+
+ def __init__(self, match):
+ FoolfuukaExtractor.__init__(self, match)
+ self.board, self.thread = match.groups()
+ self.data = None
+
+ def metadata(self):
+ url = self.root + "/_/api/chan/thread/"
+ params = {"board": self.board, "num": self.thread}
+ self.data = self.request(url, params=params).json()[self.thread]
+ return self.data["op"]
+
+ def posts(self):
+ posts = self.data.get("posts")
+ if posts:
+ posts = list(posts.values())
+ posts.sort(key=lambda p: p["timestamp"])
+ else:
+ posts = ()
+ return itertools.chain((self.data["op"],), posts)
+
+
+class FoolfuukaBoardExtractor(FoolfuukaExtractor):
+ """Base extractor for FoolFuuka based boards/archives"""
+ subcategory = "board"
+ pattern_fmt = r"/([^/?#]+)/\d*$"
+
+ def __init__(self, match):
+ FoolfuukaExtractor.__init__(self, match)
+ self.board = match.group(1)
+
+ def items(self):
+ index_base = "{}/_/api/chan/index/?board={}&page=".format(
+ self.root, self.board)
+ thread_base = "{}/{}/thread/".format(self.root, self.board)
+
+ for page in itertools.count(1):
+ with self.request(index_base + format(page)) as response:
+ try:
+ threads = response.json()
+ except ValueError:
+ threads = None
+
+ if not threads:
+ return
+
+ for num, thread in threads.items():
+ thread["url"] = thread_base + format(num)
+ thread["_extractor"] = self.childclass
+ yield Message.Queue, thread["url"], thread
+
+
+class FoolfuukaSearchExtractor(FoolfuukaExtractor):
+ """Base extractor for search results on FoolFuuka based boards/archives"""
+ subcategory = "search"
+ directory_fmt = ("{category}", "search", "{search}")
+ pattern_fmt = r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
+ request_interval = 1.0
+
+ def __init__(self, match):
+ FoolfuukaExtractor.__init__(self, match)
+ board, search = match.groups()
+
+ self.params = params = {}
+ args = search.split("/")
+ key = None
+
+ for arg in args:
+ if key:
+ params[key] = text.unescape(arg)
+ key = None
+ else:
+ key = arg
+ if board != "_":
+ params["boards"] = board
+
+ def metadata(self):
+ return {"search": self.params.get("text", "")}
+
+ def posts(self):
+ url = self.root + "/_/api/chan/search/"
+ params = self.params.copy()
+ params["page"] = text.parse_int(params.get("page"), 1)
+ if "filter" not in params:
+ params["filter"] = "text"
+
+ while True:
+ try:
+ data = self.request(url, params=params).json()
+ except ValueError:
+ return
+
+ if isinstance(data, dict):
+ if data.get("error"):
+ return
+ posts = data["0"]["posts"]
+ elif isinstance(data, list):
+ posts = data[0]["posts"]
+ else:
+ return
+
+ yield from posts
+ params["page"] += 1
+
+
EXTRACTORS = {
"4plebs": {
"name": "_4plebs",
@@ -84,6 +178,8 @@ EXTRACTORS = {
"test-thread": ("https://archive.4plebs.org/tg/thread/54059290", {
"url": "07452944164b602502b02b24521f8cee5c484d2a",
}),
+ "test-board": ("https://archive.4plebs.org/tg/",),
+ "test-search": ("https://archive.4plebs.org/_/search/text/test/",),
},
"archivedmoe": {
"root": "https://archived.moe",
@@ -96,6 +192,8 @@ EXTRACTORS = {
"url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
}),
),
+ "test-board": ("https://archived.moe/gd/",),
+ "test-search": ("https://archived.moe/_/search/text/test/",),
},
"archiveofsins": {
"root": "https://archiveofsins.com",
@@ -104,6 +202,8 @@ EXTRACTORS = {
"url": "f612d287087e10a228ef69517cf811539db9a102",
"content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
}),
+ "test-board": ("https://archiveofsins.com/h/",),
+ "test-search": ("https://archiveofsins.com/_/search/text/test/",),
},
"b4k": {
"root": "https://arch.b4k.co",
@@ -111,18 +211,24 @@ EXTRACTORS = {
"test-thread": ("https://arch.b4k.co/meta/thread/196/", {
"url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
}),
+ "test-board": ("https://arch.b4k.co/meta/",),
+ "test-search": ("https://arch.b4k.co/_/search/text/test/",),
},
"desuarchive": {
"root": "https://desuarchive.org",
"test-thread": ("https://desuarchive.org/a/thread/159542679/", {
"url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406",
}),
+ "test-board": ("https://desuarchive.org/a/",),
+ "test-search": ("https://desuarchive.org/_/search/text/test/",),
},
"fireden": {
"root": "https://boards.fireden.net",
"test-thread": ("https://boards.fireden.net/sci/thread/11264294/", {
"url": "3adfe181ee86a8c23021c705f623b3657a9b0a43",
}),
+ "test-board": ("https://boards.fireden.net/sci/",),
+ "test-search": ("https://boards.fireden.net/_/search/text/test/",),
},
"nyafuu": {
"root": "https://archive.nyafuu.org",
@@ -130,6 +236,8 @@ EXTRACTORS = {
"test-thread": ("https://archive.nyafuu.org/c/thread/2849220/", {
"url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
}),
+ "test-board": ("https://archive.nyafuu.org/c/",),
+ "test-search": ("https://archive.nyafuu.org/_/search/text/test/",),
},
"rbt": {
"root": "https://rbt.asia",
@@ -142,6 +250,8 @@ EXTRACTORS = {
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
}),
),
+ "test-board": ("https://rbt.asia/g/",),
+ "test-search": ("https://rbt.asia/_/search/text/test/",),
},
"thebarchive": {
"root": "https://thebarchive.com",
@@ -149,9 +259,14 @@ EXTRACTORS = {
"test-thread": ("https://thebarchive.com/b/thread/739772332/", {
"url": "e8b18001307d130d67db31740ce57c8561b5d80c",
}),
+ "test-board": ("https://thebarchive.com/b/",),
+ "test-search": ("https://thebarchive.com/_/search/text/test/",),
},
+ "_ckey": "childclass",
}
generate_extractors(EXTRACTORS, globals(), (
FoolfuukaThreadExtractor,
+ FoolfuukaBoardExtractor,
+ FoolfuukaSearchExtractor,
))