about summary refs log tree commit diff stats
path: root/gallery_dl/extractor/foolfuuka.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/foolfuuka.py')
-rw-r--r-- gallery_dl/extractor/foolfuuka.py 232
1 files changed, 113 insertions, 119 deletions
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 319ebe2..0bcec2b 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -8,21 +8,21 @@
"""Extractors for 4chan archives based on FoolFuuka"""
-from .common import Extractor, Message, generate_extractors
+from .common import BaseExtractor, Message
from .. import text
import itertools
-class FoolfuukaExtractor(Extractor):
+class FoolfuukaExtractor(BaseExtractor):
"""Base extractor for FoolFuuka based boards/archives"""
basecategory = "foolfuuka"
archive_fmt = "{board[shortname]}_{num}_{timestamp}"
external = "default"
def __init__(self, match):
- Extractor.__init__(self, match)
+ BaseExtractor.__init__(self, match)
self.session.headers["Referer"] = self.root
- if self.external == "direct":
+ if self.category == "b4k":
self.remote = self._remote_direct
def items(self):
@@ -43,7 +43,7 @@ class FoolfuukaExtractor(Extractor):
yield Message.Url, url, post
def metadata(self):
- """ """
+ """Return general metadata"""
def posts(self):
"""Return an iterable with all relevant posts"""
@@ -59,16 +59,90 @@ class FoolfuukaExtractor(Extractor):
return media["remote_media_link"]
+BASE_PATTERN = FoolfuukaExtractor.update({
+ "4plebs": {
+ "root": "https://archive.4plebs.org",
+ "pattern": r"(?:archive\.)?4plebs\.org",
+ },
+ "archivedmoe": {
+ "root": "https://archived.moe",
+ },
+ "archiveofsins": {
+ "root": "https://archiveofsins.com",
+ "pattern": r"(?:www\.)?archiveofsins\.com",
+ },
+ "b4k": {
+ "root": "https://arch.b4k.co",
+ },
+ "desuarchive": {
+ "root": "https://desuarchive.org",
+ },
+ "fireden": {
+ "root": "https://boards.fireden.net",
+ },
+ "nyafuu": {
+ "root": "https://archive.nyafuu.org",
+ "pattern": r"(?:archive\.)?nyafuu\.org",
+ },
+ "rbt": {
+ "root": "https://rbt.asia",
+ "pattern": r"(?:rbt\.asia|(?:archive\.)?rebeccablacktech\.com)",
+ },
+ "thebarchive": {
+ "root": "https://thebarchive.com",
+ "pattern": r"thebarchive\.com",
+ },
+})
+
+
class FoolfuukaThreadExtractor(FoolfuukaExtractor):
"""Base extractor for threads on FoolFuuka based boards/archives"""
subcategory = "thread"
directory_fmt = ("{category}", "{board[shortname]}",
"{thread_num}{title:? - //}")
- pattern_fmt = r"/([^/?#]+)/thread/(\d+)"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)"
+ test = (
+ ("https://archive.4plebs.org/tg/thread/54059290", {
+ "url": "07452944164b602502b02b24521f8cee5c484d2a",
+ }),
+ ("https://archived.moe/gd/thread/309639/", {
+ "url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
+ "content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573",
+ }),
+ ("https://archived.moe/a/thread/159767162/", {
+ "url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
+ }),
+ ("https://archiveofsins.com/h/thread/4668813/", {
+ "url": "f612d287087e10a228ef69517cf811539db9a102",
+ "content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
+ }),
+ ("https://arch.b4k.co/meta/thread/196/", {
+ "url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
+ }),
+ ("https://desuarchive.org/a/thread/159542679/", {
+ "url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406",
+ }),
+ ("https://boards.fireden.net/sci/thread/11264294/", {
+ "url": "3adfe181ee86a8c23021c705f623b3657a9b0a43",
+ }),
+ ("https://archive.nyafuu.org/c/thread/2849220/", {
+ "url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
+ }),
+ ("https://rbt.asia/g/thread/61487650/", {
+ "url": "61896d9d9a2edb556b619000a308a984307b6d30",
+ }),
+ ("https://archive.rebeccablacktech.com/g/thread/61487650/", {
+ "url": "61896d9d9a2edb556b619000a308a984307b6d30",
+ }),
+ ("https://thebarchive.com/b/thread/739772332/", {
+ "url": "e8b18001307d130d67db31740ce57c8561b5d80c",
+ }),
+ )
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
- self.board, self.thread = match.groups()
+ self.board = match.group(match.lastindex-1)
+ self.thread = match.group(match.lastindex)
self.data = None
def metadata(self):
@@ -78,23 +152,34 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
return self.data["op"]
def posts(self):
+ op = (self.data["op"],)
posts = self.data.get("posts")
if posts:
posts = list(posts.values())
posts.sort(key=lambda p: p["timestamp"])
- else:
- posts = ()
- return itertools.chain((self.data["op"],), posts)
+ return itertools.chain(op, posts)
+ return op
class FoolfuukaBoardExtractor(FoolfuukaExtractor):
"""Base extractor for FoolFuuka based boards/archives"""
subcategory = "board"
- pattern_fmt = r"/([^/?#]+)/\d*$"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
+ test = (
+ ("https://archive.4plebs.org/tg/"),
+ ("https://archived.moe/gd/"),
+ ("https://archiveofsins.com/h/"),
+ ("https://arch.b4k.co/meta/"),
+ ("https://desuarchive.org/a/"),
+ ("https://boards.fireden.net/sci/"),
+ ("https://archive.nyafuu.org/c/"),
+ ("https://rbt.asia/g/"),
+ ("https://thebarchive.com/b/"),
+ )
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
- self.board = match.group(1)
+ self.board = match.group(match.lastindex)
def items(self):
index_base = "{}/_/api/chan/index/?board={}&page=".format(
@@ -113,7 +198,7 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
for num, thread in threads.items():
thread["url"] = thread_base + format(num)
- thread["_extractor"] = self.childclass
+ thread["_extractor"] = FoolfuukaThreadExtractor
yield Message.Queue, thread["url"], thread
@@ -121,15 +206,24 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
"""Base extractor for search results on FoolFuuka based boards/archives"""
subcategory = "search"
directory_fmt = ("{category}", "search", "{search}")
- pattern_fmt = r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
request_interval = 1.0
+ test = (
+ ("https://archive.4plebs.org/_/search/text/test/"),
+ ("https://archived.moe/_/search/text/test/"),
+ ("https://archiveofsins.com/_/search/text/test/"),
+ ("https://arch.b4k.co/_/search/text/test/"),
+ ("https://desuarchive.org/_/search/text/test/"),
+ ("https://boards.fireden.net/_/search/text/test/"),
+ ("https://archive.nyafuu.org/_/search/text/test/"),
+ ("https://rbt.asia/_/search/text/test/"),
+ ("https://thebarchive.com/_/search/text/test/"),
+ )
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
- board, search = match.groups()
-
self.params = params = {}
- args = search.split("/")
+ args = match.group(match.lastindex).split("/")
key = None
for arg in args:
@@ -138,6 +232,8 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
key = None
else:
key = arg
+
+ board = match.group(match.lastindex-1)
if board != "_":
params["boards"] = board
@@ -170,105 +266,3 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
if len(posts) <= 3:
return
params["page"] += 1
-
-
-EXTRACTORS = {
- "4plebs": {
- "name": "_4plebs",
- "root": "https://archive.4plebs.org",
- "pattern": r"(?:archive\.)?4plebs\.org",
- "test-thread": ("https://archive.4plebs.org/tg/thread/54059290", {
- "url": "07452944164b602502b02b24521f8cee5c484d2a",
- }),
- "test-board": ("https://archive.4plebs.org/tg/",),
- "test-search": ("https://archive.4plebs.org/_/search/text/test/",),
- },
- "archivedmoe": {
- "root": "https://archived.moe",
- "test-thread": (
- ("https://archived.moe/gd/thread/309639/", {
- "url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
- "content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573",
- }),
- ("https://archived.moe/a/thread/159767162/", {
- "url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
- }),
- ),
- "test-board": ("https://archived.moe/gd/",),
- "test-search": ("https://archived.moe/_/search/text/test/",),
- },
- "archiveofsins": {
- "root": "https://archiveofsins.com",
- "pattern": r"(?:www\.)?archiveofsins\.com",
- "test-thread": ("https://archiveofsins.com/h/thread/4668813/", {
- "url": "f612d287087e10a228ef69517cf811539db9a102",
- "content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
- }),
- "test-board": ("https://archiveofsins.com/h/",),
- "test-search": ("https://archiveofsins.com/_/search/text/test/",),
- },
- "b4k": {
- "root": "https://arch.b4k.co",
- "extra": {"external": "direct"},
- "test-thread": ("https://arch.b4k.co/meta/thread/196/", {
- "url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
- }),
- "test-board": ("https://arch.b4k.co/meta/",),
- "test-search": ("https://arch.b4k.co/_/search/text/test/",),
- },
- "desuarchive": {
- "root": "https://desuarchive.org",
- "test-thread": ("https://desuarchive.org/a/thread/159542679/", {
- "url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406",
- }),
- "test-board": ("https://desuarchive.org/a/",),
- "test-search": ("https://desuarchive.org/_/search/text/test/",),
- },
- "fireden": {
- "root": "https://boards.fireden.net",
- "test-thread": ("https://boards.fireden.net/sci/thread/11264294/", {
- "url": "3adfe181ee86a8c23021c705f623b3657a9b0a43",
- }),
- "test-board": ("https://boards.fireden.net/sci/",),
- "test-search": ("https://boards.fireden.net/_/search/text/test/",),
- },
- "nyafuu": {
- "root": "https://archive.nyafuu.org",
- "pattern": r"(?:archive\.)?nyafuu\.org",
- "test-thread": ("https://archive.nyafuu.org/c/thread/2849220/", {
- "url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
- }),
- "test-board": ("https://archive.nyafuu.org/c/",),
- "test-search": ("https://archive.nyafuu.org/_/search/text/test/",),
- },
- "rbt": {
- "root": "https://rbt.asia",
- "pattern": r"(?:rbt\.asia|(?:archive\.)?rebeccablacktech\.com)",
- "test-thread": (
- ("https://rbt.asia/g/thread/61487650/", {
- "url": "61896d9d9a2edb556b619000a308a984307b6d30",
- }),
- ("https://archive.rebeccablacktech.com/g/thread/61487650/", {
- "url": "61896d9d9a2edb556b619000a308a984307b6d30",
- }),
- ),
- "test-board": ("https://rbt.asia/g/",),
- "test-search": ("https://rbt.asia/_/search/text/test/",),
- },
- "thebarchive": {
- "root": "https://thebarchive.com",
- "pattern": r"thebarchive\.com",
- "test-thread": ("https://thebarchive.com/b/thread/739772332/", {
- "url": "e8b18001307d130d67db31740ce57c8561b5d80c",
- }),
- "test-board": ("https://thebarchive.com/b/",),
- "test-search": ("https://thebarchive.com/_/search/text/test/",),
- },
- "_ckey": "childclass",
-}
-
-generate_extractors(EXTRACTORS, globals(), (
- FoolfuukaThreadExtractor,
- FoolfuukaBoardExtractor,
- FoolfuukaSearchExtractor,
-))