Diffstat (limited to 'gallery_dl/extractor/danbooru.py')
-rw-r--r--  gallery_dl/extractor/danbooru.py | 178
1 file changed, 31 insertions(+), 147 deletions(-)
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 7b0e572..f104556 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -9,8 +9,7 @@
"""Extractors for https://danbooru.donmai.us/ and other Danbooru instances"""
from .common import BaseExtractor, Message
-from ..version import __version__
-from .. import text
+from .. import text, util
import datetime
@@ -21,36 +20,13 @@ class DanbooruExtractor(BaseExtractor):
page_limit = 1000
page_start = None
per_page = 200
+ request_interval = 1.0
def __init__(self, match):
- self._init_category(match)
-
- instance = INSTANCES.get(self.category) or {}
- iget = instance.get
-
- self.headers = iget("headers")
- self.page_limit = iget("page-limit", 1000)
- self.page_start = iget("page-start")
- self.per_page = iget("per-page", 200)
- self.request_interval_min = iget("request-interval-min", 0.0)
- self._pools = iget("pools")
- self._popular_endpoint = iget("popular", "/explore/posts/popular.json")
-
BaseExtractor.__init__(self, match)
-
self.ugoira = self.config("ugoira", False)
self.external = self.config("external", False)
- metadata = self.config("metadata", False)
- if metadata:
- if isinstance(metadata, (list, tuple)):
- metadata = ",".join(metadata)
- elif not isinstance(metadata, str):
- metadata = "artist_commentary,children,notes,parent,uploader"
- self.metadata_includes = metadata
- else:
- self.metadata_includes = None
-
threshold = self.config("threshold")
if isinstance(threshold, int):
self.threshold = 1 if threshold < 1 else threshold
@@ -62,10 +38,6 @@ class DanbooruExtractor(BaseExtractor):
self.log.debug("Using HTTP Basic Auth for user '%s'", username)
self.session.auth = (username, api_key)
- def request(self, url, **kwargs):
- kwargs["headers"] = self.headers
- return BaseExtractor.request(self, url, **kwargs)
-
def skip(self, num):
pages = num // self.per_page
if pages >= self.page_limit:
@@ -74,32 +46,28 @@ class DanbooruExtractor(BaseExtractor):
return pages * self.per_page
def items(self):
+ self.session.headers["User-Agent"] = util.USERAGENT
+
+ includes = self.config("metadata")
+ if includes:
+ if isinstance(includes, (list, tuple)):
+ includes = ",".join(includes)
+ elif not isinstance(includes, str):
+ includes = "artist_commentary,children,notes,parent,uploader"
+
data = self.metadata()
for post in self.posts():
- file = post.get("file")
- if file:
- url = file["url"]
- if not url:
- md5 = file["md5"]
- url = file["url"] = (
- "https://static1.{}/data/{}/{}/{}.{}".format(
- self.root[8:], md5[0:2], md5[2:4], md5, file["ext"]
- ))
- post["filename"] = file["md5"]
- post["extension"] = file["ext"]
+ try:
+ url = post["file_url"]
+ except KeyError:
+ if self.external and post["source"]:
+ post.update(data)
+ yield Message.Directory, post
+ yield Message.Queue, post["source"], post
+ continue
- else:
- try:
- url = post["file_url"]
- except KeyError:
- if self.external and post["source"]:
- post.update(data)
- yield Message.Directory, post
- yield Message.Queue, post["source"], post
- continue
-
- text.nameext_from_url(url, post)
+ text.nameext_from_url(url, post)
if post["extension"] == "zip":
if self.ugoira:
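
The consolidated "metadata" option above accepts a bool, string, or list and is reduced to one comma-separated string for the API's only= parameter. A minimal sketch of that normalization, outside the diff, using a made-up helper name that does not exist in gallery-dl:

    # Hypothetical helper mirroring the normalization in items() above.
    def _normalize_includes(value):
        if not value:
            return None                      # metadata disabled (default)
        if isinstance(value, (list, tuple)):
            return ",".join(value)           # ["notes", "parent"] -> "notes,parent"
        if isinstance(value, str):
            return value                     # passed through as-is
        return "artist_commentary,children,notes,parent,uploader"  # e.g. True

    # _normalize_includes(True)
    #   -> "artist_commentary,children,notes,parent,uploader"
    # _normalize_includes(["notes", "parent"])
    #   -> "notes,parent"
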
@@ -109,9 +77,9 @@ class DanbooruExtractor(BaseExtractor):
url = post["large_file_url"]
post["extension"] = "webm"
- if self.metadata_includes:
+ if includes:
meta_url = "{}/posts/{}.json?only={}".format(
- self.root, post["id"], self.metadata_includes)
+ self.root, post["id"], includes)
post.update(self.request(meta_url).json())
if url[0] == "/":
@@ -127,7 +95,7 @@ class DanbooruExtractor(BaseExtractor):
def posts(self):
return ()
- def _pagination(self, endpoint, params, pagenum=False):
+ def _pagination(self, endpoint, params, pages=False):
url = self.root + endpoint
params["limit"] = self.per_page
params["page"] = self.page_start
@@ -141,7 +109,7 @@ class DanbooruExtractor(BaseExtractor):
if len(posts) < self.threshold:
return
- if pagenum:
+ if pages:
params["page"] += 1
else:
for post in reversed(posts):
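
The renamed pages flag selects between two paging strategies: plain numbered pages (used by endpoints like the popular listing) and, in the else branch continuing past this hunk, Danbooru's id-based cursor, where page=b<id> requests posts older than the given id. A rough sketch of the two modes; the cursor branch is an assumption based on Danbooru's documented "b<id>" page syntax, not code from this commit:

    if pages:
        params["page"] += 1                      # plain numbered paging
    else:
        lowest = min(post["id"] for post in posts if "id" in post)
        params["page"] = "b{}".format(lowest)    # "before id" cursor
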
@@ -163,34 +131,20 @@ class DanbooruExtractor(BaseExtractor):
for index, delay in enumerate(delays)]
-INSTANCES = {
+BASE_PATTERN = DanbooruExtractor.update({
"danbooru": {
"root": None,
"pattern": r"(?:danbooru|hijiribe|sonohara|safebooru)\.donmai\.us",
},
- "e621": {
- "root": None,
- "pattern": r"e(?:621|926)\.net",
- "headers": {"User-Agent": "gallery-dl/{} (by mikf)".format(
- __version__)},
- "pools": "sort",
- "popular": "/popular.json",
- "page-limit": 750,
- "per-page": 320,
- "request-interval-min": 1.0,
- },
"atfbooru": {
"root": "https://booru.allthefallen.moe",
"pattern": r"booru\.allthefallen\.moe",
- "page-limit": 5000,
},
"aibooru": {
"root": None,
"pattern": r"(?:safe.)?aibooru\.online",
}
-}
-
-BASE_PATTERN = DanbooruExtractor.update(INSTANCES)
+})
class DanbooruTagExtractor(DanbooruExtractor):
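
DanbooruExtractor.update() is inherited from BaseExtractor in common.py and is not shown in this diff; it registers the instance table and returns a combined URL pattern that the extractor classes below build on. A simplified, non-authoritative sketch of the idea (the real BaseExtractor.update() may differ in detail):

    # Sketch only: collect each instance's host pattern and return one
    # alternation usable as a prefix for the per-extractor path patterns.
    @classmethod
    def update(cls, instances):
        cls.instances = instances
        return r"(?:https?://)?(?:" + "|".join(
            inst["pattern"] for inst in instances.values()) + r")"
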
@@ -213,10 +167,6 @@ class DanbooruTagExtractor(DanbooruExtractor):
"pattern": r"https://i\.pximg\.net/img-original/img"
r"/2008/08/28/02/35/48/1476533_p0\.jpg",
}),
- ("https://e621.net/posts?tags=anry", {
- "url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
- "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
- }),
("https://booru.allthefallen.moe/posts?tags=yume_shokunin", {
"count": 12,
}),
@@ -228,7 +178,6 @@ class DanbooruTagExtractor(DanbooruExtractor):
("https://hijiribe.donmai.us/posts?tags=bonocho"),
("https://sonohara.donmai.us/posts?tags=bonocho"),
("https://safebooru.donmai.us/posts?tags=bonocho"),
- ("https://e926.net/posts?tags=anry"),
("https://safe.aibooru.online/posts?tags=center_frills"),
)
@@ -254,23 +203,17 @@ class DanbooruPoolExtractor(DanbooruExtractor):
("https://danbooru.donmai.us/pools/7659", {
"content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
}),
- ("https://e621.net/pools/73", {
- "url": "1bd09a72715286a79eea3b7f09f51b3493eb579a",
- "content": "91abe5d5334425d9787811d7f06d34c77974cd22",
- }),
("https://booru.allthefallen.moe/pools/9", {
"url": "902549ffcdb00fe033c3f63e12bc3cb95c5fd8d5",
"count": 6,
}),
("https://aibooru.online/pools/1"),
("https://danbooru.donmai.us/pool/show/7659"),
- ("https://e621.net/pool/show/73"),
)
def __init__(self, match):
DanbooruExtractor.__init__(self, match)
self.pool_id = match.group(match.lastindex)
- self.post_ids = ()
def metadata(self):
url = "{}/pools/{}.json".format(self.root, self.pool_id)
@@ -280,29 +223,8 @@ class DanbooruPoolExtractor(DanbooruExtractor):
return {"pool": pool}
def posts(self):
- if self._pools == "sort":
- self.log.info("Fetching posts of pool %s", self.pool_id)
-
- id_to_post = {
- post["id"]: post
- for post in self._pagination(
- "/posts.json", {"tags": "pool:" + self.pool_id})
- }
-
- posts = []
- append = posts.append
- for num, pid in enumerate(self.post_ids, 1):
- if pid in id_to_post:
- post = id_to_post[pid]
- post["num"] = num
- append(post)
- else:
- self.log.warning("Post %s is unavailable", pid)
- return posts
-
- else:
- params = {"tags": "pool:" + self.pool_id}
- return self._pagination("/posts.json", params)
+ params = {"tags": "pool:" + self.pool_id}
+ return self._pagination("/posts.json", params)
class DanbooruPostExtractor(DanbooruExtractor):
@@ -318,10 +240,6 @@ class DanbooruPostExtractor(DanbooruExtractor):
"pattern": r"https?://.+\.zip$",
"options": (("ugoira", True),)
}),
- ("https://e621.net/posts/535", {
- "url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
- "content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
- }),
("https://booru.allthefallen.moe/posts/22", {
"content": "21dda68e1d7e0a554078e62923f537d8e895cac8",
}),
@@ -329,7 +247,6 @@ class DanbooruPostExtractor(DanbooruExtractor):
"content": "54d548743cd67799a62c77cbae97cfa0fec1b7e9",
}),
("https://danbooru.donmai.us/post/show/294929"),
- ("https://e621.net/post/show/535"),
)
def __init__(self, match):
@@ -338,8 +255,7 @@ class DanbooruPostExtractor(DanbooruExtractor):
def posts(self):
url = "{}/posts/{}.json".format(self.root, self.post_id)
- post = self.request(url).json()
- return (post["post"] if "post" in post else post,)
+ return (self.request(url).json(),)
class DanbooruPopularExtractor(DanbooruExtractor):
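
The unwrapping dropped just above existed because e621 nests a single post under a "post" key, while Danbooru-style instances return the post object directly; with e621 support removed, that branch is dead. Illustrative response shapes (paraphrased, not captured output):

    # Danbooru:  GET /posts/294929.json
    #   {"id": 294929, "file_url": "https://...", ...}
    # e621:      GET /posts/535.json
    #   {"post": {"id": 535, "file": {"url": "https://...", ...}, ...}}
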
@@ -355,12 +271,6 @@ class DanbooruPopularExtractor(DanbooruExtractor):
"range": "1-120",
"count": 120,
}),
- ("https://e621.net/popular"),
- (("https://e621.net/explore/posts/popular"
- "?date=2019-06-01&scale=month"), {
- "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
- "count": ">= 70",
- }),
("https://booru.allthefallen.moe/explore/posts/popular"),
("https://aibooru.online/explore/posts/popular"),
)
@@ -385,31 +295,5 @@ class DanbooruPopularExtractor(DanbooruExtractor):
def posts(self):
if self.page_start is None:
self.page_start = 1
- return self._pagination(self._popular_endpoint, self.params, True)
-
-
-class DanbooruFavoriteExtractor(DanbooruExtractor):
- """Extractor for e621 favorites"""
- subcategory = "favorite"
- directory_fmt = ("{category}", "Favorites", "{user_id}")
- archive_fmt = "f_{user_id}_{id}"
- pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
- test = (
- ("https://e621.net/favorites"),
- ("https://e621.net/favorites?page=2&user_id=53275", {
- "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
- "count": "> 260",
- }),
- )
-
- def __init__(self, match):
- DanbooruExtractor.__init__(self, match)
- self.query = text.parse_query(match.group(match.lastindex))
-
- def metadata(self):
- return {"user_id": self.query.get("user_id", "")}
-
- def posts(self):
- if self.page_start is None:
- self.page_start = 1
- return self._pagination("/favorites.json", self.query, True)
+ return self._pagination(
+ "/explore/posts/popular.json", self.params, True)