diff options
Diffstat (limited to 'gallery_dl/extractor/danbooru.py')
| -rw-r--r-- | gallery_dl/extractor/danbooru.py | 178 |
1 files changed, 31 insertions, 147 deletions
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 7b0e572..f104556 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -9,8 +9,7 @@ """Extractors for https://danbooru.donmai.us/ and other Danbooru instances""" from .common import BaseExtractor, Message -from ..version import __version__ -from .. import text +from .. import text, util import datetime @@ -21,36 +20,13 @@ class DanbooruExtractor(BaseExtractor): page_limit = 1000 page_start = None per_page = 200 + request_interval = 1.0 def __init__(self, match): - self._init_category(match) - - instance = INSTANCES.get(self.category) or {} - iget = instance.get - - self.headers = iget("headers") - self.page_limit = iget("page-limit", 1000) - self.page_start = iget("page-start") - self.per_page = iget("per-page", 200) - self.request_interval_min = iget("request-interval-min", 0.0) - self._pools = iget("pools") - self._popular_endpoint = iget("popular", "/explore/posts/popular.json") - BaseExtractor.__init__(self, match) - self.ugoira = self.config("ugoira", False) self.external = self.config("external", False) - metadata = self.config("metadata", False) - if metadata: - if isinstance(metadata, (list, tuple)): - metadata = ",".join(metadata) - elif not isinstance(metadata, str): - metadata = "artist_commentary,children,notes,parent,uploader" - self.metadata_includes = metadata - else: - self.metadata_includes = None - threshold = self.config("threshold") if isinstance(threshold, int): self.threshold = 1 if threshold < 1 else threshold @@ -62,10 +38,6 @@ class DanbooruExtractor(BaseExtractor): self.log.debug("Using HTTP Basic Auth for user '%s'", username) self.session.auth = (username, api_key) - def request(self, url, **kwargs): - kwargs["headers"] = self.headers - return BaseExtractor.request(self, url, **kwargs) - def skip(self, num): pages = num // self.per_page if pages >= self.page_limit: @@ -74,32 +46,28 @@ class DanbooruExtractor(BaseExtractor): return pages * self.per_page def items(self): + self.session.headers["User-Agent"] = util.USERAGENT + + includes = self.config("metadata") + if includes: + if isinstance(includes, (list, tuple)): + includes = ",".join(includes) + elif not isinstance(includes, str): + includes = "artist_commentary,children,notes,parent,uploader" + data = self.metadata() for post in self.posts(): - file = post.get("file") - if file: - url = file["url"] - if not url: - md5 = file["md5"] - url = file["url"] = ( - "https://static1.{}/data/{}/{}/{}.{}".format( - self.root[8:], md5[0:2], md5[2:4], md5, file["ext"] - )) - post["filename"] = file["md5"] - post["extension"] = file["ext"] + try: + url = post["file_url"] + except KeyError: + if self.external and post["source"]: + post.update(data) + yield Message.Directory, post + yield Message.Queue, post["source"], post + continue - else: - try: - url = post["file_url"] - except KeyError: - if self.external and post["source"]: - post.update(data) - yield Message.Directory, post - yield Message.Queue, post["source"], post - continue - - text.nameext_from_url(url, post) + text.nameext_from_url(url, post) if post["extension"] == "zip": if self.ugoira: @@ -109,9 +77,9 @@ class DanbooruExtractor(BaseExtractor): url = post["large_file_url"] post["extension"] = "webm" - if self.metadata_includes: + if includes: meta_url = "{}/posts/{}.json?only={}".format( - self.root, post["id"], self.metadata_includes) + self.root, post["id"], includes) post.update(self.request(meta_url).json()) if url[0] == "/": @@ -127,7 +95,7 @@ class DanbooruExtractor(BaseExtractor): def posts(self): return () - def _pagination(self, endpoint, params, pagenum=False): + def _pagination(self, endpoint, params, pages=False): url = self.root + endpoint params["limit"] = self.per_page params["page"] = self.page_start @@ -141,7 +109,7 @@ class DanbooruExtractor(BaseExtractor): if len(posts) < self.threshold: return - if pagenum: + if pages: params["page"] += 1 else: for post in reversed(posts): @@ -163,34 +131,20 @@ class DanbooruExtractor(BaseExtractor): for index, delay in enumerate(delays)] -INSTANCES = { +BASE_PATTERN = DanbooruExtractor.update({ "danbooru": { "root": None, "pattern": r"(?:danbooru|hijiribe|sonohara|safebooru)\.donmai\.us", }, - "e621": { - "root": None, - "pattern": r"e(?:621|926)\.net", - "headers": {"User-Agent": "gallery-dl/{} (by mikf)".format( - __version__)}, - "pools": "sort", - "popular": "/popular.json", - "page-limit": 750, - "per-page": 320, - "request-interval-min": 1.0, - }, "atfbooru": { "root": "https://booru.allthefallen.moe", "pattern": r"booru\.allthefallen\.moe", - "page-limit": 5000, }, "aibooru": { "root": None, "pattern": r"(?:safe.)?aibooru\.online", } -} - -BASE_PATTERN = DanbooruExtractor.update(INSTANCES) +}) class DanbooruTagExtractor(DanbooruExtractor): @@ -213,10 +167,6 @@ class DanbooruTagExtractor(DanbooruExtractor): "pattern": r"https://i\.pximg\.net/img-original/img" r"/2008/08/28/02/35/48/1476533_p0\.jpg", }), - ("https://e621.net/posts?tags=anry", { - "url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba", - "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58", - }), ("https://booru.allthefallen.moe/posts?tags=yume_shokunin", { "count": 12, }), @@ -228,7 +178,6 @@ class DanbooruTagExtractor(DanbooruExtractor): ("https://hijiribe.donmai.us/posts?tags=bonocho"), ("https://sonohara.donmai.us/posts?tags=bonocho"), ("https://safebooru.donmai.us/posts?tags=bonocho"), - ("https://e926.net/posts?tags=anry"), ("https://safe.aibooru.online/posts?tags=center_frills"), ) @@ -254,23 +203,17 @@ class DanbooruPoolExtractor(DanbooruExtractor): ("https://danbooru.donmai.us/pools/7659", { "content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99", }), - ("https://e621.net/pools/73", { - "url": "1bd09a72715286a79eea3b7f09f51b3493eb579a", - "content": "91abe5d5334425d9787811d7f06d34c77974cd22", - }), ("https://booru.allthefallen.moe/pools/9", { "url": "902549ffcdb00fe033c3f63e12bc3cb95c5fd8d5", "count": 6, }), ("https://aibooru.online/pools/1"), ("https://danbooru.donmai.us/pool/show/7659"), - ("https://e621.net/pool/show/73"), ) def __init__(self, match): DanbooruExtractor.__init__(self, match) self.pool_id = match.group(match.lastindex) - self.post_ids = () def metadata(self): url = "{}/pools/{}.json".format(self.root, self.pool_id) @@ -280,29 +223,8 @@ class DanbooruPoolExtractor(DanbooruExtractor): return {"pool": pool} def posts(self): - if self._pools == "sort": - self.log.info("Fetching posts of pool %s", self.pool_id) - - id_to_post = { - post["id"]: post - for post in self._pagination( - "/posts.json", {"tags": "pool:" + self.pool_id}) - } - - posts = [] - append = posts.append - for num, pid in enumerate(self.post_ids, 1): - if pid in id_to_post: - post = id_to_post[pid] - post["num"] = num - append(post) - else: - self.log.warning("Post %s is unavailable", pid) - return posts - - else: - params = {"tags": "pool:" + self.pool_id} - return self._pagination("/posts.json", params) + params = {"tags": "pool:" + self.pool_id} + return self._pagination("/posts.json", params) class DanbooruPostExtractor(DanbooruExtractor): @@ -318,10 +240,6 @@ class DanbooruPostExtractor(DanbooruExtractor): "pattern": r"https?://.+\.zip$", "options": (("ugoira", True),) }), - ("https://e621.net/posts/535", { - "url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529", - "content": "66f46e96a893fba8e694c4e049b23c2acc9af462", - }), ("https://booru.allthefallen.moe/posts/22", { "content": "21dda68e1d7e0a554078e62923f537d8e895cac8", }), @@ -329,7 +247,6 @@ class DanbooruPostExtractor(DanbooruExtractor): "content": "54d548743cd67799a62c77cbae97cfa0fec1b7e9", }), ("https://danbooru.donmai.us/post/show/294929"), - ("https://e621.net/post/show/535"), ) def __init__(self, match): @@ -338,8 +255,7 @@ class DanbooruPostExtractor(DanbooruExtractor): def posts(self): url = "{}/posts/{}.json".format(self.root, self.post_id) - post = self.request(url).json() - return (post["post"] if "post" in post else post,) + return (self.request(url).json(),) class DanbooruPopularExtractor(DanbooruExtractor): @@ -355,12 +271,6 @@ class DanbooruPopularExtractor(DanbooruExtractor): "range": "1-120", "count": 120, }), - ("https://e621.net/popular"), - (("https://e621.net/explore/posts/popular" - "?date=2019-06-01&scale=month"), { - "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+", - "count": ">= 70", - }), ("https://booru.allthefallen.moe/explore/posts/popular"), ("https://aibooru.online/explore/posts/popular"), ) @@ -385,31 +295,5 @@ class DanbooruPopularExtractor(DanbooruExtractor): def posts(self): if self.page_start is None: self.page_start = 1 - return self._pagination(self._popular_endpoint, self.params, True) - - -class DanbooruFavoriteExtractor(DanbooruExtractor): - """Extractor for e621 favorites""" - subcategory = "favorite" - directory_fmt = ("{category}", "Favorites", "{user_id}") - archive_fmt = "f_{user_id}_{id}" - pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?" - test = ( - ("https://e621.net/favorites"), - ("https://e621.net/favorites?page=2&user_id=53275", { - "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+", - "count": "> 260", - }), - ) - - def __init__(self, match): - DanbooruExtractor.__init__(self, match) - self.query = text.parse_query(match.group(match.lastindex)) - - def metadata(self): - return {"user_id": self.query.get("user_id", "")} - - def posts(self): - if self.page_start is None: - self.page_start = 1 - return self._pagination("/favorites.json", self.query, True) + return self._pagination( + "/explore/posts/popular.json", self.params, True) |
