diff options
| author | 2022-05-26 23:57:04 -0400 | |
|---|---|---|
| committer | 2022-05-26 23:57:04 -0400 | |
| commit | ad61a6d8122973534ab63df48f6090954bc73db6 (patch) | |
| tree | aedce94427ac95fa180005f88fc94b5c8ef5a62a /gallery_dl/extractor/gelbooru_v01.py | |
| parent | c6b88a96bd191711fc540d7babab3d2e09c68da8 (diff) | |
New upstream version 1.22.0.upstream/1.22.0
Diffstat (limited to 'gallery_dl/extractor/gelbooru_v01.py')
| -rw-r--r-- | gallery_dl/extractor/gelbooru_v01.py | 91 |
1 files changed, 70 insertions, 21 deletions
diff --git a/gallery_dl/extractor/gelbooru_v01.py b/gallery_dl/extractor/gelbooru_v01.py index 541f454..9c19664 100644 --- a/gallery_dl/extractor/gelbooru_v01.py +++ b/gallery_dl/extractor/gelbooru_v01.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann +# Copyright 2021-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for Gelbooru v0.1 sites""" +"""Extractors for Gelbooru Beta 0.1.11 sites""" from . import booru from .. import text @@ -42,14 +42,43 @@ class GelbooruV01Extractor(booru.BooruExtractor): return post + def _pagination(self, url, begin, end): + pid = self.page_start + + while True: + page = self.request(url + str(pid)).text + + cnt = 0 + for post_id in text.extract_iter(page, begin, end): + yield self._parse_post(post_id) + cnt += 1 + + if cnt < self.per_page: + return + pid += self.per_page + BASE_PATTERN = GelbooruV01Extractor.update({ - "thecollection" : {"root": "https://the-collection.booru.org"}, - "illusioncardsbooru": {"root": "https://illusioncards.booru.org"}, - "allgirlbooru" : {"root": "https://allgirl.booru.org"}, - "drawfriends" : {"root": "https://drawfriends.booru.org"}, - "vidyart" : {"root": "https://vidyart.booru.org"}, - "theloudbooru" : {"root": "https://tlb.booru.org"}, + "thecollection": { + "root": "https://the-collection.booru.org", + "pattern": r"the-collection\.booru\.org", + }, + "illusioncardsbooru": { + "root": "https://illusioncards.booru.org", + "pattern": r"illusioncards\.booru\.org", + }, + "allgirlbooru": { + "root": "https://allgirl.booru.org", + "pattern": r"allgirl\.booru\.org", + }, + "drawfriends": { + "root": "https://drawfriends.booru.org", + "pattern": r"drawfriends\.booru\.org", + }, + "vidyart": { + "root": "https://vidyart.booru.org", + "pattern": r"vidyart\.booru\.org", + }, }) @@ -75,7 +104,6 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor): }), ("https://drawfriends.booru.org/index.php?page=post&s=list&tags=all"), ("https://vidyart.booru.org/index.php?page=post&s=list&tags=all"), - ("https://tlb.booru.org/index.php?page=post&s=list&tags=all"), ) def __init__(self, match): @@ -88,20 +116,42 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor): def posts(self): url = "{}/index.php?page=post&s=list&tags={}&pid=".format( self.root, self.tags) - pid = self.page_start + return self._pagination(url, 'class="thumb"><a id="p', '"') - while True: - page = self.request(url + str(pid)).text - cnt = 0 - for post_id in text.extract_iter( - page, 'class="thumb"><a id="p', '"'): - yield self._parse_post(post_id) - cnt += 1 +class GelbooruV01FavoriteExtractor(GelbooruV01Extractor): + subcategory = "favorite" + directory_fmt = ("{category}", "favorites", "{favorite_id}") + archive_fmt = "f_{favorite_id}_{id}" + per_page = 50 + pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)" + test = ( + (("https://the-collection.booru.org" + "/index.php?page=favorites&s=view&id=1166"), { + "count": 2, + }), + (("https://illusioncards.booru.org" + "/index.php?page=favorites&s=view&id=84887"), { + "count": 2, + }), + ("https://allgirl.booru.org/index.php?page=favorites&s=view&id=380", { + "count": 4, + }), + ("https://drawfriends.booru.org/index.php?page=favorites&s=view&id=1"), + ("https://vidyart.booru.org/index.php?page=favorites&s=view&id=1"), + ) - if cnt < self.per_page: - return - pid += self.per_page + def __init__(self, match): + GelbooruV01Extractor.__init__(self, match) + self.favorite_id = match.group(match.lastindex) + + def metadata(self): + return {"favorite_id": text.parse_int(self.favorite_id)} + + def posts(self): + url = "{}/index.php?page=favorites&s=view&id={}&pid=".format( + self.root, self.favorite_id) + return self._pagination(url, "posts[", "]") class GelbooruV01PostExtractor(GelbooruV01Extractor): @@ -141,7 +191,6 @@ class GelbooruV01PostExtractor(GelbooruV01Extractor): }), ("https://drawfriends.booru.org/index.php?page=post&s=view&id=107474"), ("https://vidyart.booru.org/index.php?page=post&s=view&id=383111"), - ("https://tlb.booru.org/index.php?page=post&s=view&id=127223"), ) def __init__(self, match): |
