diff options
Diffstat (limited to 'gallery_dl/extractor/gelbooru.py')
| -rw-r--r-- | gallery_dl/extractor/gelbooru.py | 51 |
1 files changed, 29 insertions, 22 deletions
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index 8d73949..80b0ae1 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -13,6 +13,8 @@ from . import gelbooru_v02 from .. import text, exception import binascii +BASE_PATTERN = r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?" + class GelbooruBase(): """Base class for gelbooru extractors""" @@ -53,6 +55,23 @@ class GelbooruBase(): del params["pid"] params["tags"] = "{} id:<{}".format(self.tags, post["id"]) + def _pagination_html(self, params): + url = self.root + "/index.php" + params["pid"] = self.page_start * self.per_page + + data = {} + while True: + num_ids = 0 + page = self.request(url, params=params).text + + for data["id"] in text.extract_iter(page, '" id="p', '"'): + num_ids += 1 + yield from self._api_request(data) + + if num_ids < self.per_page: + return + params["pid"] += self.per_page + @staticmethod def _file_url(post): url = post["file_url"] @@ -88,8 +107,7 @@ class GelbooruBase(): class GelbooruTagExtractor(GelbooruBase, gelbooru_v02.GelbooruV02TagExtractor): """Extractor for images from gelbooru.com based on search-tags""" - pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?" - r"\?page=post&s=list&tags=(?P<tags>[^&#]+)") + pattern = BASE_PATTERN + r"page=post&s=list&tags=([^&#]+)" test = ( ("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", { "count": 5, @@ -108,8 +126,7 @@ class GelbooruPoolExtractor(GelbooruBase, gelbooru_v02.GelbooruV02PoolExtractor): """Extractor for gelbooru pools""" per_page = 45 - pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?" - r"\?page=pool&s=show&id=(?P<pool>\d+)") + pattern = BASE_PATTERN + r"page=pool&s=show&id=(\d+)" test = ( ("https://gelbooru.com/index.php?page=pool&s=show&id=761", { "count": 6, @@ -124,9 +141,9 @@ class GelbooruPoolExtractor(GelbooruBase, "id" : self.pool_id, "pid" : self.page_start, } - self._page = self.request(url, params=self._params).text + page = self.request(url, params=self._params).text - name, pos = text.extract(self._page, "<h3>Now Viewing: ", "</h3>") + name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>") if not name: raise exception.NotFoundError("pool") @@ -136,29 +153,19 @@ class GelbooruPoolExtractor(GelbooruBase, } def posts(self): - url = self.root + "/index.php" - params = self._params + return self._pagination_html(self._params) - page = self._page - del self._page - data = {} - - while True: - num_ids = 0 - for data["id"] in text.extract_iter(page, '" id="p', '"'): - num_ids += 1 - yield from self._api_request(data) - if num_ids < self.per_page: - return - params["pid"] += self.per_page - page = self.request(url, params=params).text +class GelbooruFavoriteExtractor(GelbooruBase, + gelbooru_v02.GelbooruV02FavoriteExtractor): + pattern = BASE_PATTERN + r"page=favorites&s=view&id=(\d+)" + test = ("https://gelbooru.com/index.php?page=favorites&s=view&id=12345",) class GelbooruPostExtractor(GelbooruBase, gelbooru_v02.GelbooruV02PostExtractor): """Extractor for single images from gelbooru.com""" - pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?" + pattern = (BASE_PATTERN + r"(?=(?:[^#]+&)?page=post(?:&|#|$))" r"(?=(?:[^#]+&)?s=view(?:&|#|$))" r"(?:[^#]+&)?id=(\d+)") |
