diff options
| author | 2024-12-08 20:34:33 -0500 | |
|---|---|---|
| committer | 2024-12-08 20:34:33 -0500 | |
| commit | f6877087773089220d68288d055276fca6c556d4 (patch) | |
| tree | e4847e3bcff284c3daece7b3b9cf308dfc2129ab /gallery_dl/extractor/gelbooru_v02.py | |
| parent | 1981ccaaea6eab2cf32536ec5afe132a870914d8 (diff) | |
New upstream version 1.28.1. (upstream/1.28.1)
Diffstat (limited to 'gallery_dl/extractor/gelbooru_v02.py')
| -rw-r--r-- | gallery_dl/extractor/gelbooru_v02.py | 64 |
1 file changed, 5 insertions, 59 deletions
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py index aad5752..2c1174a 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -24,10 +24,6 @@ class GelbooruV02Extractor(booru.BooruExtractor): self.user_id = self.config("user-id") self.root_api = self.config_instance("root-api") or self.root - if self.category == "realbooru": - self.items = self._items_realbooru - self._tags = self._tags_realbooru - def _api_request(self, params): url = self.root_api + "/index.php?page=dapi&s=post&q=index" return ElementTree.fromstring(self.request(url, params=params).text) @@ -82,16 +78,17 @@ class GelbooruV02Extractor(booru.BooruExtractor): params["pid"] = self.page_start * self.per_page data = {} + find_ids = re.compile(r"\sid=\"p(\d+)").findall + while True: - num_ids = 0 page = self.request(url, params=params).text + pids = find_ids(page) - for data["id"] in text.extract_iter(page, '" id="p', '"'): - num_ids += 1 + for data["id"] in pids: for post in self._api_request(data): yield post.attrib - if num_ids < self.per_page: + if len(pids) < self.per_page: return params["pid"] += self.per_page @@ -136,59 +133,8 @@ class GelbooruV02Extractor(booru.BooruExtractor): "body" : text.unescape(text.remove_html(extr(">", "</div>"))), }) - def _file_url_realbooru(self, post): - url = post["file_url"] - md5 = post["md5"] - if md5 not in post["preview_url"] or url.count("/") == 5: - url = "{}/images/{}/{}/{}.{}".format( - self.root, md5[0:2], md5[2:4], md5, url.rpartition(".")[2]) - return url - - def _items_realbooru(self): - from .common import Message - data = self.metadata() - - for post in self.posts(): - try: - html = self._html(post) - fallback = post["file_url"] - url = post["file_url"] = text.rextract( - html, 'href="', '"', html.index(">Original<"))[0] - except Exception: - self.log.debug("Unable to fetch download URL for post %s " - "(md5: %s)", post.get("id"), post.get("md5")) - continue - - 
text.nameext_from_url(url, post) - post.update(data) - self._prepare(post) - self._tags(post, html) - - path = url.rpartition("/")[0] - post["_fallback"] = ( - "{}/{}.{}".format(path, post["md5"], post["extension"]), - fallback, - ) - - yield Message.Directory, post - yield Message.Url, url, post - - def _tags_realbooru(self, post, page): - tag_container = text.extr(page, 'id="tagLink"', '</div>') - tags = collections.defaultdict(list) - pattern = re.compile( - r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)') - for tag_type, tag_name in pattern.findall(tag_container): - tags[tag_type].append(text.unescape(text.unquote(tag_name))) - for key, value in tags.items(): - post["tags_" + key] = " ".join(value) - BASE_PATTERN = GelbooruV02Extractor.update({ - "realbooru": { - "root": "https://realbooru.com", - "pattern": r"realbooru\.com", - }, "rule34": { "root": "https://rule34.xxx", "root-api": "https://api.rule34.xxx", |
