summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/gelbooru_v02.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net> 2024-12-08 20:34:33 -0500
committer: Unit 193 <unit193@unit193.net> 2024-12-08 20:34:33 -0500
commit: f6877087773089220d68288d055276fca6c556d4 (patch)
tree: e4847e3bcff284c3daece7b3b9cf308dfc2129ab /gallery_dl/extractor/gelbooru_v02.py
parent: 1981ccaaea6eab2cf32536ec5afe132a870914d8 (diff)
New upstream version 1.28.1. [upstream/1.28.1]
Diffstat (limited to 'gallery_dl/extractor/gelbooru_v02.py')
-rw-r--r-- gallery_dl/extractor/gelbooru_v02.py 64
1 files changed, 5 insertions, 59 deletions
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index aad5752..2c1174a 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -24,10 +24,6 @@ class GelbooruV02Extractor(booru.BooruExtractor):
self.user_id = self.config("user-id")
self.root_api = self.config_instance("root-api") or self.root
- if self.category == "realbooru":
- self.items = self._items_realbooru
- self._tags = self._tags_realbooru
-
def _api_request(self, params):
url = self.root_api + "/index.php?page=dapi&s=post&q=index"
return ElementTree.fromstring(self.request(url, params=params).text)
@@ -82,16 +78,17 @@ class GelbooruV02Extractor(booru.BooruExtractor):
params["pid"] = self.page_start * self.per_page
data = {}
+ find_ids = re.compile(r"\sid=\"p(\d+)").findall
+
while True:
- num_ids = 0
page = self.request(url, params=params).text
+ pids = find_ids(page)
- for data["id"] in text.extract_iter(page, '" id="p', '"'):
- num_ids += 1
+ for data["id"] in pids:
for post in self._api_request(data):
yield post.attrib
- if num_ids < self.per_page:
+ if len(pids) < self.per_page:
return
params["pid"] += self.per_page
@@ -136,59 +133,8 @@ class GelbooruV02Extractor(booru.BooruExtractor):
"body" : text.unescape(text.remove_html(extr(">", "</div>"))),
})
- def _file_url_realbooru(self, post):
- url = post["file_url"]
- md5 = post["md5"]
- if md5 not in post["preview_url"] or url.count("/") == 5:
- url = "{}/images/{}/{}/{}.{}".format(
- self.root, md5[0:2], md5[2:4], md5, url.rpartition(".")[2])
- return url
-
- def _items_realbooru(self):
- from .common import Message
- data = self.metadata()
-
- for post in self.posts():
- try:
- html = self._html(post)
- fallback = post["file_url"]
- url = post["file_url"] = text.rextract(
- html, 'href="', '"', html.index(">Original<"))[0]
- except Exception:
- self.log.debug("Unable to fetch download URL for post %s "
- "(md5: %s)", post.get("id"), post.get("md5"))
- continue
-
- text.nameext_from_url(url, post)
- post.update(data)
- self._prepare(post)
- self._tags(post, html)
-
- path = url.rpartition("/")[0]
- post["_fallback"] = (
- "{}/{}.{}".format(path, post["md5"], post["extension"]),
- fallback,
- )
-
- yield Message.Directory, post
- yield Message.Url, url, post
-
- def _tags_realbooru(self, post, page):
- tag_container = text.extr(page, 'id="tagLink"', '</div>')
- tags = collections.defaultdict(list)
- pattern = re.compile(
- r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)')
- for tag_type, tag_name in pattern.findall(tag_container):
- tags[tag_type].append(text.unescape(text.unquote(tag_name)))
- for key, value in tags.items():
- post["tags_" + key] = " ".join(value)
-
BASE_PATTERN = GelbooruV02Extractor.update({
- "realbooru": {
- "root": "https://realbooru.com",
- "pattern": r"realbooru\.com",
- },
"rule34": {
"root": "https://rule34.xxx",
"root-api": "https://api.rule34.xxx",