about | summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/gelbooru_v02.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/gelbooru_v02.py')
-rw-r--r-- gallery_dl/extractor/gelbooru_v02.py 73
1 files changed, 35 insertions, 38 deletions
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 2c1174a..c12a7a2 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,10 +10,7 @@
from . import booru
from .. import text, util, exception
-
-from xml.etree import ElementTree
import collections
-import re
class GelbooruV02Extractor(booru.BooruExtractor):
@@ -24,9 +21,12 @@ class GelbooruV02Extractor(booru.BooruExtractor):
self.user_id = self.config("user-id")
self.root_api = self.config_instance("root-api") or self.root
+ if self.category == "rule34":
+ self._file_url = self._file_url_rule34
+
def _api_request(self, params):
url = self.root_api + "/index.php?page=dapi&s=post&q=index"
- return ElementTree.fromstring(self.request(url, params=params).text)
+ return self.request_xml(url, params=params)
def _pagination(self, params):
params["pid"] = self.page_start
@@ -38,7 +38,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
while True:
try:
root = self._api_request(params)
- except ElementTree.ParseError:
+ except SyntaxError: # ElementTree.ParseError
if "tags" not in params or post is None:
raise
taglist = [tag for tag in params["tags"].split()
@@ -50,7 +50,9 @@ class GelbooruV02Extractor(booru.BooruExtractor):
if total is None:
try:
- total = int(root.attrib["count"])
+ self.kwdict["total"] = total = int(root.attrib["count"])
+ if "search_tags" in self.kwdict:
+ self.kwdict["search_count"] = total
self.log.debug("%s posts in total", total)
except Exception as exc:
total = 0
@@ -78,7 +80,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
params["pid"] = self.page_start * self.per_page
data = {}
- find_ids = re.compile(r"\sid=\"p(\d+)").findall
+ find_ids = util.re(r"\sid=\"p(\d+)").findall
while True:
page = self.request(url, params=params).text
@@ -92,15 +94,24 @@ class GelbooruV02Extractor(booru.BooruExtractor):
return
params["pid"] += self.per_page
- @staticmethod
- def _prepare(post):
+ def _file_url_rule34(self, post):
+ url = post["file_url"]
+
+ if text.ext_from_url(url) not in util.EXTS_VIDEO:
+ path = url.partition(".")[2]
+ post["_fallback"] = (url,)
+ post["file_url"] = url = "https://wimg." + path
+
+ return url
+
+ def _prepare(self, post):
post["tags"] = post["tags"].strip()
post["date"] = text.parse_datetime(
post["created_at"], "%a %b %d %H:%M:%S %z %Y")
def _html(self, post):
- return self.request("{}/index.php?page=post&s=view&id={}".format(
- self.root, post["id"])).text
+ url = f"{self.root}/index.php?page=post&s=view&id={post['id']}"
+ return self.request(url).text
def _tags(self, post, page):
tag_container = (text.extr(page, '<ul id="tag-', '</ul>') or
@@ -109,8 +120,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
return
tags = collections.defaultdict(list)
- pattern = re.compile(
- r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)", re.S)
+ pattern = util.re(r"(?s)tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)")
for tag_type, tag_name in pattern.findall(tag_container):
tags[tag_type].append(text.unescape(text.unquote(tag_name)))
for key, value in tags.items():
@@ -166,18 +176,13 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]*)"
example = "https://safebooru.org/index.php?page=post&s=list&tags=TAG"
- def __init__(self, match):
- GelbooruV02Extractor.__init__(self, match)
- tags = match.group(match.lastindex)
- self.tags = text.unquote(tags.replace("+", " "))
-
- def metadata(self):
- return {"search_tags": self.tags}
-
def posts(self):
- if self.tags == "all":
- self.tags = ""
- return self._pagination({"tags": self.tags})
+ self.kwdict["search_tags"] = tags = text.unquote(
+ self.groups[-1].replace("+", " "))
+
+ if tags == "all":
+ tags = ""
+ return self._pagination({"tags": tags})
class GelbooruV02PoolExtractor(GelbooruV02Extractor):
@@ -189,7 +194,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)
- self.pool_id = match.group(match.lastindex)
+ self.pool_id = self.groups[-1]
if self.category == "rule34":
self.posts = self._posts_pages
@@ -202,8 +207,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
return num
def metadata(self):
- url = "{}/index.php?page=pool&s=show&id={}".format(
- self.root, self.pool_id)
+ url = f"{self.root}/index.php?page=pool&s=show&id={self.pool_id}"
page = self.request(url).text
name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
@@ -239,12 +243,9 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
example = "https://safebooru.org/index.php?page=favorites&s=view&id=12345"
- def __init__(self, match):
- GelbooruV02Extractor.__init__(self, match)
- self.favorite_id = match.group(match.lastindex)
-
def metadata(self):
- return {"favorite_id": text.parse_int(self.favorite_id)}
+ self.favorite_id = fav_id = self.groups[-1]
+ return {"favorite_id": text.parse_int(fav_id)}
def posts(self):
return self._pagination_html({
@@ -260,9 +261,5 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor):
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
example = "https://safebooru.org/index.php?page=post&s=view&id=12345"
- def __init__(self, match):
- GelbooruV02Extractor.__init__(self, match)
- self.post_id = match.group(match.lastindex)
-
def posts(self):
- return self._pagination({"id": self.post_id})
+ return self._pagination({"id": self.groups[-1]})