1 files changed, 35 insertions, 38 deletions
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 2c1174a..c12a7a2 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2025 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -10,10 +10,7 @@
 
 from . import booru
 from .. import text, util, exception
-
-from xml.etree import ElementTree
 import collections
-import re
 
 
 class GelbooruV02Extractor(booru.BooruExtractor):
@@ -24,9 +21,12 @@ class GelbooruV02Extractor(booru.BooruExtractor):
         self.user_id = self.config("user-id")
         self.root_api = self.config_instance("root-api") or self.root
 
+        if self.category == "rule34":
+            self._file_url = self._file_url_rule34
+
     def _api_request(self, params):
         url = self.root_api + "/index.php?page=dapi&s=post&q=index"
-        return ElementTree.fromstring(self.request(url, params=params).text)
+        return self.request_xml(url, params=params)
 
     def _pagination(self, params):
         params["pid"] = self.page_start
@@ -38,7 +38,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
         while True:
             try:
                 root = self._api_request(params)
-            except ElementTree.ParseError:
+            except SyntaxError:  # ElementTree.ParseError
                 if "tags" not in params or post is None:
                     raise
                 taglist = [tag for tag in params["tags"].split()
@@ -50,7 +50,9 @@ class GelbooruV02Extractor(booru.BooruExtractor):
 
             if total is None:
                 try:
-                    total = int(root.attrib["count"])
+                    self.kwdict["total"] = total = int(root.attrib["count"])
+                    if "search_tags" in self.kwdict:
+                        self.kwdict["search_count"] = total
                     self.log.debug("%s posts in total", total)
                 except Exception as exc:
                     total = 0
@@ -78,7 +80,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
         params["pid"] = self.page_start * self.per_page
 
         data = {}
-        find_ids = re.compile(r"\sid=\"p(\d+)").findall
+        find_ids = util.re(r"\sid=\"p(\d+)").findall
 
         while True:
             page = self.request(url, params=params).text
@@ -92,15 +94,24 @@ class GelbooruV02Extractor(booru.BooruExtractor):
                 return
             params["pid"] += self.per_page
 
-    @staticmethod
-    def _prepare(post):
+    def _file_url_rule34(self, post):
+        url = post["file_url"]
+
+        if text.ext_from_url(url) not in util.EXTS_VIDEO:
+            path = url.partition(".")[2]
+            post["_fallback"] = (url,)
+            post["file_url"] = url = "https://wimg." + path
+
+        return url
+
+    def _prepare(self, post):
         post["tags"] = post["tags"].strip()
         post["date"] = text.parse_datetime(
             post["created_at"], "%a %b %d %H:%M:%S %z %Y")
 
     def _html(self, post):
-        return self.request("{}/index.php?page=post&s=view&id={}".format(
-            self.root, post["id"])).text
+        url = f"{self.root}/index.php?page=post&s=view&id={post['id']}"
+        return self.request(url).text
 
     def _tags(self, post, page):
         tag_container = (text.extr(page, '<ul id="tag-', '</ul>') or
@@ -109,8 +120,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
             return
 
         tags = collections.defaultdict(list)
-        pattern = re.compile(
-            r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)", re.S)
+        pattern = util.re(r"(?s)tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)")
         for tag_type, tag_name in pattern.findall(tag_container):
             tags[tag_type].append(text.unescape(text.unquote(tag_name)))
         for key, value in tags.items():
@@ -166,18 +176,13 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
     pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]*)"
     example = "https://safebooru.org/index.php?page=post&s=list&tags=TAG"
 
-    def __init__(self, match):
-        GelbooruV02Extractor.__init__(self, match)
-        tags = match.group(match.lastindex)
-        self.tags = text.unquote(tags.replace("+", " "))
-
-    def metadata(self):
-        return {"search_tags": self.tags}
-
     def posts(self):
-        if self.tags == "all":
-            self.tags = ""
-        return self._pagination({"tags": self.tags})
+        self.kwdict["search_tags"] = tags = text.unquote(
+            self.groups[-1].replace("+", " "))
+
+        if tags == "all":
+            tags = ""
+        return self._pagination({"tags": tags})
 
 
 class GelbooruV02PoolExtractor(GelbooruV02Extractor):
@@ -189,7 +194,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
 
     def __init__(self, match):
         GelbooruV02Extractor.__init__(self, match)
-        self.pool_id = match.group(match.lastindex)
+        self.pool_id = self.groups[-1]
 
         if self.category == "rule34":
             self.posts = self._posts_pages
@@ -202,8 +207,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
         return num
 
     def metadata(self):
-        url = "{}/index.php?page=pool&s=show&id={}".format(
-            self.root, self.pool_id)
+        url = f"{self.root}/index.php?page=pool&s=show&id={self.pool_id}"
         page = self.request(url).text
 
         name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
@@ -239,12 +243,9 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
     pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
     example = "https://safebooru.org/index.php?page=favorites&s=view&id=12345"
 
-    def __init__(self, match):
-        GelbooruV02Extractor.__init__(self, match)
-        self.favorite_id = match.group(match.lastindex)
-
     def metadata(self):
-        return {"favorite_id": text.parse_int(self.favorite_id)}
+        self.favorite_id = fav_id = self.groups[-1]
+        return {"favorite_id": text.parse_int(fav_id)}
 
     def posts(self):
         return self._pagination_html({
@@ -260,9 +261,5 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor):
     pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
     example = "https://safebooru.org/index.php?page=post&s=view&id=12345"
 
-    def __init__(self, match):
-        GelbooruV02Extractor.__init__(self, match)
-        self.post_id = match.group(match.lastindex)
-
     def posts(self):
-        return self._pagination({"id": self.post_id})
+        return self._pagination({"id": self.groups[-1]})