New upstream version 1.26.5. (tag: upstream/1.26.5)

author: Unit 193 <unit193@unit193.net> 2023-12-25 01:27:47 -0500
committer: Unit 193 <unit193@unit193.net> 2023-12-25 01:27:47 -0500
commit: 4d7a4f1ecef2c96269f3590335d2834ebcdd50bf (patch)
tree: c66c0b829ed69c7424befddc193eaa51054b1410 /gallery_dl/extractor/shimmie2.py
parent: 30dee4697019389ef29458b2e3931adc976389b2 (diff)
1 file changed, 30 insertions, 11 deletions
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py
index 912e601..8a08fab 100644
--- a/gallery_dl/extractor/shimmie2.py
+++ b/gallery_dl/extractor/shimmie2.py
@@ -41,8 +41,9 @@ class Shimmie2Extractor(BaseExtractor):
 
         for post in self.posts():
 
-            for key in ("id", "width", "height"):
-                post[key] = text.parse_int(post[key])
+            post["id"] = text.parse_int(post["id"])
+            post["width"] = text.parse_int(post["width"])
+            post["height"] = text.parse_int(post["height"])
             post["tags"] = text.unquote(post["tags"])
             post.update(data)
 
@@ -64,6 +65,13 @@ class Shimmie2Extractor(BaseExtractor):
         """Return an iterable containing data of all relevant posts"""
         return ()
 
+    def _quote_type(self, page):
+        """Return quoting character used in 'page' (' or ")"""
+        try:
+            return page[page.index("<link rel=")+10]
+        except Exception:
+            return "'"
+
 
 INSTANCES = {
     "loudbooru": {
@@ -85,6 +93,10 @@ INSTANCES = {
         "pattern": r"booru\.cavemanon\.xyz",
         "file_url": "{0}/index.php?q=image/{2}.{4}",
     },
+    "rule34hentai": {
+        "root": "https://rule34hentai.net",
+        "pattern": r"rule34hentai\.net",
+    },
 }
 
 BASE_PATTERN = Shimmie2Extractor.update(INSTANCES) + r"/(?:index\.php\?q=/?)?"
@@ -121,21 +133,26 @@ class Shimmie2TagExtractor(Shimmie2Extractor):
 
             if init:
                 init = False
-                has_mime = ("data-mime='" in page)
-                has_pid = ("data-post-id='" in page)
+                quote = self._quote_type(page)
+                has_mime = (" data-mime=" in page)
+                has_pid = (" data-post-id=" in page)
 
             while True:
                 if has_mime:
-                    mime = extr("data-mime='", "'")
+                    mime = extr(" data-mime="+quote, quote)
                 if has_pid:
-                    pid = extr("data-post-id='", "'")
+                    pid = extr(" data-post-id="+quote, quote)
                 else:
-                    pid = extr("href='/post/view/", "?")
+                    pid = extr(" href='/post/view/", quote)
 
                 if not pid:
                     break
 
-                tags, dimensions, size = extr("title='", "'").split(" // ")
+                data = extr("title="+quote, quote).split(" // ")
+                tags = data[0]
+                dimensions = data[1]
+                size = data[2]
+
                 width, _, height = dimensions.partition("x")
                 md5 = extr("/_thumbs/", "/")
 
@@ -200,15 +217,17 @@ class Shimmie2PostExtractor(Shimmie2Extractor):
 
     def posts(self):
         url = "{}/post/view/{}".format(self.root, self.post_id)
-        extr = text.extract_from(self.request(url).text)
+        page = self.request(url).text
+        extr = text.extract_from(page)
+        quote = self._quote_type(page)
 
         post = {
             "id"      : self.post_id,
             "tags"    : extr(": ", "<").partition(" - ")[0].rstrip(")"),
             "md5"     : extr("/_thumbs/", "/"),
             "file_url": self.root + (
-                extr("id='main_image' src='", "'") or
-                extr("<source src='", "'")).lstrip("."),
+                extr("id={0}main_image{0} src={0}".format(quote), quote) or
+                extr("<source src="+quote, quote)).lstrip("."),
             "width"   : extr("data-width=", " ").strip("\"'"),
             "height"  : extr("data-height=", ">").partition(
                 " ")[0].strip("\"'"),
author	Unit 193 <unit193@unit193.net>	2023-12-25 01:27:47 -0500
committer	Unit 193 <unit193@unit193.net>	2023-12-25 01:27:47 -0500
commit	4d7a4f1ecef2c96269f3590335d2834ebcdd50bf (patch)
tree	c66c0b829ed69c7424befddc193eaa51054b1410 /gallery_dl/extractor/shimmie2.py
parent	30dee4697019389ef29458b2e3931adc976389b2 (diff)