summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/shimmie2.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net> 2023-12-25 01:27:47 -0500
committer: Unit 193 <unit193@unit193.net> 2023-12-25 01:27:47 -0500
commit: 4d7a4f1ecef2c96269f3590335d2834ebcdd50bf (patch)
tree: c66c0b829ed69c7424befddc193eaa51054b1410 /gallery_dl/extractor/shimmie2.py
parent: 30dee4697019389ef29458b2e3931adc976389b2 (diff)
New upstream version 1.26.5. (tag: upstream/1.26.5)
Diffstat (limited to 'gallery_dl/extractor/shimmie2.py')
-rw-r--r-- gallery_dl/extractor/shimmie2.py | 41
1 files changed, 30 insertions, 11 deletions
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py
index 912e601..8a08fab 100644
--- a/gallery_dl/extractor/shimmie2.py
+++ b/gallery_dl/extractor/shimmie2.py
@@ -41,8 +41,9 @@ class Shimmie2Extractor(BaseExtractor):
for post in self.posts():
- for key in ("id", "width", "height"):
- post[key] = text.parse_int(post[key])
+ post["id"] = text.parse_int(post["id"])
+ post["width"] = text.parse_int(post["width"])
+ post["height"] = text.parse_int(post["height"])
post["tags"] = text.unquote(post["tags"])
post.update(data)
@@ -64,6 +65,13 @@ class Shimmie2Extractor(BaseExtractor):
"""Return an iterable containing data of all relevant posts"""
return ()
+ def _quote_type(self, page):
+ """Return quoting character used in 'page' (' or ")"""
+ try:
+ return page[page.index("<link rel=")+10]
+ except Exception:
+ return "'"
+
INSTANCES = {
"loudbooru": {
@@ -85,6 +93,10 @@ INSTANCES = {
"pattern": r"booru\.cavemanon\.xyz",
"file_url": "{0}/index.php?q=image/{2}.{4}",
},
+ "rule34hentai": {
+ "root": "https://rule34hentai.net",
+ "pattern": r"rule34hentai\.net",
+ },
}
BASE_PATTERN = Shimmie2Extractor.update(INSTANCES) + r"/(?:index\.php\?q=/?)?"
@@ -121,21 +133,26 @@ class Shimmie2TagExtractor(Shimmie2Extractor):
if init:
init = False
- has_mime = ("data-mime='" in page)
- has_pid = ("data-post-id='" in page)
+ quote = self._quote_type(page)
+ has_mime = (" data-mime=" in page)
+ has_pid = (" data-post-id=" in page)
while True:
if has_mime:
- mime = extr("data-mime='", "'")
+ mime = extr(" data-mime="+quote, quote)
if has_pid:
- pid = extr("data-post-id='", "'")
+ pid = extr(" data-post-id="+quote, quote)
else:
- pid = extr("href='/post/view/", "?")
+ pid = extr(" href='/post/view/", quote)
if not pid:
break
- tags, dimensions, size = extr("title='", "'").split(" // ")
+ data = extr("title="+quote, quote).split(" // ")
+ tags = data[0]
+ dimensions = data[1]
+ size = data[2]
+
width, _, height = dimensions.partition("x")
md5 = extr("/_thumbs/", "/")
@@ -200,15 +217,17 @@ class Shimmie2PostExtractor(Shimmie2Extractor):
def posts(self):
url = "{}/post/view/{}".format(self.root, self.post_id)
- extr = text.extract_from(self.request(url).text)
+ page = self.request(url).text
+ extr = text.extract_from(page)
+ quote = self._quote_type(page)
post = {
"id" : self.post_id,
"tags" : extr(": ", "<").partition(" - ")[0].rstrip(")"),
"md5" : extr("/_thumbs/", "/"),
"file_url": self.root + (
- extr("id='main_image' src='", "'") or
- extr("<source src='", "'")).lstrip("."),
+ extr("id={0}main_image{0} src={0}".format(quote), quote) or
+ extr("<source src="+quote, quote)).lstrip("."),
"width" : extr("data-width=", " ").strip("\"'"),
"height" : extr("data-height=", ">").partition(
" ")[0].strip("\"'"),