diff options
Diffstat (limited to 'gallery_dl/extractor/shimmie2.py')
| -rw-r--r-- | gallery_dl/extractor/shimmie2.py | 41 |
1 files changed, 30 insertions, 11 deletions
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py index 912e601..8a08fab 100644 --- a/gallery_dl/extractor/shimmie2.py +++ b/gallery_dl/extractor/shimmie2.py @@ -41,8 +41,9 @@ class Shimmie2Extractor(BaseExtractor): for post in self.posts(): - for key in ("id", "width", "height"): - post[key] = text.parse_int(post[key]) + post["id"] = text.parse_int(post["id"]) + post["width"] = text.parse_int(post["width"]) + post["height"] = text.parse_int(post["height"]) post["tags"] = text.unquote(post["tags"]) post.update(data) @@ -64,6 +65,13 @@ class Shimmie2Extractor(BaseExtractor): """Return an iterable containing data of all relevant posts""" return () + def _quote_type(self, page): + """Return quoting character used in 'page' (' or ")""" + try: + return page[page.index("<link rel=")+10] + except Exception: + return "'" + INSTANCES = { "loudbooru": { @@ -85,6 +93,10 @@ INSTANCES = { "pattern": r"booru\.cavemanon\.xyz", "file_url": "{0}/index.php?q=image/{2}.{4}", }, + "rule34hentai": { + "root": "https://rule34hentai.net", + "pattern": r"rule34hentai\.net", + }, } BASE_PATTERN = Shimmie2Extractor.update(INSTANCES) + r"/(?:index\.php\?q=/?)?" @@ -121,21 +133,26 @@ class Shimmie2TagExtractor(Shimmie2Extractor): if init: init = False - has_mime = ("data-mime='" in page) - has_pid = ("data-post-id='" in page) + quote = self._quote_type(page) + has_mime = (" data-mime=" in page) + has_pid = (" data-post-id=" in page) while True: if has_mime: - mime = extr("data-mime='", "'") + mime = extr(" data-mime="+quote, quote) if has_pid: - pid = extr("data-post-id='", "'") + pid = extr(" data-post-id="+quote, quote) else: - pid = extr("href='/post/view/", "?") + pid = extr(" href='/post/view/", quote) if not pid: break - tags, dimensions, size = extr("title='", "'").split(" // ") + data = extr("title="+quote, quote).split(" // ") + tags = data[0] + dimensions = data[1] + size = data[2] + width, _, height = dimensions.partition("x") md5 = extr("/_thumbs/", "/") @@ -200,15 +217,17 @@ class Shimmie2PostExtractor(Shimmie2Extractor): def posts(self): url = "{}/post/view/{}".format(self.root, self.post_id) - extr = text.extract_from(self.request(url).text) + page = self.request(url).text + extr = text.extract_from(page) + quote = self._quote_type(page) post = { "id" : self.post_id, "tags" : extr(": ", "<").partition(" - ")[0].rstrip(")"), "md5" : extr("/_thumbs/", "/"), "file_url": self.root + ( - extr("id='main_image' src='", "'") or - extr("<source src='", "'")).lstrip("."), + extr("id={0}main_image{0} src={0}".format(quote), quote) or + extr("<source src="+quote, quote)).lstrip("."), "width" : extr("data-width=", " ").strip("\"'"), "height" : extr("data-height=", ">").partition( " ")[0].strip("\"'"), |
