diff options
| author | 2022-11-22 04:28:38 -0500 | |
|---|---|---|
| committer | 2022-11-22 04:28:38 -0500 | |
| commit | 7af5cc29d1c02d20a6890b7b7ba78ab41532a763 (patch) | |
| tree | 4f0366e5653074c7eb31ac7ca59a1ee55f2d736e /gallery_dl/extractor/gelbooru_v02.py | |
| parent | e59d46ecda74190381b1d2725b0bd9df5c0be8d8 (diff) | |
New upstream version 1.24.0. [upstream/1.24.0]
Diffstat (limited to 'gallery_dl/extractor/gelbooru_v02.py')
| -rw-r--r-- | gallery_dl/extractor/gelbooru_v02.py | 182 |
1 files changed, 127 insertions, 55 deletions
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py index 8214614..da87b8f 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -31,6 +31,7 @@ class GelbooruV02Extractor(booru.BooruExtractor): if self.category == "realbooru": self._file_url = self._file_url_realbooru + self._tags = self._tags_realbooru def _api_request(self, params): url = self.api_root + "/index.php?page=dapi&s=post&q=index" @@ -85,55 +86,58 @@ class GelbooruV02Extractor(booru.BooruExtractor): post["date"] = text.parse_datetime( post["created_at"], "%a %b %d %H:%M:%S %z %Y") + def _html(self, post): + return self.request("{}/index.php?page=post&s=view&id={}".format( + self.root, post["id"])).text + + def _tags(self, post, page): + tag_container = (text.extr(page, '<ul id="tag-', '</ul>') or + text.extr(page, '<ul class="tag-', '</ul>')) + if not tag_container: + return + + tags = collections.defaultdict(list) + pattern = re.compile( + r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)", re.S) + for tag_type, tag_name in pattern.findall(tag_container): + tags[tag_type].append(text.unquote(tag_name)) + for key, value in tags.items(): + post["tags_" + key] = " ".join(value) + + def _notes(self, post, page): + note_container = text.extr(page, 'id="note-container"', "<img ") + if not note_container: + return + + post["notes"] = notes = [] + for note in note_container.split('class="note-box"')[1:]: + extr = text.extract_from(note) + notes.append({ + "width" : int(extr("width:", "p")), + "height": int(extr("height:", "p")), + "y" : int(extr("top:", "p")), + "x" : int(extr("left:", "p")), + "id" : int(extr('id="note-body-', '"')), + "body" : text.unescape(text.remove_html(extr(">", "</div>"))), + }) + def _file_url_realbooru(self, post): url = post["file_url"] - if url.count("/") == 5: - md5 = post["md5"] + md5 = post["md5"] + if md5 not in post["preview_url"] or url.count("/") == 5: url = "{}/images/{}/{}/{}.{}".format( self.root, 
md5[0:2], md5[2:4], md5, url.rpartition(".")[2]) return url - def _extended_tags(self, post, page=None): - if not page: - url = "{}/index.php?page=post&s=view&id={}".format( - self.root, post["id"]) - page = self.request(url).text - html = text.extract(page, '<ul id="tag-', '</ul>')[0] - if not html: - html = text.extract(page, '<ul class="tag-', '</ul>')[0] - if html: - tags = collections.defaultdict(list) - pattern = re.compile( - r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)", re.S) - for tag_type, tag_name in pattern.findall(html): - tags[tag_type].append(text.unquote(tag_name)) - for key, value in tags.items(): - post["tags_" + key] = " ".join(value) - return page - - def _notes(self, post, page=None): - if not page: - url = "{}/index.php?page=post&s=view&id={}".format( - self.root, post["id"]) - page = self.request(url).text - notes = [] - notes_data = text.extract(page, '<section id="notes"', '</section>')[0] - if not notes_data: - return - - note_iter = text.extract_iter(notes_data, '<article', '</article>') - extr = text.extract - for note_data in note_iter: - note = { - "width": int(extr(note_data, 'data-width="', '"')[0]), - "height": int(extr(note_data, 'data-height="', '"')[0]), - "x": int(extr(note_data, 'data-x="', '"')[0]), - "y": int(extr(note_data, 'data-y="', '"')[0]), - "body": extr(note_data, 'data-body="', '"')[0], - } - notes.append(note) - - post["notes"] = notes + def _tags_realbooru(self, post, page): + tag_container = text.extr(page, 'id="tagLink"', '</div>') + tags = collections.defaultdict(list) + pattern = re.compile( + r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)') + for tag_type, tag_name in pattern.findall(tag_container): + tags[tag_type].append(text.unquote(tag_name)) + for key, value in tags.items(): + post["tags_" + key] = " ".join(value) INSTANCES = { @@ -310,15 +314,81 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor): archive_fmt = "{id}" pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)" test = ( - 
("https://rule34.xxx/index.php?page=post&s=view&id=1995545", { - "content": "97e4bbf86c3860be18de384d02d544251afe1d45", - "options": (("tags", True),), + ("https://rule34.xxx/index.php?page=post&s=view&id=863", { + "pattern": r"https://api-cdn\.rule34\.xxx/images" + r"/1/6aafbdb3e22f3f3b412ea2cf53321317a37063f3\.jpg", + "content": ("a43f418aa350039af0d11cae501396a33bbe2201", + "67b516295950867e1c1ab6bc13b35d3b762ed2a3"), + "options": (("tags", True), ("notes", True)), "keyword": { - "tags_artist": "danraku", - "tags_character": "kashima_(kantai_collection)", - "tags_copyright": "kantai_collection", + "tags_artist": "reverse_noise yamu_(reverse_noise)", + "tags_character": "hong_meiling", + "tags_copyright": "touhou", "tags_general": str, - "tags_metadata": str, + "tags_metadata": "censored translated", + "notes": [ + { + "body": "It feels angry, I'm losing myself... " + "It won't calm down!", + "height": 65, + "id": 93586, + "width": 116, + "x": 22, + "y": 333, + }, + { + "body": "REPUTATION OF RAGE", + "height": 272, + "id": 93587, + "width": 199, + "x": 78, + "y": 442, + }, + ], + + }, + }), + ("https://hypnohub.net/index.php?page=post&s=view&id=1439", { + "pattern": r"https://hypnohub\.net/images" + r"/90/24/90245c3c5250c2a8173255d3923a010b\.jpg", + "content": "5987c5d2354f22e5fa9b7ee7ce4a6f7beb8b2b71", + "options": (("tags", True), ("notes", True)), + "keyword": { + "tags_artist": "brokenteapot", + "tags_character": "hsien-ko", + "tags_copyright": "capcom darkstalkers", + "tags_general": str, + "tags_metadata": "dialogue text translated", + "notes": [ + { + "body": "Master Master Master " + "Master Master Master", + "height": 83, + "id": 10577, + "width": 129, + "x": 259, + "y": 20, + }, + { + "body": "Response Response Response " + "Response Response Response", + "height": 86, + "id": 10578, + "width": 125, + "x": 126, + "y": 20, + }, + { + "body": "Obedience Obedience Obedience " + "Obedience Obedience Obedience", + "height": 80, + "id": 10579, + "width": 98, 
+ "x": 20, + "y": 20, + }, + ], + }, }), ("https://safebooru.org/index.php?page=post&s=view&id=1169132", { @@ -336,16 +406,18 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor): "pattern": r"https://realbooru\.com/images/dc/b5" r"/dcb5c0ce9ec0bf74a6930608985f4719\.jpeg", "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da", + "options": (("tags", True),), + "keyword": { + "tags_general": "1girl blonde blonde_hair blue_eyes cute " + "female female_only looking_at_viewer smile " + "solo solo_female teeth", + "tags_model": "jennifer_lawrence", + }, }), ("https://tbib.org/index.php?page=post&s=view&id=9233957", { "url": "5a6ebe07bfff8e6d27f7c30b5480f27abcb577d2", "content": "1c3831b6fbaa4686e3c79035b5d98460b1c85c43", }), - ("https://hypnohub.net/index.php?page=post&s=view&id=73964", { - "pattern": r"https://hypnohub\.net/images/7a/37" - r"/7a37c0ba372f35767fb10c904a398831\.png", - "content": "02d5f5a8396b621a6efc04c5f8ef1b7225dfc6ee", - }), ) def __init__(self, match): |
