diff options
Diffstat (limited to 'gallery_dl/extractor/gelbooru_v02.py')
| -rw-r--r-- | gallery_dl/extractor/gelbooru_v02.py | 27 |
1 files changed, 27 insertions, 0 deletions
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 51fb478..1b877b3 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -47,6 +47,8 @@ class GelbooruV02Extractor(booru.BooruExtractor):
             self.root, post["id"])
         page = self.request(url).text
         html = text.extract(page, '<ul id="tag-', '</ul>')[0]
+        if not html:
+            html = text.extract(page, '<ul class="tag-', '</ul>')[0]
         if html:
             tags = collections.defaultdict(list)
             pattern = re.compile(
@@ -55,6 +57,31 @@ class GelbooruV02Extractor(booru.BooruExtractor):
                 tags[tag_type].append(text.unquote(tag_name))
             for key, value in tags.items():
                 post["tags_" + key] = " ".join(value)
+        return page
+
+    def _notes(self, post, page=None):
+        if not page:
+            url = "{}/index.php?page=post&s=view&id={}".format(
+                self.root, post["id"])
+            page = self.request(url).text
+        notes = []
+        notes_data = text.extract(page, '<section id="notes"', '</section>')[0]
+        if not notes_data:
+            return
+
+        note_iter = text.extract_iter(notes_data, '<article', '</article>')
+        extr = text.extract
+        for note_data in note_iter:
+            note = {
+                "width": int(extr(note_data, 'data-width="', '"')[0]),
+                "height": int(extr(note_data, 'data-height="', '"')[0]),
+                "x": int(extr(note_data, 'data-x="', '"')[0]),
+                "y": int(extr(note_data, 'data-y="', '"')[0]),
+                "body": extr(note_data, 'data-body="', '"')[0],
+            }
+            notes.append(note)
+
+        post["notes"] = notes
 
 
 BASE_PATTERN = GelbooruV02Extractor.update({
