diff options
Diffstat (limited to 'gallery_dl/extractor/moebooru.py')
| -rw-r--r-- | gallery_dl/extractor/moebooru.py | 55 |
1 files changed, 26 insertions, 29 deletions
diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py
index 4d63c3e..0ef0a32 100644
--- a/gallery_dl/extractor/moebooru.py
+++ b/gallery_dl/extractor/moebooru.py
@@ -26,42 +26,39 @@ class MoebooruExtractor(BooruExtractor):
     def _prepare(post):
         post["date"] = text.parse_timestamp(post["created_at"])
 
-    def _extended_tags(self, post, page=None):
-        if not page:
-            url = "{}/post/show/{}".format(self.root, post["id"])
-            page = self.request(url).text
-        html = text.extract(page, '<ul id="tag-', '</ul>')[0]
-        if html:
-            tags = collections.defaultdict(list)
-            pattern = re.compile(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'+]+)")
-            for tag_type, tag_name in pattern.findall(html):
-                tags[tag_type].append(text.unquote(tag_name))
-            for key, value in tags.items():
-                post["tags_" + key] = " ".join(value)
-        return page
-
-    def _notes(self, post, page=None):
-        if not page:
-            url = "{}/post/show/{}".format(self.root, post["id"])
-            page = self.request(url).text
-        notes = []
-        notes_container = text.extract(page, 'id="note-container"', "<img ")[0]
-        if not notes_container:
+    def _html(self, post):
+        return self.request("{}/post/show/{}".format(
+            self.root, post["id"])).text
+
+    def _tags(self, post, page):
+        tag_container = text.extr(page, '<ul id="tag-', '</ul>')
+        if not tag_container:
             return
 
-        for note in notes_container.split('class="note-box"')[1:]:
+        tags = collections.defaultdict(list)
+        pattern = re.compile(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'+]+)")
+        for tag_type, tag_name in pattern.findall(tag_container):
+            tags[tag_type].append(text.unquote(tag_name))
+        for key, value in tags.items():
+            post["tags_" + key] = " ".join(value)
+
+    def _notes(self, post, page):
+        note_container = text.extr(page, 'id="note-container"', "<img ")
+        if not note_container:
+            return
+
+        post["notes"] = notes = []
+        for note in note_container.split('class="note-box"')[1:]:
             extr = text.extract_from(note)
             notes.append({
-                "width" : int(extr("width: ", "p")),
-                "height": int(extr("height: ", "p")),
-                "y"     : int(extr("top: ", "p")),
-                "x"     : int(extr("left: ", "p")),
+                "width" : int(extr("width:", "p")),
+                "height": int(extr("height:", "p")),
+                "y"     : int(extr("top:", "p")),
+                "x"     : int(extr("left:", "p")),
                 "id"    : int(extr('id="note-body-', '"')),
-                "body"  : text.remove_html(extr('>', "</div>")),
+                "body"  : text.unescape(text.remove_html(extr(">", "</div>"))),
             })
 
-        post["notes"] = notes
-
     def _pagination(self, url, params):
         params["page"] = self.page_start
         params["limit"] = self.per_page
