summary refs log tree commit diff stats
path: root/gallery_dl/extractor/moebooru.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/moebooru.py')
-rw-r--r-- gallery_dl/extractor/moebooru.py 55
1 files changed, 26 insertions, 29 deletions
diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py
index 4d63c3e..0ef0a32 100644
--- a/gallery_dl/extractor/moebooru.py
+++ b/gallery_dl/extractor/moebooru.py
@@ -26,42 +26,39 @@ class MoebooruExtractor(BooruExtractor):
def _prepare(post):
post["date"] = text.parse_timestamp(post["created_at"])
- def _extended_tags(self, post, page=None):
- if not page:
- url = "{}/post/show/{}".format(self.root, post["id"])
- page = self.request(url).text
- html = text.extract(page, '<ul id="tag-', '</ul>')[0]
- if html:
- tags = collections.defaultdict(list)
- pattern = re.compile(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'+]+)")
- for tag_type, tag_name in pattern.findall(html):
- tags[tag_type].append(text.unquote(tag_name))
- for key, value in tags.items():
- post["tags_" + key] = " ".join(value)
- return page
-
- def _notes(self, post, page=None):
- if not page:
- url = "{}/post/show/{}".format(self.root, post["id"])
- page = self.request(url).text
- notes = []
- notes_container = text.extract(page, 'id="note-container"', "<img ")[0]
- if not notes_container:
+ def _html(self, post):
+ return self.request("{}/post/show/{}".format(
+ self.root, post["id"])).text
+
+ def _tags(self, post, page):
+ tag_container = text.extr(page, '<ul id="tag-', '</ul>')
+ if not tag_container:
return
- for note in notes_container.split('class="note-box"')[1:]:
+ tags = collections.defaultdict(list)
+ pattern = re.compile(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'+]+)")
+ for tag_type, tag_name in pattern.findall(tag_container):
+ tags[tag_type].append(text.unquote(tag_name))
+ for key, value in tags.items():
+ post["tags_" + key] = " ".join(value)
+
+ def _notes(self, post, page):
+ note_container = text.extr(page, 'id="note-container"', "<img ")
+ if not note_container:
+ return
+
+ post["notes"] = notes = []
+ for note in note_container.split('class="note-box"')[1:]:
extr = text.extract_from(note)
notes.append({
- "width" : int(extr("width: ", "p")),
- "height": int(extr("height: ", "p")),
- "y" : int(extr("top: ", "p")),
- "x" : int(extr("left: ", "p")),
+ "width" : int(extr("width:", "p")),
+ "height": int(extr("height:", "p")),
+ "y" : int(extr("top:", "p")),
+ "x" : int(extr("left:", "p")),
"id" : int(extr('id="note-body-', '"')),
- "body" : text.remove_html(extr('>', "</div>")),
+ "body" : text.unescape(text.remove_html(extr(">", "</div>"))),
})
- post["notes"] = notes
-
def _pagination(self, url, params):
params["page"] = self.page_start
params["limit"] = self.per_page