diff options
Diffstat (limited to 'gallery_dl/extractor/sankaku.py')
| -rw-r--r-- | gallery_dl/extractor/sankaku.py | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index c7303f2..3485db9 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -47,6 +47,10 @@ class SankakuExtractor(BooruExtractor):
 
     def _init(self):
         self.api = SankakuAPI(self)
+        if self.config("tags") == "extended":
+            self._tags = self._tags_extended
+            self._tags_findall = re.compile(
+                r"tag-type-([^\"' ]+).*?\?tags=([^\"'&]+)").findall
 
     def _file_url(self, post):
         url = post["file_url"]
@@ -85,6 +89,23 @@ class SankakuExtractor(BooruExtractor):
             post["tags_" + name] = values
             post["tag_string_" + name] = " ".join(values)
 
+    def _tags_extended(self, post, page):
+        try:
+            url = "https://chan.sankakucomplex.com/posts/" + post["id"]
+            page = self.request(url).text
+        except Exception as exc:
+            return self.log.warning(
+                "%s: Failed to extract extended tag categories (%s: %s)",
+                post["id"], exc.__class__.__name__, exc)
+
+        tags = collections.defaultdict(list)
+        tag_sidebar = text.extr(page, '<ul id="tag-sidebar"', "</ul>")
+        for tag_type, tag_name in self._tags_findall(tag_sidebar):
+            tags[tag_type].append(text.unescape(text.unquote(tag_name)))
+        for type, values in tags.items():
+            post["tags_" + type] = values
+            post["tag_string_" + type] = " ".join(values)
+
     def _notes(self, post, page):
         if post.get("has_notes"):
             post["notes"] = self.api.notes(post["id"])
