aboutsummaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/sankaku.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/sankaku.py')
-rw-r--r--gallery_dl/extractor/sankaku.py21
1 files changed, 21 insertions, 0 deletions
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index c7303f2..3485db9 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -47,6 +47,10 @@ class SankakuExtractor(BooruExtractor):
def _init(self):
self.api = SankakuAPI(self)
+ if self.config("tags") == "extended":
+ self._tags = self._tags_extended
+ self._tags_findall = re.compile(
+ r"tag-type-([^\"' ]+).*?\?tags=([^\"'&]+)").findall
def _file_url(self, post):
url = post["file_url"]
@@ -85,6 +89,23 @@ class SankakuExtractor(BooruExtractor):
post["tags_" + name] = values
post["tag_string_" + name] = " ".join(values)
+ def _tags_extended(self, post, page):
+ try:
+ url = "https://chan.sankakucomplex.com/posts/" + post["id"]
+ page = self.request(url).text
+ except Exception as exc:
+ return self.log.warning(
+ "%s: Failed to extract extended tag categories (%s: %s)",
+ post["id"], exc.__class__.__name__, exc)
+
+ tags = collections.defaultdict(list)
+ tag_sidebar = text.extr(page, '<ul id="tag-sidebar"', "</ul>")
+ for tag_type, tag_name in self._tags_findall(tag_sidebar):
+ tags[tag_type].append(text.unescape(text.unquote(tag_name)))
+ for type, values in tags.items():
+ post["tags_" + type] = values
+ post["tag_string_" + type] = " ".join(values)
+
def _notes(self, post, page):
if post.get("has_notes"):
post["notes"] = self.api.notes(post["id"])