about | summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/sankaku.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net>, 2025-05-26 06:46:00 -0400
committer: Unit 193 <unit193@unit193.net>, 2025-05-26 06:46:00 -0400
commit: 6424318a059207759b9055cf8a8df91c0ddac7c8 (patch)
tree: 3fb8adec807ad1ffeba4889a506b05e680ca8051 /gallery_dl/extractor/sankaku.py
parent: 2bef55427baa34bf0f78d52590bbf27b2c5f3a56 (diff)
parent: 7672a750cb74bf31e21d76aad2776367fd476155 (diff)
Update upstream source from tag 'upstream/1.29.7'
Update to upstream version '1.29.7' with Debian dir 264267cd1ebd5c7205fe1f137a394d0ae1a2fb3b
Diffstat (limited to 'gallery_dl/extractor/sankaku.py')
-rw-r--r-- gallery_dl/extractor/sankaku.py | 21
1 file changed, 21 insertions, 0 deletions
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index c7303f2..3485db9 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -47,6 +47,10 @@ class SankakuExtractor(BooruExtractor):
def _init(self):
+ # Create the API client; when the "tags" option equals "extended",
+ # rebind self._tags to _tags_extended and precompile the regex it uses.
self.api = SankakuAPI(self)
+ if self.config("tags") == "extended":
+ self._tags = self._tags_extended
+ # findall yields (category, name) pairs: the text following
+ # "tag-type-" and the value of the next "?tags=" parameter
+ self._tags_findall = re.compile(
+ r"tag-type-([^\"' ]+).*?\?tags=([^\"'&]+)").findall
def _file_url(self, post):
url = post["file_url"]
@@ -85,6 +89,23 @@ class SankakuExtractor(BooruExtractor):
post["tags_" + name] = values
post["tag_string_" + name] = " ".join(values)
+    def _tags_extended(self, post, page):
+        """Add per-category tag lists taken from the post's HTML page.
+
+        Requests the chan.sankakucomplex.com page for ``post["id"]`` and
+        reads its "tag-sidebar" list.  Each (category, name) pair found
+        by ``self._tags_findall`` is grouped by category and written to
+        'post' as "tags_<category>" (list of names) and
+        "tag_string_<category>" (the same names joined by spaces).
+
+        The incoming 'page' argument is not used; it is replaced by the
+        page fetched here.  If building the URL or the request raises,
+        a warning is logged and 'post' is left unchanged.
+        """
+        try:
+            url = "https://chan.sankakucomplex.com/posts/" + post["id"]
+            page = self.request(url).text
+        except Exception as exc:
+            # log and give up on extended tags instead of propagating
+            return self.log.warning(
+                "%s: Failed to extract extended tag categories (%s: %s)",
+                post["id"], exc.__class__.__name__, exc)
+
+        tags = collections.defaultdict(list)
+        tag_sidebar = text.extr(page, '<ul id="tag-sidebar"', "</ul>")
+        for tag_type, tag_name in self._tags_findall(tag_sidebar):
+            # names are captured from a "?tags=" URL parameter inside
+            # HTML, hence the unquote followed by unescape
+            tags[tag_type].append(text.unescape(text.unquote(tag_name)))
+        # 'category' rather than 'type' to avoid shadowing the builtin
+        for category, values in tags.items():
+            post["tags_" + category] = values
+            post["tag_string_" + category] = " ".join(values)
+
def _notes(self, post, page):
if post.get("has_notes"):
post["notes"] = self.api.notes(post["id"])