diff options
| author | 2025-05-26 06:46:00 -0400 | |
|---|---|---|
| committer | 2025-05-26 06:46:00 -0400 | |
| commit | 6424318a059207759b9055cf8a8df91c0ddac7c8 (patch) | |
| tree | 3fb8adec807ad1ffeba4889a506b05e680ca8051 /gallery_dl/extractor/sankaku.py | |
| parent | 2bef55427baa34bf0f78d52590bbf27b2c5f3a56 (diff) | |
| parent | 7672a750cb74bf31e21d76aad2776367fd476155 (diff) | |
Update upstream source from tag 'upstream/1.29.7'
Update to upstream version '1.29.7'
with Debian dir 264267cd1ebd5c7205fe1f137a394d0ae1a2fb3b
Diffstat (limited to 'gallery_dl/extractor/sankaku.py')
| -rw-r--r-- | gallery_dl/extractor/sankaku.py | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index c7303f2..3485db9 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -47,6 +47,10 @@ class SankakuExtractor(BooruExtractor):
 
     def _init(self):
         self.api = SankakuAPI(self)
+        if self.config("tags") == "extended":
+            self._tags = self._tags_extended
+            self._tags_findall = re.compile(
+                r"tag-type-([^\"' ]+).*?\?tags=([^\"'&]+)").findall
 
     def _file_url(self, post):
         url = post["file_url"]
@@ -85,6 +89,23 @@ class SankakuExtractor(BooruExtractor):
             post["tags_" + name] = values
             post["tag_string_" + name] = " ".join(values)
 
+    def _tags_extended(self, post, page):
+        try:
+            url = "https://chan.sankakucomplex.com/posts/" + post["id"]
+            page = self.request(url).text
+        except Exception as exc:
+            return self.log.warning(
+                "%s: Failed to extract extended tag categories (%s: %s)",
+                post["id"], exc.__class__.__name__, exc)
+
+        tags = collections.defaultdict(list)
+        tag_sidebar = text.extr(page, '<ul id="tag-sidebar"', "</ul>")
+        for tag_type, tag_name in self._tags_findall(tag_sidebar):
+            tags[tag_type].append(text.unescape(text.unquote(tag_name)))
+        for type, values in tags.items():
+            post["tags_" + type] = values
+            post["tag_string_" + type] = " ".join(values)
+
     def _notes(self, post, page):
         if post.get("has_notes"):
             post["notes"] = self.api.notes(post["id"])
