diff options
Diffstat (limited to 'gallery_dl/extractor/sankaku.py')
| -rw-r--r-- | gallery_dl/extractor/sankaku.py | 45 |
1 files changed, 19 insertions, 26 deletions
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index 3485db9..1c93cbf 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2023 Mike Fährmann +# Copyright 2014-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -13,7 +13,6 @@ from .common import Message from .. import text, util, exception from ..cache import cache import collections -import re BASE_PATTERN = r"(?:https?://)?" \ r"(?:(?:chan|www|beta|black|white)\.sankakucomplex\.com|sankaku\.app)" \ @@ -26,7 +25,6 @@ class SankakuExtractor(BooruExtractor): category = "sankaku" root = "https://sankaku.app" filename_fmt = "{category}_{id}_{md5}.{extension}" - cookies_domain = None _warning = True TAG_TYPES = { @@ -49,7 +47,7 @@ class SankakuExtractor(BooruExtractor): self.api = SankakuAPI(self) if self.config("tags") == "extended": self._tags = self._tags_extended - self._tags_findall = re.compile( + self._tags_findall = util.re( r"tag-type-([^\"' ]+).*?\?tags=([^\"'&]+)").findall def _file_url(self, post): @@ -80,8 +78,7 @@ class SankakuExtractor(BooruExtractor): def _tags(self, post, page): tags = collections.defaultdict(list) for tag in self.api.tags(post["id"]): - name = tag["name"] - if name: + if name := tag["name"]: tags[tag["type"]].append(name.lower().replace(" ", "_")) types = self.TAG_TYPES for type, values in tags.items(): @@ -92,7 +89,8 @@ class SankakuExtractor(BooruExtractor): def _tags_extended(self, post, page): try: url = "https://chan.sankakucomplex.com/posts/" + post["id"] - page = self.request(url).text + headers = {"Referer": url} + page = self.request(url, headers=headers).text except Exception as exc: return self.log.warning( "%s: Failed to extract extended tag categories (%s: %s)", @@ -126,16 +124,16 @@ class SankakuTagExtractor(SankakuExtractor): def __init__(self, match): SankakuExtractor.__init__(self, match) - query = text.parse_query(match.group(1)) + query = text.parse_query(match[1]) self.tags = text.unquote(query.get("tags", "").replace("+", " ")) if "date:" in self.tags: # rewrite 'date:' tags (#1790) - self.tags = re.sub( - r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)(?!T)", + self.tags = util.re( + r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)(?!T)").sub( r"date:\3-\2-\1T00:00", self.tags) - self.tags = re.sub( - r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)(?!T)", + self.tags = util.re( + r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)(?!T)").sub( r"date:\1-\2-\3T00:00", self.tags) def metadata(self): @@ -156,7 +154,7 @@ class SankakuPoolExtractor(SankakuExtractor): def __init__(self, match): SankakuExtractor.__init__(self, match) - self.pool_id = match.group(1) + self.pool_id = match[1] def metadata(self): pool = self.api.pools(self.pool_id) @@ -182,7 +180,7 @@ class SankakuPostExtractor(SankakuExtractor): def __init__(self, match): SankakuExtractor.__init__(self, match) - self.post_id = match.group(1) + self.post_id = match[1] def posts(self): return self.api.posts(self.post_id) @@ -196,14 +194,14 @@ class SankakuBooksExtractor(SankakuExtractor): def __init__(self, match): SankakuExtractor.__init__(self, match) - query = text.parse_query(match.group(1)) + query = text.parse_query(match[1]) self.tags = text.unquote(query.get("tags", "").replace("+", " ")) def items(self): params = {"tags": self.tags, "pool_type": "0"} for pool in self.api.pools_keyset(params): pool["_extractor"] = SankakuPoolExtractor - url = "https://sankaku.app/books/{}".format(pool["id"]) + url = f"https://sankaku.app/books/{pool['id']}" yield Message.Queue, url, pool @@ -218,19 +216,16 @@ class SankakuAPI(): "Origin" : extractor.root, } - if extractor.config("id-format") in ("alnum", "alphanumeric"): - self.headers["Api-Version"] = "2" - self.username, self.password = extractor._get_auth_info() if not self.username: self.authenticate = util.noop def notes(self, post_id): params = {"lang": "en"} - return self._call("/posts/{}/notes".format(post_id), params) + return self._call(f"/posts/{post_id}/notes", params) def tags(self, post_id): - endpoint = "/posts/{}/tags".format(post_id) + endpoint = f"/posts/{post_id}/tags" params = { "lang" : "en", "page" : 1, @@ -312,15 +307,14 @@ class SankakuAPI(): ("unauthorized", "invalid-token", "invalid_token")): _authenticate_impl.invalidate(self.username) continue - raise exception.StopExtraction(code) + raise exception.AbortExtraction(code) return data def _pagination(self, endpoint, params): params["lang"] = "en" params["limit"] = str(self.extractor.per_page) - refresh = self.extractor.config("refresh", False) - if refresh: + if refresh := self.extractor.config("refresh", False): offset = expires = 0 from time import time @@ -334,8 +328,7 @@ class SankakuAPI(): for post in posts: if not expires: - url = post["file_url"] - if url: + if url := post["file_url"]: expires = text.parse_int( text.extr(url, "e=", "&")) - 60 |
