summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/sankaku.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/sankaku.py')
-rw-r--r-- gallery_dl/extractor/sankaku.py 45
1 files changed, 19 insertions, 26 deletions
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 3485db9..1c93cbf 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2023 Mike Fährmann
+# Copyright 2014-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -13,7 +13,6 @@ from .common import Message
from .. import text, util, exception
from ..cache import cache
import collections
-import re
BASE_PATTERN = r"(?:https?://)?" \
r"(?:(?:chan|www|beta|black|white)\.sankakucomplex\.com|sankaku\.app)" \
@@ -26,7 +25,6 @@ class SankakuExtractor(BooruExtractor):
category = "sankaku"
root = "https://sankaku.app"
filename_fmt = "{category}_{id}_{md5}.{extension}"
- cookies_domain = None
_warning = True
TAG_TYPES = {
@@ -49,7 +47,7 @@ class SankakuExtractor(BooruExtractor):
self.api = SankakuAPI(self)
if self.config("tags") == "extended":
self._tags = self._tags_extended
- self._tags_findall = re.compile(
+ self._tags_findall = util.re(
r"tag-type-([^\"' ]+).*?\?tags=([^\"'&]+)").findall
def _file_url(self, post):
@@ -80,8 +78,7 @@ class SankakuExtractor(BooruExtractor):
def _tags(self, post, page):
tags = collections.defaultdict(list)
for tag in self.api.tags(post["id"]):
- name = tag["name"]
- if name:
+ if name := tag["name"]:
tags[tag["type"]].append(name.lower().replace(" ", "_"))
types = self.TAG_TYPES
for type, values in tags.items():
@@ -92,7 +89,8 @@ class SankakuExtractor(BooruExtractor):
def _tags_extended(self, post, page):
try:
url = "https://chan.sankakucomplex.com/posts/" + post["id"]
- page = self.request(url).text
+ headers = {"Referer": url}
+ page = self.request(url, headers=headers).text
except Exception as exc:
return self.log.warning(
"%s: Failed to extract extended tag categories (%s: %s)",
@@ -126,16 +124,16 @@ class SankakuTagExtractor(SankakuExtractor):
def __init__(self, match):
SankakuExtractor.__init__(self, match)
- query = text.parse_query(match.group(1))
+ query = text.parse_query(match[1])
self.tags = text.unquote(query.get("tags", "").replace("+", " "))
if "date:" in self.tags:
# rewrite 'date:' tags (#1790)
- self.tags = re.sub(
- r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)(?!T)",
+ self.tags = util.re(
+ r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)(?!T)").sub(
r"date:\3-\2-\1T00:00", self.tags)
- self.tags = re.sub(
- r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)(?!T)",
+ self.tags = util.re(
+ r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)(?!T)").sub(
r"date:\1-\2-\3T00:00", self.tags)
def metadata(self):
@@ -156,7 +154,7 @@ class SankakuPoolExtractor(SankakuExtractor):
def __init__(self, match):
SankakuExtractor.__init__(self, match)
- self.pool_id = match.group(1)
+ self.pool_id = match[1]
def metadata(self):
pool = self.api.pools(self.pool_id)
@@ -182,7 +180,7 @@ class SankakuPostExtractor(SankakuExtractor):
def __init__(self, match):
SankakuExtractor.__init__(self, match)
- self.post_id = match.group(1)
+ self.post_id = match[1]
def posts(self):
return self.api.posts(self.post_id)
@@ -196,14 +194,14 @@ class SankakuBooksExtractor(SankakuExtractor):
def __init__(self, match):
SankakuExtractor.__init__(self, match)
- query = text.parse_query(match.group(1))
+ query = text.parse_query(match[1])
self.tags = text.unquote(query.get("tags", "").replace("+", " "))
def items(self):
params = {"tags": self.tags, "pool_type": "0"}
for pool in self.api.pools_keyset(params):
pool["_extractor"] = SankakuPoolExtractor
- url = "https://sankaku.app/books/{}".format(pool["id"])
+ url = f"https://sankaku.app/books/{pool['id']}"
yield Message.Queue, url, pool
@@ -218,19 +216,16 @@ class SankakuAPI():
"Origin" : extractor.root,
}
- if extractor.config("id-format") in ("alnum", "alphanumeric"):
- self.headers["Api-Version"] = "2"
-
self.username, self.password = extractor._get_auth_info()
if not self.username:
self.authenticate = util.noop
def notes(self, post_id):
params = {"lang": "en"}
- return self._call("/posts/{}/notes".format(post_id), params)
+ return self._call(f"/posts/{post_id}/notes", params)
def tags(self, post_id):
- endpoint = "/posts/{}/tags".format(post_id)
+ endpoint = f"/posts/{post_id}/tags"
params = {
"lang" : "en",
"page" : 1,
@@ -312,15 +307,14 @@ class SankakuAPI():
("unauthorized", "invalid-token", "invalid_token")):
_authenticate_impl.invalidate(self.username)
continue
- raise exception.StopExtraction(code)
+ raise exception.AbortExtraction(code)
return data
def _pagination(self, endpoint, params):
params["lang"] = "en"
params["limit"] = str(self.extractor.per_page)
- refresh = self.extractor.config("refresh", False)
- if refresh:
+ if refresh := self.extractor.config("refresh", False):
offset = expires = 0
from time import time
@@ -334,8 +328,7 @@ class SankakuAPI():
for post in posts:
if not expires:
- url = post["file_url"]
- if url:
+ if url := post["file_url"]:
expires = text.parse_int(
text.extr(url, "e=", "&")) - 60