summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/sankaku.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/sankaku.py')
-rw-r--r-- gallery_dl/extractor/sankaku.py 210
1 files changed, 164 insertions, 46 deletions
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 438dd9f..9e64eac 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -6,13 +6,15 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://chan.sankakucomplex.com/"""
+"""Extractors for https://sankaku.app/"""
from .booru import BooruExtractor
from .. import text, exception
+from ..cache import cache
import collections
-BASE_PATTERN = r"(?:https?://)?(?:beta|chan)\.sankakucomplex\.com"
+BASE_PATTERN = r"(?:https?://)?" \
+ r"(?:sankaku\.app|(?:beta|chan)\.sankakucomplex\.com)"
class SankakuExtractor(BooruExtractor):
@@ -20,8 +22,8 @@ class SankakuExtractor(BooruExtractor):
basecategory = "booru"
category = "sankaku"
filename_fmt = "{category}_{id}_{md5}.{extension}"
- request_interval_min = 1.0
- per_page = 100
+ cookiedomain = None
+ _warning = True
TAG_TYPES = {
0: "general",
@@ -36,17 +38,24 @@ class SankakuExtractor(BooruExtractor):
9: "meta",
}
- def _prepare_post(self, post, extended_tags=False):
+ def skip(self, num):
+ return 0
+
+ def _file_url(self, post):
url = post["file_url"]
- if url[0] == "/":
- url = self.root + url
- if extended_tags:
- self._fetch_extended_tags(post)
- post["date"] = text.parse_timestamp(post["created_at"]["s"])
- post["tags"] = [tag["name"] for tag in post["tags"]]
+ if not url and self._warning:
+ self.log.warning(
+ "Login required to download 'contentious_content' posts")
+ SankakuExtractor._warning = False
return url
- def _fetch_extended_tags(self, post):
+ @staticmethod
+ def _prepare(post):
+ post["created_at"] = post["created_at"]["s"]
+ post["date"] = text.parse_timestamp(post["created_at"])
+ post["tags"] = [tag["name"] for tag in post["tags"]]
+
+ def _extended_tags(self, post):
tags = collections.defaultdict(list)
types = self.TAG_TYPES
for tag in post["tags"]:
@@ -54,44 +63,21 @@ class SankakuExtractor(BooruExtractor):
for key, value in tags.items():
post["tags_" + key] = value
- def _api_request(self, endpoint, params=None):
- url = "https://capi-v2.sankakucomplex.com" + endpoint
- while True:
- response = self.request(url, params=params, fatal=False)
- if response.status_code == 429:
- self.wait(until=response.headers.get("X-RateLimit-Reset"))
- continue
- return response.json()
-
- def _pagination(self, params):
- params["lang"] = "en"
- params["limit"] = str(self.per_page)
-
- while True:
- data = self._api_request("/posts/keyset", params)
- if not data.get("success", True):
- raise exception.StopExtraction(data.get("code"))
- yield from data["data"]
-
- params["next"] = data["meta"]["next"]
- if not params["next"]:
- return
- if "page" in params:
- del params["page"]
-
class SankakuTagExtractor(SankakuExtractor):
- """Extractor for images from chan.sankakucomplex.com by search-tags"""
+ """Extractor for images from sankaku.app by search-tags"""
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = BASE_PATTERN + r"/\?([^#]*)"
test = (
- ("https://beta.sankakucomplex.com/?tags=bonocho", {
+ ("https://sankaku.app/?tags=bonocho", {
"count": 5,
"pattern": r"https://c?s\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
}),
+ ("https://beta.sankakucomplex.com/?tags=bonocho"),
+ ("https://chan.sankakucomplex.com/?tags=bonocho"),
# error on five or more tags
("https://chan.sankakucomplex.com/?tags=bonocho+a+b+c+d", {
"options": (("username", None),),
@@ -111,19 +97,21 @@ class SankakuTagExtractor(SankakuExtractor):
return {"search_tags": self.tags}
def posts(self):
- return self._pagination({"tags": self.tags})
+ params = {"tags": self.tags}
+ return SankakuAPI(self).posts_keyset(params)
class SankakuPoolExtractor(SankakuExtractor):
- """Extractor for image pools or books from chan.sankakucomplex.com"""
+ """Extractor for image pools or books from sankaku.app"""
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name_en]}")
archive_fmt = "p_{pool}_{id}"
pattern = BASE_PATTERN + r"/(?:books|pool/show)/(\d+)"
test = (
- ("https://beta.sankakucomplex.com/books/90", {
+ ("https://sankaku.app/books/90", {
"count": 5,
}),
+ ("https://beta.sankakucomplex.com/books/90"),
("https://chan.sankakucomplex.com/pool/show/90"),
)
@@ -132,7 +120,7 @@ class SankakuPoolExtractor(SankakuExtractor):
self.pool_id = match.group(1)
def metadata(self):
- pool = self._api_request("/pools/" + self.pool_id)
+ pool = SankakuAPI(self).pools(self.pool_id)
self._posts = pool.pop("posts")
return {"pool": pool}
@@ -141,12 +129,12 @@ class SankakuPoolExtractor(SankakuExtractor):
class SankakuPostExtractor(SankakuExtractor):
- """Extractor for single images from chan.sankakucomplex.com"""
+ """Extractor for single posts from sankaku.app"""
subcategory = "post"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/post/show/(\d+)"
test = (
- ("https://beta.sankakucomplex.com/post/show/360451", {
+ ("https://sankaku.app/post/show/360451", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
"options": (("tags", True),),
"keyword": {
@@ -158,6 +146,12 @@ class SankakuPostExtractor(SankakuExtractor):
"tags_general" : list,
},
}),
+ # 'contentious_content'
+ ("https://sankaku.app/post/show/21418978", {
+ "pattern": r"https://s\.sankakucomplex\.com"
+ r"/data/13/3c/133cda3bfde249c504284493903fb985\.jpg",
+ }),
+ ("https://beta.sankakucomplex.com/post/show/360451"),
("https://chan.sankakucomplex.com/post/show/360451"),
)
@@ -166,4 +160,128 @@ class SankakuPostExtractor(SankakuExtractor):
self.post_id = match.group(1)
def posts(self):
- return self._pagination({"tags": "id:" + self.post_id})
+ return SankakuAPI(self).posts(self.post_id)
+
+
+class SankakuAPI():
+ """Interface for the sankaku.app API"""
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.headers = {"Accept": "application/vnd.sankaku.api+json;v=2"}
+
+ self.username, self.password = self.extractor._get_auth_info()
+ if not self.username:
+ self.authenticate = lambda: None
+
+ def pools(self, pool_id):
+ params = {"lang": "en"}
+ return self._call("/pools/" + pool_id, params)
+
+ def posts(self, post_id):
+ params = {
+ "lang" : "en",
+ "page" : "1",
+ "limit": "1",
+ "tags" : "id_range:" + post_id,
+ }
+ return self._call("/posts", params)
+
+ def posts_keyset(self, params):
+ return self._pagination("/posts/keyset", params)
+
+ def authenticate(self):
+ self.headers["Authorization"] = \
+ _authenticate_impl(self.extractor, self.username, self.password)
+
+ def _call(self, endpoint, params=None):
+ url = "https://capi-v2.sankakucomplex.com" + endpoint
+ for _ in range(5):
+ self.authenticate()
+ response = self.extractor.request(
+ url, params=params, headers=self.headers, fatal=False)
+
+ if response.status_code == 429:
+ self.extractor.wait(
+ until=response.headers.get("X-RateLimit-Reset"))
+ continue
+
+ data = response.json()
+ try:
+ success = data.get("success", True)
+ except AttributeError:
+ success = True
+ if not success:
+ code = data.get("code")
+ if code == "invalid_token":
+ _authenticate_impl.invalidate(self.username)
+ continue
+ raise exception.StopExtraction(code)
+ return data
+
+ def _pagination(self, endpoint, params):
+ params["lang"] = "en"
+ params["limit"] = str(self.extractor.per_page)
+
+ while True:
+ data = self._call(endpoint, params)
+ yield from data["data"]
+
+ params["next"] = data["meta"]["next"]
+ if not params["next"]:
+ return
+
+
+@cache(maxage=365*24*3600, keyarg=1)
+def _authenticate_impl(extr, username, password):
+ extr.log.info("Logging in as %s", username)
+ headers = {"Accept": "application/vnd.sankaku.api+json;v=2"}
+
+ # get initial access_token
+ url = "https://login.sankakucomplex.com/auth/token"
+ data = {"login": username, "password": password}
+ response = extr.request(
+ url, method="POST", headers=headers, json=data, fatal=False)
+ data = response.json()
+
+ if response.status_code >= 400 or not data.get("success"):
+ raise exception.AuthenticationError(data.get("error"))
+ access_token = data["access_token"]
+
+ # start openid auth
+ url = "https://login.sankakucomplex.com/oidc/auth"
+ params = {
+ "response_type": "code",
+ "scope" : "openid",
+ "client_id" : "sankaku-web-app",
+ "redirect_uri" : "https://sankaku.app/sso/callback",
+ "state" : "return_uri=https://sankaku.app/",
+ "theme" : "black",
+ "lang" : "undefined",
+ }
+ page = extr.request(url, params=params).text
+ submit_url = text.extract(page, 'submitUrl = "', '"')[0]
+
+ # get code from initial access_token
+ url = "https://login.sankakucomplex.com" + submit_url
+ data = {
+ "accessToken": access_token,
+ "nonce" : "undefined",
+ }
+ response = extr.request(url, method="POST", data=data)
+ query = text.parse_query(response.request.url.partition("?")[2])
+
+ # get final access_token from code
+ url = "https://capi-v2.sankakucomplex.com/sso/finalize?lang=en"
+ data = {
+ "code" : query["code"],
+ "client_id" : "sankaku-web-app",
+ "redirect_uri": "https://sankaku.app/sso/callback",
+ }
+ response = extr.request(
+ url, method="POST", headers=headers, json=data, fatal=False)
+ data = response.json()
+
+ if response.status_code >= 400 or not data.get("success"):
+ raise exception.AuthenticationError(data.get("error"))
+ return "Bearer " + data["access_token"]