diff options
Diffstat (limited to 'gallery_dl/extractor/skeb.py')
| -rw-r--r-- | gallery_dl/extractor/skeb.py | 64 |
1 files changed, 63 insertions, 1 deletions
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index cd8c238..822b1f2 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -26,8 +26,11 @@ class SkebExtractor(Extractor):
         self.article = self.config("article", False)
 
     def items(self):
+        metadata = self.metadata()
         for user_name, post_num in self.posts():
             response, post = self._get_post_data(user_name, post_num)
+            if metadata:
+                post.update(metadata)
             yield Message.Directory, post
             for data in self._get_urls_from_post(response, post):
                 url = data["file_url"]
@@ -36,6 +39,9 @@ class SkebExtractor(Extractor):
     def posts(self):
         """Return post number"""
 
+    def metadata(self):
+        """Return additional metadata"""
+
     def _pagination(self, url, params):
         headers = {"Referer": self.root, "Authorization": "Bearer null"}
         params["offset"] = 0
@@ -223,6 +229,62 @@ class SkebUserExtractor(SkebExtractor):
         return posts
 
 
+class SkebSearchExtractor(SkebExtractor):
+    """Extractor for skeb search results"""
+    subcategory = "search"
+    pattern = r"(?:https?://)?skeb\.jp/search\?q=([^&#]+)"
+    test = ("https://skeb.jp/search?q=bunny%20tree&t=works", {
+        "count": ">= 18",
+        "keyword": {"search_tags": "bunny tree"},
+    })
+
+    def metadata(self):
+        return {"search_tags": text.unquote(self.user_name)}
+
+    def posts(self):
+        url = "https://hb1jt3kre9-2.algolianet.com/1/indexes/*/queries"
+        params = {
+            "x-algolia-agent": "Algolia for JavaScript (4.13.1); Browser",
+        }
+        headers = {
+            "Origin": self.root,
+            "Referer": self.root + "/",
+            "x-algolia-api-key": "9a4ce7d609e71bf29e977925e4c6740c",
+            "x-algolia-application-id": "HB1JT3KRE9",
+        }
+
+        filters = self.config("filters")
+        if filters is None:
+            filters = ("genre:art OR genre:voice OR genre:novel OR "
+                       "genre:video OR genre:music OR genre:correction")
+        elif not isinstance(filters, str):
+            filters = " OR ".join(filters)
+
+        page = 0
+        pams = "hitsPerPage=40&filters=" + text.quote(filters) + "&page="
+
+        request = {
+            "indexName": "Request",
+            "query": text.unquote(self.user_name),
+            "params": pams + str(page),
+        }
+        data = {"requests": (request,)}
+
+        while True:
+            result = self.request(
+                url, method="POST", params=params, headers=headers, json=data,
+            ).json()["results"][0]
+
+            for post in result["hits"]:
+                parts = post["path"].split("/")
+                yield parts[1][1:], parts[3]
+
+            if page >= result["nbPages"]:
+                return
+            page += 1
+            request["params"] = pams + str(page)
+
+
 class SkebFollowingExtractor(SkebExtractor):
     """Extractor for all creators followed by a skeb user"""
     subcategory = "following"
@@ -238,8 +300,8 @@ class SkebFollowingExtractor(SkebExtractor):
     def users(self):
         url = "{}/api/users/{}/following_creators".format(
             self.root, self.user_name)
-        headers = {"Referer": self.root, "Authorization": "Bearer null"}
         params = {"sort": "date", "offset": 0, "limit": 90}
+        headers = {"Referer": self.root, "Authorization": "Bearer null"}
 
         while True:
             data = self.request(url, params=params, headers=headers).json()
