diff options
Diffstat (limited to 'gallery_dl/extractor/zerochan.py')
| -rw-r--r-- | gallery_dl/extractor/zerochan.py | 58 |
1 files changed, 52 insertions, 6 deletions
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py index 6ee96e6..fc61dff 100644 --- a/gallery_dl/extractor/zerochan.py +++ b/gallery_dl/extractor/zerochan.py @@ -10,7 +10,7 @@ from .booru import BooruExtractor from ..cache import cache -from .. import text, exception +from .. import text, util, exception BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net" @@ -21,8 +21,11 @@ class ZerochanExtractor(BooruExtractor): root = "https://www.zerochan.net" filename_fmt = "{id}.{extension}" archive_fmt = "{id}" + page_start = 1 + per_page = 250 cookies_domain = ".zerochan.net" cookies_names = ("z_id", "z_hash") + request_interval = (0.5, 1.5) def login(self): self._logged_in = True @@ -86,7 +89,7 @@ class ZerochanExtractor(BooruExtractor): return data - def _parse_entry_json(self, entry_id): + def _parse_entry_api(self, entry_id): url = "{}/{}?json".format(self.root, entry_id) item = self.request(url).json() @@ -117,14 +120,22 @@ class ZerochanTagExtractor(ZerochanExtractor): ZerochanExtractor.__init__(self, match) self.search_tag, self.query = match.groups() + def _init(self): + if self.config("pagination") == "html": + self.posts = self.posts_html + self.per_page = 24 + else: + self.posts = self.posts_api + self.session.headers["User-Agent"] = util.USERAGENT + def metadata(self): return {"search_tags": text.unquote( self.search_tag.replace("+", " "))} - def posts(self): + def posts_html(self): url = self.root + "/" + self.search_tag params = text.parse_query(self.query) - params["p"] = text.parse_int(params.get("p"), 1) + params["p"] = text.parse_int(params.get("p"), self.page_start) metadata = self.config("metadata") while True: @@ -140,7 +151,7 @@ class ZerochanTagExtractor(ZerochanExtractor): if metadata: entry_id = extr('href="/', '"') post = self._parse_entry_html(entry_id) - post.update(self._parse_entry_json(entry_id)) + post.update(self._parse_entry_api(entry_id)) yield post else: yield { @@ -157,6 +168,41 @@ class ZerochanTagExtractor(ZerochanExtractor): break params["p"] += 1 + def posts_api(self): + url = self.root + "/" + self.search_tag + metadata = self.config("metadata") + params = { + "json": "1", + "l" : self.per_page, + "p" : self.page_start, + } + + static = "https://static.zerochan.net/.full." + + while True: + data = self.request(url, params=params).json() + try: + posts = data["items"] + except ValueError: + return + + if metadata: + for post in posts: + post_id = post["id"] + post.update(self._parse_entry_html(post_id)) + post.update(self._parse_entry_api(post_id)) + else: + for post in posts: + base = static + str(post["id"]) + post["file_url"] = base + ".jpg" + post["_fallback"] = (base + ".png",) + + yield from posts + + if not data.get("next"): + return + params["p"] += 1 + class ZerochanImageExtractor(ZerochanExtractor): subcategory = "image" @@ -170,5 +216,5 @@ class ZerochanImageExtractor(ZerochanExtractor): def posts(self): post = self._parse_entry_html(self.image_id) if self.config("metadata"): - post.update(self._parse_entry_json(self.image_id)) + post.update(self._parse_entry_api(self.image_id)) return (post,) |
