diff options
| author | 2025-03-29 07:19:58 -0400 | |
|---|---|---|
| committer | 2025-03-29 07:19:58 -0400 | |
| commit | 662e5ac868a5c1a3e7bc95b37054b3a0ca4db74f (patch) | |
| tree | 537d0429926fb5eb3719aa2b384048ae79bda0b8 /gallery_dl/extractor/zerochan.py | |
| parent | 8026a3c45446030d7af524bfc487d3462c8114ef (diff) | |
New upstream version 1.29.3. (ref: upstream/1.29.3)
Diffstat (limited to 'gallery_dl/extractor/zerochan.py')
| -rw-r--r-- | gallery_dl/extractor/zerochan.py | 22 |
1 files changed, 14 insertions, 8 deletions
```diff
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index bc135ad..ac1400e 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -64,16 +64,22 @@ class ZerochanExtractor(BooruExtractor):
 
     def _parse_entry_html(self, entry_id):
         url = "{}/{}".format(self.root, entry_id)
-        extr = text.extract_from(self.request(url).text)
+        page = self.request(url).text
 
+        try:
+            jsonld = self._extract_jsonld(page)
+        except Exception:
+            return {"id": entry_id}
+
+        extr = text.extract_from(page)
         data = {
             "id" : text.parse_int(entry_id),
-            "author" : text.parse_unicode_escapes(extr(' "name": "', '"')),
-            "file_url": extr('"contentUrl": "', '"'),
-            "date" : text.parse_datetime(extr('"datePublished": "', '"')),
-            "width" : text.parse_int(extr('"width": "', ' ')),
-            "height" : text.parse_int(extr('"height": "', ' ')),
-            "size" : text.parse_bytes(extr('"contentSize": "', 'B')),
+            "author" : jsonld["author"]["name"],
+            "file_url": jsonld["contentUrl"],
+            "date" : text.parse_datetime(jsonld["datePublished"]),
+            "width" : text.parse_int(jsonld["width"][:-3]),
+            "height" : text.parse_int(jsonld["height"][:-3]),
+            "size" : text.parse_bytes(jsonld["contentSize"][:-1]),
             "path" : text.split_html(extr(
                 'class="breadcrumbs', '</nav>'))[2:],
             "uploader": extr('href="/user/', '"'),
@@ -86,7 +92,7 @@ class ZerochanExtractor(BooruExtractor):
         tags = data["tags"] = []
         for tag in html.split("<li class=")[1:]:
             category = text.extr(tag, '"', '"')
-            name = text.extr(tag, 'data-tag="', '"')
+            name = text.unescape(text.extr(tag, 'data-tag="', '"'))
             tags.append(category.partition(" ")[0].capitalize() + ":" + name)
 
         return data
```
