summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/zerochan.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/zerochan.py')
-rw-r--r-- gallery_dl/extractor/zerochan.py 22
1 files changed, 14 insertions, 8 deletions
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index bc135ad..ac1400e 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -64,16 +64,22 @@ class ZerochanExtractor(BooruExtractor):
def _parse_entry_html(self, entry_id):
url = "{}/{}".format(self.root, entry_id)
- extr = text.extract_from(self.request(url).text)
+ page = self.request(url).text
+ try:
+ jsonld = self._extract_jsonld(page)
+ except Exception:
+ return {"id": entry_id}
+
+ extr = text.extract_from(page)
data = {
"id" : text.parse_int(entry_id),
- "author" : text.parse_unicode_escapes(extr(' "name": "', '"')),
- "file_url": extr('"contentUrl": "', '"'),
- "date" : text.parse_datetime(extr('"datePublished": "', '"')),
- "width" : text.parse_int(extr('"width": "', ' ')),
- "height" : text.parse_int(extr('"height": "', ' ')),
- "size" : text.parse_bytes(extr('"contentSize": "', 'B')),
+ "author" : jsonld["author"]["name"],
+ "file_url": jsonld["contentUrl"],
+ "date" : text.parse_datetime(jsonld["datePublished"]),
+ "width" : text.parse_int(jsonld["width"][:-3]),
+ "height" : text.parse_int(jsonld["height"][:-3]),
+ "size" : text.parse_bytes(jsonld["contentSize"][:-1]),
"path" : text.split_html(extr(
'class="breadcrumbs', '</nav>'))[2:],
"uploader": extr('href="/user/', '"'),
@@ -86,7 +92,7 @@ class ZerochanExtractor(BooruExtractor):
tags = data["tags"] = []
for tag in html.split("<li class=")[1:]:
category = text.extr(tag, '"', '"')
- name = text.extr(tag, 'data-tag="', '"')
+ name = text.unescape(text.extr(tag, 'data-tag="', '"'))
tags.append(category.partition(" ")[0].capitalize() + ":" + name)
return data