New upstream version 1.29.3. (tag: upstream/1.29.3)

author: Unit 193 <unit193@unit193.net> 2025-03-29 07:19:58 -0400
committer: Unit 193 <unit193@unit193.net> 2025-03-29 07:19:58 -0400
commit: 662e5ac868a5c1a3e7bc95b37054b3a0ca4db74f (patch)
tree: 537d0429926fb5eb3719aa2b384048ae79bda0b8 /gallery_dl/extractor/zerochan.py
parent: 8026a3c45446030d7af524bfc487d3462c8114ef (diff)
1 files changed, 14 insertions, 8 deletions
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index bc135ad..ac1400e 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -64,16 +64,22 @@ class ZerochanExtractor(BooruExtractor):
 
     def _parse_entry_html(self, entry_id):
         url = "{}/{}".format(self.root, entry_id)
-        extr = text.extract_from(self.request(url).text)
+        page = self.request(url).text
 
+        try:
+            jsonld = self._extract_jsonld(page)
+        except Exception:
+            return {"id": entry_id}
+
+        extr = text.extract_from(page)
         data = {
             "id"      : text.parse_int(entry_id),
-            "author"  : text.parse_unicode_escapes(extr('    "name": "', '"')),
-            "file_url": extr('"contentUrl": "', '"'),
-            "date"    : text.parse_datetime(extr('"datePublished": "', '"')),
-            "width"   : text.parse_int(extr('"width": "', ' ')),
-            "height"  : text.parse_int(extr('"height": "', ' ')),
-            "size"    : text.parse_bytes(extr('"contentSize": "', 'B')),
+            "author"  : jsonld["author"]["name"],
+            "file_url": jsonld["contentUrl"],
+            "date"    : text.parse_datetime(jsonld["datePublished"]),
+            "width"   : text.parse_int(jsonld["width"][:-3]),
+            "height"  : text.parse_int(jsonld["height"][:-3]),
+            "size"    : text.parse_bytes(jsonld["contentSize"][:-1]),
             "path"    : text.split_html(extr(
                 'class="breadcrumbs', '</nav>'))[2:],
             "uploader": extr('href="/user/', '"'),
@@ -86,7 +92,7 @@ class ZerochanExtractor(BooruExtractor):
         tags = data["tags"] = []
         for tag in html.split("<li class=")[1:]:
             category = text.extr(tag, '"', '"')
-            name = text.extr(tag, 'data-tag="', '"')
+            name = text.unescape(text.extr(tag, 'data-tag="', '"'))
             tags.append(category.partition(" ")[0].capitalize() + ":" + name)
 
         return data
author	Unit 193 <unit193@unit193.net>	2025-03-29 07:19:58 -0400
committer	Unit 193 <unit193@unit193.net>	2025-03-29 07:19:58 -0400
commit	662e5ac868a5c1a3e7bc95b37054b3a0ca4db74f (patch)
tree	537d0429926fb5eb3719aa2b384048ae79bda0b8 /gallery_dl/extractor/zerochan.py
parent	8026a3c45446030d7af524bfc487d3462c8114ef (diff)