New upstream version 1.28.2. (upstream/1.28.2)

author: Unit 193 <unit193@unit193.net> 2024-12-22 05:45:18 -0500
committer: Unit 193 <unit193@unit193.net> 2024-12-22 05:45:18 -0500
commit: bb8260277ab7483652c6c1526a15d62da92acc96 (patch)
tree: 02959c9d5aceb66f4429e0be1bc927921e01bbdc /gallery_dl/extractor/zerochan.py
parent: f6877087773089220d68288d055276fca6c556d4 (diff)
1 file changed, 28 insertions, 9 deletions
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index 4c4fb3a..bc135ad 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -78,8 +78,8 @@ class ZerochanExtractor(BooruExtractor):
                 'class="breadcrumbs', '</nav>'))[2:],
             "uploader": extr('href="/user/', '"'),
             "tags"    : extr('<ul id="tags"', '</ul>'),
-            "source"  : text.unescape(text.extr(
-                extr('id="source-url"', '</a>'), 'href="', '"')),
+            "source"  : text.unescape(text.remove_html(extr(
+                'id="source-url"', '</p>').rpartition("</s>")[2])),
         }
 
         html = data["tags"]
@@ -93,14 +93,12 @@ class ZerochanExtractor(BooruExtractor):
 
     def _parse_entry_api(self, entry_id):
         url = "{}/{}?json".format(self.root, entry_id)
-        text = self.request(url).text
+        txt = self.request(url).text
         try:
-            item = util.json_loads(text)
-        except ValueError as exc:
-            if " control character " not in str(exc):
-                raise
-            text = re.sub(r"[\x00-\x1f\x7f]", "", text)
-            item = util.json_loads(text)
+            item = util.json_loads(txt)
+        except ValueError:
+            item = self._parse_json(txt)
+            item["id"] = text.parse_int(entry_id)
 
         data = {
             "id"      : item["id"],
@@ -118,6 +116,27 @@ class ZerochanExtractor(BooruExtractor):
 
         return data
 
+    def _parse_json(self, txt):
+        txt = re.sub(r"[\x00-\x1f\x7f]", "", txt)
+        main, _, tags = txt.partition('tags": [')
+
+        item = {}
+        for line in main.split(',  "')[1:]:
+            key, _, value = line.partition('": ')
+            if value:
+                if value[0] == '"':
+                    value = value[1:-1]
+                else:
+                    value = text.parse_int(value)
+            if key:
+                item[key] = value
+
+        item["tags"] = tags = tags[5:].split('",    "')
+        if tags:
+            tags[-1] = tags[-1][:-5]
+
+        return item
+
     def _tags(self, post, page):
         tags = collections.defaultdict(list)
         for tag in post["tags"]:
author	Unit 193 <unit193@unit193.net>	2024-12-22 05:45:18 -0500
committer	Unit 193 <unit193@unit193.net>	2024-12-22 05:45:18 -0500
commit	bb8260277ab7483652c6c1526a15d62da92acc96 (patch)
tree	02959c9d5aceb66f4429e0be1bc927921e01bbdc /gallery_dl/extractor/zerochan.py
parent	f6877087773089220d68288d055276fca6c556d4 (diff)