summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/zerochan.py
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@unit193.net>2024-12-22 05:45:18 -0500
committerLibravatarUnit 193 <unit193@unit193.net>2024-12-22 05:45:18 -0500
commitbb8260277ab7483652c6c1526a15d62da92acc96 (patch)
tree02959c9d5aceb66f4429e0be1bc927921e01bbdc /gallery_dl/extractor/zerochan.py
parentf6877087773089220d68288d055276fca6c556d4 (diff)
New upstream version 1.28.2.upstream/1.28.2
Diffstat (limited to 'gallery_dl/extractor/zerochan.py')
-rw-r--r--gallery_dl/extractor/zerochan.py37
1 files changed, 28 insertions, 9 deletions
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index 4c4fb3a..bc135ad 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -78,8 +78,8 @@ class ZerochanExtractor(BooruExtractor):
'class="breadcrumbs', '</nav>'))[2:],
"uploader": extr('href="/user/', '"'),
"tags" : extr('<ul id="tags"', '</ul>'),
- "source" : text.unescape(text.extr(
- extr('id="source-url"', '</a>'), 'href="', '"')),
+ "source" : text.unescape(text.remove_html(extr(
+ 'id="source-url"', '</p>').rpartition("</s>")[2])),
}
html = data["tags"]
@@ -93,14 +93,12 @@ class ZerochanExtractor(BooruExtractor):
def _parse_entry_api(self, entry_id):
url = "{}/{}?json".format(self.root, entry_id)
- text = self.request(url).text
+ txt = self.request(url).text
try:
- item = util.json_loads(text)
- except ValueError as exc:
- if " control character " not in str(exc):
- raise
- text = re.sub(r"[\x00-\x1f\x7f]", "", text)
- item = util.json_loads(text)
+ item = util.json_loads(txt)
+ except ValueError:
+ item = self._parse_json(txt)
+ item["id"] = text.parse_int(entry_id)
data = {
"id" : item["id"],
@@ -118,6 +116,27 @@ class ZerochanExtractor(BooruExtractor):
return data
+ def _parse_json(self, txt):
+ txt = re.sub(r"[\x00-\x1f\x7f]", "", txt)
+ main, _, tags = txt.partition('tags": [')
+
+ item = {}
+ for line in main.split(', "')[1:]:
+ key, _, value = line.partition('": ')
+ if value:
+ if value[0] == '"':
+ value = value[1:-1]
+ else:
+ value = text.parse_int(value)
+ if key:
+ item[key] = value
+
+ item["tags"] = tags = tags[5:].split('", "')
+ if tags:
+ tags[-1] = tags[-1][:-5]
+
+ return item
+
def _tags(self, post, page):
tags = collections.defaultdict(list)
for tag in post["tags"]: