diff options
Diffstat (limited to 'gallery_dl/extractor/zerochan.py')
| -rw-r--r-- | gallery_dl/extractor/zerochan.py | 37 |
1 files changed, 28 insertions, 9 deletions
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py index 4c4fb3a..bc135ad 100644 --- a/gallery_dl/extractor/zerochan.py +++ b/gallery_dl/extractor/zerochan.py @@ -78,8 +78,8 @@ class ZerochanExtractor(BooruExtractor): 'class="breadcrumbs', '</nav>'))[2:], "uploader": extr('href="/user/', '"'), "tags" : extr('<ul id="tags"', '</ul>'), - "source" : text.unescape(text.extr( - extr('id="source-url"', '</a>'), 'href="', '"')), + "source" : text.unescape(text.remove_html(extr( + 'id="source-url"', '</p>').rpartition("</s>")[2])), } html = data["tags"] @@ -93,14 +93,12 @@ class ZerochanExtractor(BooruExtractor): def _parse_entry_api(self, entry_id): url = "{}/{}?json".format(self.root, entry_id) - text = self.request(url).text + txt = self.request(url).text try: - item = util.json_loads(text) - except ValueError as exc: - if " control character " not in str(exc): - raise - text = re.sub(r"[\x00-\x1f\x7f]", "", text) - item = util.json_loads(text) + item = util.json_loads(txt) + except ValueError: + item = self._parse_json(txt) + item["id"] = text.parse_int(entry_id) data = { "id" : item["id"], @@ -118,6 +116,27 @@ class ZerochanExtractor(BooruExtractor): return data + def _parse_json(self, txt): + txt = re.sub(r"[\x00-\x1f\x7f]", "", txt) + main, _, tags = txt.partition('tags": [') + + item = {} + for line in main.split(', "')[1:]: + key, _, value = line.partition('": ') + if value: + if value[0] == '"': + value = value[1:-1] + else: + value = text.parse_int(value) + if key: + item[key] = value + + item["tags"] = tags = tags[5:].split('", "') + if tags: + tags[-1] = tags[-1][:-5] + + return item + def _tags(self, post, page): tags = collections.defaultdict(list) for tag in post["tags"]: |
