New upstream version 1.24.2. (ref: upstream/1.24.2)

author: Unit 193 <unit193@unit193.net> 2022-12-24 17:14:46 -0500
committer: Unit 193 <unit193@unit193.net> 2022-12-24 17:14:46 -0500
commit: ebdfcd3cd3f76534a590ba08933ff7ea54813316 (patch)
tree: 35db6003766dff695cf8a5aa24f47629b602b7c0 /gallery_dl/extractor/zerochan.py
parent: 3338dfce719c999467ffe08fd45663be8190057a (diff)
1 file changed, 70 insertions, 37 deletions
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index c0d43fe..74da615 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -11,8 +11,6 @@
 from .booru import BooruExtractor
 from ..cache import cache
 from .. import text, exception
-from xml.etree import ElementTree
-
 
 BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
 
@@ -27,12 +25,13 @@ class ZerochanExtractor(BooruExtractor):
     cookienames = ("z_id", "z_hash")
 
     def login(self):
+        self._logged_in = True
         if not self._check_cookies(self.cookienames):
             username, password = self._get_auth_info()
             if username:
                 self._update_cookies(self._login_impl(username, password))
-        # force legacy layout
-        self.session.cookies.set("v3", "0", domain=self.cookiedomain)
+            else:
+                self._logged_in = False
 
     @cache(maxage=90*86400, keyarg=1)
     def _login_impl(self, username, password):
@@ -60,36 +59,50 @@ class ZerochanExtractor(BooruExtractor):
         url = "{}/{}".format(self.root, entry_id)
         extr = text.extract_from(self.request(url).text)
 
-        return {
-            "id"    : entry_id,
-            "author": extr('"author": "', '"'),
+        data = {
+            "id"      : text.parse_int(entry_id),
+            "author"  : extr('"author": "', '"'),
             "file_url": extr('"contentUrl": "', '"'),
-            "date"  : text.parse_datetime(extr(
-                '"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y"),
-            "width" : extr('"width": "', ' '),
-            "height": extr('"height": "', ' '),
-            "size"  : text.parse_bytes(extr('"contentSize": "', 'B')),
-            "path"  : text.split_html(extr(
-                'class="breadcrumbs', '</p>'))[3::2],
-            "tags"  : extr('alt="Tags: Anime, ', '"').split(", ")
+            "date"    : text.parse_datetime(extr('"datePublished": "', '"')),
+            "width"   : text.parse_int(extr('"width": "', ' ')),
+            "height"  : text.parse_int(extr('"height": "', ' ')),
+            "size"    : text.parse_bytes(extr('"contentSize": "', 'B')),
+            "path"    : text.split_html(extr(
+                'class="breadcrumbs', '</p>'))[2:],
+            "uploader": extr('href="/user/', '"'),
+            "tags"    : extr('<ul id="tags"', '</ul>'),
+            "source"  : extr('<h2>Source</h2>', '</p><h2>').rpartition(
+                ">")[2] or None,
         }
 
-    def _parse_entry_xml(self, entry_id):
-        url = "{}/{}?xml".format(self.root, entry_id)
-        item = ElementTree.fromstring(self.request(url).text)[0][-1]
-        #  content = item[4].attrib
-
-        return {
-            #  "id"    : entry_id,
-            #  "file_url": content["url"],
-            #  "width" : content["width"],
-            #  "height": content["height"],
-            #  "size"  : content["filesize"],
-            "name"  : item[2].text,
-            "tags"  : item[5].text.lstrip().split(", "),
-            "md5"   : item[6].text,
+        html = data["tags"]
+        tags = data["tags"] = []
+        for tag in html.split("<li class=")[1:]:
+            category, _, name = text.extr(tag, 'alt="', '<').partition('">')
+            tags.append(category + ":" + name.strip())
+
+        return data
+
+    def _parse_entry_json(self, entry_id):
+        url = "{}/{}?json".format(self.root, entry_id)
+        item = self.request(url).json()
+
+        data = {
+            "id"      : item["id"],
+            "file_url": item["full"],
+            "width"   : item["width"],
+            "height"  : item["height"],
+            "size"    : item["size"],
+            "name"    : item["primary"],
+            "md5"     : item["hash"],
+            "source"  : item.get("source"),
         }
 
+        if not self._logged_in:
+            data["tags"] = item["tags"]
+
+        return data
+
 
 class ZerochanTagExtractor(ZerochanExtractor):
     subcategory = "tag"
@@ -138,7 +151,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
                 if metadata:
                     entry_id = extr('href="/', '"')
                     post = self._parse_entry_html(entry_id)
-                    post.update(self._parse_entry_xml(entry_id))
+                    post.update(self._parse_entry_json(entry_id))
                     yield post
                 else:
                     yield {
@@ -163,14 +176,34 @@ class ZerochanImageExtractor(ZerochanExtractor):
         "pattern": r"https://static\.zerochan\.net/"
                    r"Perth\.%28Kantai\.Collection%29\.full.2920445\.jpg",
         "keyword": {
-            "author": "YukinoTokisaki",
+            "author": "YeFan 葉凡",
             "date": "dt:2020-04-24 21:33:44",
-            "file_url": str,
+            "file_url": "https://static.zerochan.net"
+                        "/Perth.%28Kantai.Collection%29.full.2920445.jpg",
             "filename": "Perth.(Kantai.Collection).full.2920445",
-            "height": "1366",
-            "id": "2920445",
-            "size": "1929k",
-            "width": "1920",
+            "height": 1366,
+            "id": 2920445,
+            "path": ["Kantai Collection", "Perth (Kantai Collection)"],
+            "size": 1975296,
+            "tags": [
+                "Mangaka:YeFan 葉凡",
+                "Game:Kantai Collection",
+                "Character:Perth (Kantai Collection)",
+                "Theme:Blonde Hair",
+                "Theme:Braids",
+                "Theme:Coat",
+                "Theme:Female",
+                "Theme:Firefighter Outfit",
+                "Theme:Group",
+                "Theme:Long Sleeves",
+                "Theme:Personification",
+                "Theme:Pins",
+                "Theme:Ribbon",
+                "Theme:Shirt",
+                "Theme:Short Hair",
+            ],
+            "uploader": "YukinoTokisaki",
+            "width": 1920,
         },
     })
 
@@ -181,5 +214,5 @@ class ZerochanImageExtractor(ZerochanExtractor):
     def posts(self):
         post = self._parse_entry_html(self.image_id)
         if self.config("metadata"):
-            post.update(self._parse_entry_xml(self.image_id))
+            post.update(self._parse_entry_json(self.image_id))
         return (post,)
author	Unit 193 <unit193@unit193.net>	2022-12-24 17:14:46 -0500
committer	Unit 193 <unit193@unit193.net>	2022-12-24 17:14:46 -0500
commit	ebdfcd3cd3f76534a590ba08933ff7ea54813316 (patch)
tree	35db6003766dff695cf8a5aa24f47629b602b7c0 /gallery_dl/extractor/zerochan.py
parent	3338dfce719c999467ffe08fd45663be8190057a (diff)