New upstream version 1.31.1.upstream/1.31.1

author: Unit 193 <unit193@unit193.net> 2025-12-20 05:49:04 -0500
committer: Unit 193 <unit193@unit193.net> 2025-12-20 05:49:04 -0500
commit: a24ec1647aeac35a63b744ea856011ad6e06be3b (patch)
tree: ae94416de786aeddd05d99559098f7f16bb103a6 /gallery_dl/extractor/bellazon.py
parent: 33f8a8a37a9cba738ef25fb99955f0730da9eb48 (diff)
1 files changed, 31 insertions, 16 deletions
diff --git a/gallery_dl/extractor/bellazon.py b/gallery_dl/extractor/bellazon.py
index ce50a91..33f4ad3 100644
--- a/gallery_dl/extractor/bellazon.py
+++ b/gallery_dl/extractor/bellazon.py
@@ -46,8 +46,8 @@ class BellazonExtractor(Extractor):
             data = {"post": post}
             post["count"] = data["count"] = len(urls)
 
-            yield Message.Directory, data
-            data["num"] = 0
+            yield Message.Directory, "", data
+            data["num"] = data["num_internal"] = data["num_external"] = 0
             for info, url, url_img in urls:
                 url = text.unescape(url or url_img)
 
@@ -59,27 +59,35 @@ class BellazonExtractor(Extractor):
                     ):
                         continue
                     data["num"] += 1
+                    data["num_internal"] += 1
                     if not (alt := text.extr(info, ' alt="', '"')) or (
                             alt.startswith("post-") and "_thumb." in alt):
-                        name = url
+                        dc = text.nameext_from_url(url, data.copy())
                     else:
-                        name = text.unescape(alt)
+                        dc = data.copy()
+                        dc["name"] = name = text.unescape(alt)
+                        dc["filename"] = name.partition(".")[0]
 
-                    dc = text.nameext_from_url(name, data.copy())
                     dc["id"] = text.extr(info, 'data-fileid="', '"')
                     if ext := text.extr(info, 'data-fileext="', '"'):
                         dc["extension"] = ext
                     elif "/core/interface/file/attachment.php" in url:
                         if not dc["id"]:
-                            dc["id"] = url.rpartition("?id=")[2]
+                            dc["id"] = \
+                                url.rpartition("?id=")[2].partition("&")[0]
                         if name := text.extr(info, ">", "<").strip():
-                            text.nameext_from_url(name, dc)
+                            dc["name"] = name = text.unescape(name)
+                            text.nameext_from_name(name, dc)
+                    else:
+                        dc["extension"] = text.ext_from_url(url)
 
                     if url[0] == "/":
                         url = f"https:{url}"
                     yield Message.Url, url, dc
 
                 else:
+                    data["num"] += 1
+                    data["num_external"] += 1
                     yield Message.Queue, url, data
 
     def _pagination(self, base, pnum=None):
@@ -106,7 +114,7 @@ class BellazonExtractor(Extractor):
     def _pagination_reverse(self, base, pnum=None):
         base = f"{self.root}{base}"
 
-        url = f"{base}/page/9999/"  # force redirect to highest page number
+        url = f"{base}/page/{'9999' if pnum is None else pnum}/"
         with self.request(url) as response:
             parts = response.url.rsplit("/", 3)
             pnum = text.parse_int(parts[2]) if parts[1] == "page" else 1
@@ -130,7 +138,7 @@ class BellazonExtractor(Extractor):
         author = schema["author"]
         stats = schema["interactionStatistic"]
         url_t = schema["url"]
-        url_a = author["url"]
+        url_a = author.get("url") or ""
 
         path = text.split_html(text.extr(
             page, '<nav class="ipsBreadcrumb', "</nav>"))[2:-1]
@@ -141,8 +149,8 @@ class BellazonExtractor(Extractor):
             "title": schema["headline"],
             "views": stats[0]["userInteractionCount"],
             "posts": stats[1]["userInteractionCount"],
-            "date" : text.parse_datetime(schema["datePublished"]),
-            "date_updated": text.parse_datetime(schema["dateModified"]),
+            "date" : self.parse_datetime_iso(schema["datePublished"]),
+            "date_updated": self.parse_datetime_iso(schema["dateModified"]),
             "description" : text.unescape(schema["text"]).strip(),
             "section"     : path[-2],
             "author"      : author["name"],
@@ -151,8 +159,12 @@ class BellazonExtractor(Extractor):
 
         thread["id"], _, thread["slug"] = \
             url_t.rsplit("/", 2)[1].partition("-")
-        thread["author_id"], _, thread["author_slug"] = \
-            url_a.rsplit("/", 2)[1].partition("-")
+
+        if url_a:
+            thread["author_id"], _, thread["author_slug"] = \
+                url_a.rsplit("/", 2)[1].partition("-")
+        else:
+            thread["author_id"] = thread["author_slug"] = ""
 
         return thread
 
@@ -162,15 +174,18 @@ class BellazonExtractor(Extractor):
         post = {
             "id": extr('id="elComment_', '"'),
             "author_url": extr(" href='", "'"),
-            "date": text.parse_datetime(extr("datetime='", "'")),
+            "date": self.parse_datetime_iso(extr("datetime='", "'")),
             "content": extr("<!-- Post content -->", "\n\t\t</div>"),
         }
 
         if (pos := post["content"].find(">")) >= 0:
             post["content"] = post["content"][pos+1:].strip()
 
-        post["author_id"], _, post["author_slug"] = \
-            post["author_url"].rsplit("/", 2)[1].partition("-")
+        if url_a := post["author_url"]:
+            post["author_id"], _, post["author_slug"] = \
+                url_a.rsplit("/", 2)[1].partition("-")
+        else:
+            post["author_id"] = post["author_slug"] = ""
 
         return post
author	Unit 193 <unit193@unit193.net>	2025-12-20 05:49:04 -0500
committer	Unit 193 <unit193@unit193.net>	2025-12-20 05:49:04 -0500
commit	a24ec1647aeac35a63b744ea856011ad6e06be3b (patch)
tree	ae94416de786aeddd05d99559098f7f16bb103a6 /gallery_dl/extractor/bellazon.py
parent	33f8a8a37a9cba738ef25fb99955f0730da9eb48 (diff)