1 files changed, 64 insertions, 13 deletions
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index c2d1243..8c6e6d8 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -38,6 +38,7 @@ class PixivExtractor(Extractor):
         self.meta_user = self.config("metadata")
         self.meta_bookmark = self.config("metadata-bookmark")
         self.meta_comments = self.config("comments")
+        self.meta_captions = self.config("captions")
 
     def items(self):
         tags = self.config("tags", "japanese")
@@ -76,8 +77,8 @@ class PixivExtractor(Extractor):
                 detail = self.api.illust_bookmark_detail(work["id"])
                 work["tags_bookmark"] = [tag["name"] for tag in detail["tags"]
                                          if tag["is_registered"]]
-            if self.sanity_workaround and not work.get("caption") and \
-                    not work.get("_mypixiv"):
+            if self.meta_captions and not work.get("caption") and \
+                    not work.get("_mypixiv") and not work.get("_ajax"):
                 body = self._request_ajax("/illust/" + str(work["id"]))
                 if body:
                     work["caption"] = text.unescape(body["illustComment"])
@@ -108,10 +109,10 @@ class PixivExtractor(Extractor):
             if self.load_ugoira:
                 try:
                     return self._extract_ugoira(work)
-                except exception.StopExtraction as exc:
+                except Exception as exc:
                     self.log.warning(
-                        "Unable to retrieve Ugoira metatdata (%s - %s)",
-                        work["id"], exc.message)
+                        "%s: Unable to retrieve Ugoira metatdata (%s - %s)",
+                        work["id"], exc.__class__.__name__, exc)
 
         elif work["page_count"] == 1:
             url = meta_single_page["original_image_url"]
@@ -186,6 +187,7 @@ class PixivExtractor(Extractor):
             return None
 
     def _extract_ajax(self, work, body):
+        work["_ajax"] = True
         url = self._extract_ajax_url(body)
         if not url:
             return ()
@@ -243,12 +245,12 @@ class PixivExtractor(Extractor):
             original = body["urls"]["original"]
             if original:
                 return original
-        except KeyError:
+        except Exception:
             pass
 
         try:
             square1200 = body["userIllusts"][body["id"]]["url"]
-        except KeyError:
+        except Exception:
             return
         parts = square1200.rpartition("_p0")[0].split("/")
         del parts[3:5]
@@ -293,9 +295,6 @@ class PixivExtractor(Extractor):
             "x_restrict"      : 0,
         }
 
-    def _web_to_mobile(self, work):
-        return work
-
     def works(self):
         """Return an iterable containing all relevant 'work' objects"""
 
@@ -334,15 +333,17 @@ class PixivUserExtractor(PixivExtractor):
 class PixivArtworksExtractor(PixivExtractor):
     """Extractor for artworks of a pixiv user"""
     subcategory = "artworks"
+    _warning = True
     pattern = (BASE_PATTERN + r"/(?:"
                r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
                r"(?:/([^/?#]+))?/?(?:$|[?#])"
                r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
     example = "https://www.pixiv.net/en/users/12345/artworks"
 
-    def __init__(self, match):
-        PixivExtractor.__init__(self, match)
-        u1, t1, u2, t2 = match.groups()
+    def _init(self):
+        PixivExtractor._init(self)
+
+        u1, t1, u2, t2 = self.groups
         if t1:
             t1 = text.unquote(t1)
         elif t2:
@@ -350,6 +351,14 @@ class PixivArtworksExtractor(PixivExtractor):
         self.user_id = u1 or u2
         self.tag = t1 or t2
 
+        if self.sanity_workaround:
+            self.cookies_domain = d = ".pixiv.net"
+            self._init_cookies()
+            if self._warning and not self.cookies.get("PHPSESSID", domain=d):
+                PixivArtworksExtractor._warning = False
+                self.log.warning("No 'PHPSESSID' cookie set. Can detect only "
+                                 "non R-18 'sanity_level' works.")
+
     def metadata(self):
         if self.config("metadata"):
             self.api.user_detail(self.user_id)
@@ -358,6 +367,19 @@ class PixivArtworksExtractor(PixivExtractor):
     def works(self):
         works = self.api.user_illusts(self.user_id)
 
+        if self.sanity_workaround:
+            body = self._request_ajax(
+                "/user/{}/profile/all".format(self.user_id))
+            try:
+                ajax_ids = list(map(int, body["illusts"]))
+                ajax_ids.extend(map(int, body["manga"]))
+                ajax_ids.sort()
+            except Exception as exc:
+                self.log.warning("Unable to collect artwork IDs using AJAX "
+                                 "API (%s: %s)", exc.__class__.__name__, exc)
+            else:
+                works = self._extend_sanity(works, ajax_ids)
+
         if self.tag:
             tag = self.tag.lower()
             works = (
@@ -367,6 +389,35 @@ class PixivArtworksExtractor(PixivExtractor):
 
         return works
 
+    def _extend_sanity(self, works, ajax_ids):
+        user = {"id": 1}
+        index = len(ajax_ids) - 1
+
+        for work in works:
+            while index >= 0:
+                work_id = work["id"]
+                ajax_id = ajax_ids[index]
+
+                if ajax_id == work_id:
+                    index -= 1
+                    break
+
+                elif ajax_id > work_id:
+                    index -= 1
+                    self.log.debug("Inserting work %s", ajax_id)
+                    yield self._make_work(ajax_id, self.sanity_url, user)
+
+                else:  # ajax_id < work_id
+                    break
+
+            yield work
+
+        while index >= 0:
+            ajax_id = ajax_ids[index]
+            self.log.debug("Inserting work %s", ajax_id)
+            yield self._make_work(ajax_id, self.sanity_url, user)
+            index -= 1
+
 
 class PixivAvatarExtractor(PixivExtractor):
     """Extractor for pixiv avatars"""