New upstream version 1.10.2 (tag: upstream/1.10.2)

author: Unit 193 <unit193@ubuntu.com> 2019-08-26 19:34:45 -0400
committer: Unit 193 <unit193@ubuntu.com> 2019-08-26 19:34:45 -0400
commit: b75d158d014d6c43d7d785c46c9372a9cf84d144 (patch)
tree: 7dca4a7e61fe8b6e2bff2142fc19891e783a7d6d /gallery_dl/extractor/deviantart.py
parent: 64ad8e7bd15df71ab1116eede414558631bcad32 (diff)
1 file changed, 266 insertions, 128 deletions
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 63e2913..bd1299b 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -27,7 +27,7 @@ BASE_PATTERN = (
 
 
 class DeviantartExtractor(Extractor):
-    """Base class for deviantart extractors"""
+    """Base class for deviantart extractors using the OAuth API"""
     category = "deviantart"
     directory_fmt = ("{category}", "{author[username]!l}")
     filename_fmt = "{category}_{index}_{title}.{extension}"
@@ -38,11 +38,15 @@ class DeviantartExtractor(Extractor):
         self.offset = 0
         self.flat = self.config("flat", True)
         self.extra = self.config("extra", False)
+        self.quality = self.config("quality", "100")
         self.original = self.config("original", True)
         self.user = match.group(1) or match.group(2)
         self.group = False
         self.api = DeviantartAPI(self)
 
+        if self.quality:
+            self.quality = "q_{}".format(self.quality)
+
         if self.original != "image":
             self._update_content = self._update_content_default
         else:
@@ -81,12 +85,15 @@ class DeviantartExtractor(Extractor):
                         text.ext_from_url(content["src"]) != "gif":
                     self._update_content(deviation, content)
 
-                if deviation["index"] <= 790677560 and \
-                        content["src"].startswith("https://images-wixmp-"):
-                    # https://github.com/r888888888/danbooru/issues/4069
-                    content["src"] = re.sub(
-                        r"(/f/[^/]+/[^/]+)/v\d+/.*",
-                        r"/intermediary\1", content["src"])
+                if content["src"].startswith("https://images-wixmp-"):
+                    if deviation["index"] <= 790677560:
+                        # https://github.com/r888888888/danbooru/issues/4069
+                        content["src"] = re.sub(
+                            r"(/f/[^/]+/[^/]+)/v\d+/.*",
+                            r"/intermediary\1", content["src"])
+                    if self.quality:
+                        content["src"] = re.sub(
+                            r"q_\d+", self.quality, content["src"])
 
                 yield self.commit(deviation, content)
 
@@ -133,8 +140,16 @@ class DeviantartExtractor(Extractor):
     @staticmethod
     def commit(deviation, target):
         url = target["src"]
-        deviation["target"] = text.nameext_from_url(url, target.copy())
-        deviation["extension"] = deviation["target"]["extension"]
+        thumb = deviation["thumbs"][0]["src"] if "thumbs" in deviation else url
+        target = text.nameext_from_url(thumb, target.copy())
+        if target["filename"].endswith("-150"):
+            target["filename"] = target["filename"][:-4]
+        if not target["filename"].count("-"):
+            name, _, hid = target["filename"].rpartition("_")
+            target["filename"] = name + "-" + hid
+        deviation["target"] = target
+        deviation["filename"] = target["filename"]
+        deviation["extension"] = target["extension"] = text.ext_from_url(url)
         return Message.Url, url, deviation
 
     def _commit_journal_html(self, deviation, journal):
@@ -225,14 +240,6 @@ class DeviantartExtractor(Extractor):
         if mtype and mtype.startswith("image/"):
             content.update(data)
 
-    def _html_request(self, url, **kwargs):
-        cookies = {"userinfo": (
-            '__167217c8e6aac1a3331f;{"username":"","uniqueid":"ab2e8b184471bf0'
-            'e3f8ed3ee7a3220aa","vd":"Bc7vEx,BdC7Fy,A,J,A,,B,A,B,BdC7Fy,BdC7XU'
-            ',J,J,A,BdC7XU,13,A,B,A,,A,A,B,A,A,,A","attr":56}'
-        )}
-        return self.request(url, cookies=cookies, **kwargs)
-
 
 class DeviantartGalleryExtractor(DeviantartExtractor):
     """Extractor for all deviations from an artist's gallery"""
@@ -360,68 +367,6 @@ class DeviantartFolderExtractor(DeviantartExtractor):
         deviation["folder"] = self.folder
 
 
-class DeviantartDeviationExtractor(DeviantartExtractor):
-    """Extractor for single deviations"""
-    subcategory = "deviation"
-    archive_fmt = "{index}.{extension}"
-    pattern = BASE_PATTERN + r"/((?:art|journal)/[^/?&#]+-\d+)"
-    test = (
-        (("https://www.deviantart.com/shimoda7/art/"
-          "For-the-sake-of-a-memory-10073852"), {
-            "options": (("original", 0),),
-            "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
-        }),
-        ("https://www.deviantart.com/zzz/art/zzz-1234567890", {
-            "exception": exception.NotFoundError,
-        }),
-        (("https://www.deviantart.com/myria-moon/art/"
-          "Aime-Moi-part-en-vadrouille-261986576"), {
-            "pattern": (r"https?://s3\.amazonaws\.com/origin-orig\."
-                        r"deviantart\.net/a383/f/2013/135/e/7/[^.]+\.jpg\?"),
-        }),
-        # wixmp URL rewrite
-        (("https://www.deviantart.com/citizenfresh/art/"
-          "Hverarond-14-the-beauty-of-the-earth-789295466"), {
-            "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
-                        r"/intermediary/f/[^/]+/[^.]+\.jpg$")
-        }),
-        # non-download URL for GIFs (#242)
-        (("https://www.deviantart.com/skatergators/art/"
-          "COM-Monique-Model-781571783"), {
-            "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
-                        r"/f/[^/]+/[^.]+\.gif\?token="),
-        }),
-        # external URLs from description (#302)
-        (("https://www.deviantart.com/uotapo/art/"
-          "INANAKI-Memorial-Humane7-590297498"), {
-            "options": (("extra", 1), ("original", 0)),
-            "pattern": r"https?://sta\.sh/\w+$",
-            "range": "2-",
-            "count": 4,
-        }),
-        # old-style URLs
-        ("https://shimoda7.deviantart.com"
-         "/art/For-the-sake-of-a-memory-10073852"),
-        ("https://myria-moon.deviantart.com"
-         "/art/Aime-Moi-part-en-vadrouille-261986576"),
-        ("https://zzz.deviantart.com/art/zzz-1234567890"),
-    )
-
-    skip = Extractor.skip
-
-    def __init__(self, match):
-        DeviantartExtractor.__init__(self, match)
-        self.path = match.group(3)
-
-    def deviations(self):
-        url = "{}/{}/{}".format(self.root, self.user, self.path)
-        response = self._html_request(url, fatal=False)
-        deviation_id = text.extract(response.text, '//deviation/', '"')[0]
-        if response.status_code >= 400 or not deviation_id:
-            raise exception.NotFoundError("image")
-        return (self.api.deviation(deviation_id),)
-
-
 class DeviantartStashExtractor(DeviantartExtractor):
     """Extractor for sta.sh-ed deviations"""
     subcategory = "stash"
@@ -558,54 +503,6 @@ class DeviantartJournalExtractor(DeviantartExtractor):
         return self.api.browse_user_journals(self.user, self.offset)
 
 
-class DeviantartScrapsExtractor(DeviantartExtractor):
-    """Extractor for an artist's scraps"""
-    subcategory = "scraps"
-    directory_fmt = ("{category}", "{username}", "Scraps")
-    archive_fmt = "s_{username}_{index}.{extension}"
-    pattern = BASE_PATTERN + r"/gallery/\?catpath=scraps\b"
-    test = (
-        ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps", {
-            "count": 12,
-            "options": (("original", False),),
-        }),
-        ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"),
-    )
-
-    def deviations(self):
-        url = "{}/{}/gallery/?catpath=scraps".format(self.root, self.user)
-        page = self._html_request(url).text
-        csrf, pos = text.extract(page, '"csrf":"', '"')
-        iid , pos = text.extract(page, '"requestid":"', '"', pos)
-
-        url = "https://www.deviantart.com/dapi/v1/gallery/0"
-        data = {
-            "username": self.user,
-            "offset": self.offset,
-            "limit": "24",
-            "catpath": "scraps",
-            "_csrf": csrf,
-            "dapiIid": iid + "-jsok7403-1.1"
-        }
-
-        while True:
-            content = self.request(
-                url, method="POST", data=data).json()["content"]
-
-            for item in content["results"]:
-                if item["html"].startswith('<div class="ad-container'):
-                    continue
-                deviation_url = text.extract(item["html"], 'href="', '"')[0]
-                page = self._html_request(deviation_url).text
-                deviation_id = text.extract(page, '//deviation/', '"')[0]
-                if deviation_id:
-                    yield self.api.deviation(deviation_id)
-
-            if not content["has_more"]:
-                return
-            data["offset"] = content["next_offset"]
-
-
 class DeviantartPopularExtractor(DeviantartExtractor):
     """Extractor for popular deviations"""
     subcategory = "popular"
@@ -649,6 +546,247 @@ class DeviantartPopularExtractor(DeviantartExtractor):
         deviation["popular"] = self.popular
 
 
+class DeviantartExtractorV2(Extractor):
+    """Base class for deviantart extractors using the NAPI"""
+    category = "deviantart"
+    directory_fmt = ("{category}", "{author[username]!l}")
+    filename_fmt = "{category}_{index}_{title}.{extension}"
+    root = "https://www.deviantart.com"
+
+    def __init__(self, match=None):
+        Extractor.__init__(self, match)
+        self.offset = 0
+        self.extra = self.config("extra", False)
+        self.quality = self.config("quality", "100")
+        self.user = match.group(1) or match.group(2)
+
+        if self.quality:
+            self.quality = "q_{}".format(self.quality)
+
+    def items(self):
+        url = (
+            self.root + "/_napi/da-browse/shared_api/deviation/extended_fetch"
+        )
+        params = {
+            "deviationid"    : None,
+            "username"       : None,
+            "type"           : None,
+            "include_session": "false",
+        }
+        headers = {
+            "Referer": self.root,
+        }
+
+        yield Message.Version, 1
+        for deviation in self.deviations():
+            params["deviationid"] = deviation["deviationId"]
+            params["username"] = deviation["author"]["username"]
+            params["type"] = "journal" if deviation["isJournal"] else "art"
+            data = self.request(url, params=params, headers=headers).json()
+
+            if "deviation" not in data:
+                self.log.warning("Skipping %s", params["deviationid"])
+                continue
+            deviation = self._extract(data)
+
+            yield Message.Directory, deviation
+            yield Message.Url, deviation["target"]["src"], deviation
+            if self.extra:
+                for match in DeviantartStashExtractor.pattern.finditer(
+                        deviation["description"]):
+                    deviation["_extractor"] = DeviantartStashExtractor
+                    yield Message.Queue, match.group(0), deviation
+
+    def _extract(self, data):
+        deviation = data["deviation"]
+        extended = deviation["extended"]
+        files = deviation["files"]
+        del deviation["extended"]
+        del deviation["files"]
+
+        # prepare deviation metadata
+        deviation["description"] = extended.get("description", "")
+        deviation["username"] = self.user.lower()
+        deviation["stats"] = extended["stats"]
+        deviation["stats"]["comments"] = data["comments"]["total"]
+        deviation["index"] = deviation["deviationId"]
+        deviation["tags"] = [t["name"] for t in extended.get("tags") or ()]
+        deviation["date"] = text.parse_datetime(
+            deviation["publishedTime"])
+        deviation["category_path"] = "/".join(
+            extended[key]["displayNameEn"]
+            for key in ("typeFacet", "contentFacet", "categoryFacet")
+            if key in extended
+        )
+
+        # extract download target
+        target = files[-1]
+        name = files[0]["src"]
+
+        if target["type"] == "gif":
+            pass
+        elif target["type"] == "video":
+            # select largest video
+            target = max(
+                files, key=lambda x: text.parse_int(x.get("quality", "")[:-1]))
+            name = target["src"]
+        elif target["type"] == "flash":
+            if target["src"].startswith("https://sandbox.deviantart.com"):
+                # extract SWF file from "sandbox"
+                target["src"] = text.extract(
+                    self.request(target["src"]).text,
+                    'id="sandboxembed" src="', '"',
+                )[0]
+        elif "download" in extended:
+            target = extended["download"]
+            target["src"] = target["url"]
+            del target["url"]
+
+        # url rewrites
+        if target["src"].startswith("https://images-wixmp-"):
+            if deviation["index"] <= 790677560:
+                # https://github.com/r888888888/danbooru/issues/4069
+                target["src"] = re.sub(
+                    r"(/f/[^/]+/[^/]+)/v\d+/.*",
+                    r"/intermediary\1", target["src"])
+            if self.quality:
+                target["src"] = re.sub(
+                    r"q_\d+", self.quality, target["src"])
+
+        text.nameext_from_url(name, target)
+        if target["filename"].endswith("-150"):
+            target["filename"] = target["filename"][:-4]
+        if not target["filename"].count("-"):
+            name, _, hid = target["filename"].rpartition("_")
+            target["filename"] = name + "-" + hid
+        deviation["target"] = target
+        deviation["filename"] = target["filename"]
+        deviation["extension"] = target["extension"] = (
+            text.ext_from_url(target["src"]))
+        return deviation
+
+
+class DeviantartDeviationExtractor(DeviantartExtractorV2):
+    """Extractor for single deviations"""
+    subcategory = "deviation"
+    archive_fmt = "{index}.{extension}"
+    pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?&#]+-)?(\d+)"
+    test = (
+        (("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
+            "options": (("original", 0),),
+            "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
+        }),
+        ("https://www.deviantart.com/zzz/art/zzz-1234567890", {
+            "count": 0,
+        }),
+        (("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), {
+            "pattern": (r"https://www.deviantart.com/download/261986576"
+                        r"/[\w-]+\.jpg\?token=\w+&ts=\d+"),
+        }),
+        # wixmp URL rewrite
+        (("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
+            "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
+                        r"/intermediary/f/[^/]+/[^.]+\.jpg$")
+        }),
+        # wixmp URL rewrite v2 (#369)
+        (("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), {
+            "pattern": r"https://images-wixmp-\w+\.wixmp\.com/.*,q_100,"
+        }),
+        # non-download URL for GIFs (#242)
+        (("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), {
+            "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
+                        r"/f/[^/]+/[^.]+\.gif\?token="),
+        }),
+        # external URLs from description (#302)
+        (("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498"), {
+            "options": (("extra", 1), ("original", 0)),
+            "pattern": r"https?://sta\.sh/\w+$",
+            "range": "2-",
+            "count": 4,
+        }),
+        # video
+        ("https://www.deviantart.com/chi-u/art/-VIDEO-Brushes-330774593", {
+            "url": "3b6e6e761d2d393fa61a4dc3ed6e7db51b14d07b",
+            "keyword": {
+                "target": {
+                    "duration": 306,
+                    "extension": "mp4",
+                    "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5",
+                    "filesize": 9963639,
+                    "quality": "1080p",
+                    "src": str,
+                    "type": "video",
+                },
+            }
+        }),
+        # archive
+        ("https://www.deviantart.com/itsvenue/art/-brush-pngs-14-763300948", {
+            "pattern": r"https://.+deviantart.com/download/763300948/.*\.rar",
+        }),
+        # swf
+        ("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", {
+            "pattern": r"https://images-wixmp-.*wixmp.com/f/.*\.swf",
+        }),
+        # old-style URLs
+        ("https://shimoda7.deviantart.com"
+         "/art/For-the-sake-of-a-memory-10073852"),
+        ("https://myria-moon.deviantart.com"
+         "/art/Aime-Moi-part-en-vadrouille-261986576"),
+        ("https://zzz.deviantart.com/art/zzz-1234567890"),
+    )
+
+    skip = Extractor.skip
+
+    def __init__(self, match):
+        DeviantartExtractorV2.__init__(self, match)
+        self.type = match.group(3)
+        self.deviation_id = match.group(4)
+
+    def deviations(self):
+        return ({
+            "deviationId": self.deviation_id,
+            "author"     : {"username": self.user},
+            "isJournal"  : self.type == "journal",
+        },)
+
+
+class DeviantartScrapsExtractor(DeviantartExtractorV2):
+    """Extractor for an artist's scraps"""
+    subcategory = "scraps"
+    directory_fmt = ("{category}", "{username}", "Scraps")
+    archive_fmt = "s_{username}_{index}.{extension}"
+    pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
+    test = (
+        ("https://www.deviantart.com/shimoda7/gallery/scraps", {
+            "count": 12,
+        }),
+        ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"),
+        ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"),
+    )
+
+    def deviations(self):
+        url = self.root + "/_napi/da-user-profile/api/gallery/contents"
+        params = {
+            "username"     : self.user,
+            "offset"       : self.offset,
+            "limit"        : "24",
+            "scraps_folder": "true",
+        }
+        headers = {
+            "Referer": "{}/{}/gallery/scraps".format(self.root, self.user),
+        }
+
+        while True:
+            data = self.request(url, params=params, headers=headers).json()
+
+            for obj in data["results"]:
+                yield obj["deviation"]
+
+            if not data["hasMore"]:
+                return
+            params["offset"] = data["nextOffset"]
+
+
 class DeviantartAPI():
     """Minimal interface for the DeviantArt API
 
@@ -805,7 +943,7 @@ class DeviantartAPI():
 
             self.authenticate(None if public else self.refresh_token)
             response = self.extractor.request(
-                url, headers=self.headers, params=params, fatal=False)
+                url, headers=self.headers, params=params, fatal=None)
             data = response.json()
             status = response.status_code
author	Unit 193 <unit193@ubuntu.com>	2019-08-26 19:34:45 -0400
committer	Unit 193 <unit193@ubuntu.com>	2019-08-26 19:34:45 -0400
commit	b75d158d014d6c43d7d785c46c9372a9cf84d144 (patch)
tree	7dca4a7e61fe8b6e2bff2142fc19891e783a7d6d /gallery_dl/extractor/deviantart.py
parent	64ad8e7bd15df71ab1116eede414558631bcad32 (diff)