New upstream version 1.10.2 (upstream/1.10.2)

author: Unit 193 <unit193@ubuntu.com> 2019-08-26 19:34:45 -0400
committer: Unit 193 <unit193@ubuntu.com> 2019-08-26 19:34:45 -0400
commit: b75d158d014d6c43d7d785c46c9372a9cf84d144 (patch)
tree: 7dca4a7e61fe8b6e2bff2142fc19891e783a7d6d /gallery_dl/extractor
parent: 64ad8e7bd15df71ab1116eede414558631bcad32 (diff)
24 files changed, 673 insertions, 379 deletions
diff --git a/gallery_dl/extractor/adultempire.py b/gallery_dl/extractor/adultempire.py
index 5ea835f..5e2480a 100644
--- a/gallery_dl/extractor/adultempire.py
+++ b/gallery_dl/extractor/adultempire.py
@@ -21,12 +21,12 @@ class AdultempireGalleryExtractor(GalleryExtractor):
     test = (
         ("https://www.adultempire.com/5998/gallery.html", {
             "range": "1",
-            "keyword": "0533ef1184892be8ac02b17286797c95f389ba63",
+            "keyword": "25c8171f5623678491a0d7bdf38a7a6ebfa4a361",
             "content": "5c6beb31e5e3cdc90ee5910d5c30f9aaec977b9e",
         }),
         ("https://www.adultdvdempire.com/5683/gallery.html", {
             "url": "b12cd1a65cae8019d837505adb4d6a2c1ed4d70d",
-            "keyword": "59fe5d95929efc5040a819a5f77aba7a022bb85a",
+            "keyword": "0fe9a6e3f0a331b95ba77f66a643705ca86e8ec5",
         }),
     )
 
@@ -42,8 +42,8 @@ class AdultempireGalleryExtractor(GalleryExtractor):
             "studio"    : extr(">studio</small>", "<").strip(),
             "date"      : text.parse_datetime(extr(
                 ">released</small>", "<").strip(), "%m/%d/%Y"),
-            "actors"    : text.split_html(extr(
-                '<ul class="item-details item-cast-list ', '</ul>'))[1:],
+            "actors"    : sorted(text.split_html(extr(
+                '<ul class="item-details item-cast-list ', '</ul>'))[1:]),
         }
 
     def images(self, page):
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index f7b3bc1..2892bd4 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -41,6 +41,7 @@ class ArtstationExtractor(Extractor):
                     player = adict["player_embedded"]
                     url = text.extract(player, 'src="', '"')[0]
                     if not url.startswith(self.root):
+                        asset["extension"] = None
                         yield Message.Url, "ytdl:" + url, asset
                         continue
 
diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index c63085a..54a8878 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -41,10 +41,8 @@ class BooruExtractor(SharedConfigMixin, Extractor):
         return pages * self.per_page
 
     def items(self):
-        data = self.get_metadata()
-
         yield Message.Version, 1
-        yield Message.Directory, data
+        data = self.get_metadata()
 
         self.reset_page()
         while True:
@@ -59,9 +57,11 @@ class BooruExtractor(SharedConfigMixin, Extractor):
                 if url.startswith("/"):
                     url = text.urljoin(self.api_url, url)
                 image.update(data)
+                text.nameext_from_url(url, image)
                 if self.extags:
                     self.extended_tags(image)
-                yield Message.Url, url, text.nameext_from_url(url, image)
+                yield Message.Directory, image
+                yield Message.Url, url, image
 
             if len(images) < self.per_page:
                 return
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 5c40e2a..a90af1c 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -87,7 +87,8 @@ class Extractor():
                 raise exception.HttpError(exc)
             else:
                 code = response.status_code
-                if 200 <= code < 400 or not fatal and \
+                if 200 <= code < 400 or fatal is None and \
+                        (400 <= code < 500) or not fatal and \
                         (400 <= code < 429 or 431 <= code < 500):
                     if encoding:
                         response.encoding = encoding
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 63e2913..bd1299b 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -27,7 +27,7 @@ BASE_PATTERN = (
 
 
 class DeviantartExtractor(Extractor):
-    """Base class for deviantart extractors"""
+    """Base class for deviantart extractors using the OAuth API"""
     category = "deviantart"
     directory_fmt = ("{category}", "{author[username]!l}")
     filename_fmt = "{category}_{index}_{title}.{extension}"
@@ -38,11 +38,15 @@ class DeviantartExtractor(Extractor):
         self.offset = 0
         self.flat = self.config("flat", True)
         self.extra = self.config("extra", False)
+        self.quality = self.config("quality", "100")
         self.original = self.config("original", True)
         self.user = match.group(1) or match.group(2)
         self.group = False
         self.api = DeviantartAPI(self)
 
+        if self.quality:
+            self.quality = "q_{}".format(self.quality)
+
         if self.original != "image":
             self._update_content = self._update_content_default
         else:
@@ -81,12 +85,15 @@ class DeviantartExtractor(Extractor):
                         text.ext_from_url(content["src"]) != "gif":
                     self._update_content(deviation, content)
 
-                if deviation["index"] <= 790677560 and \
-                        content["src"].startswith("https://images-wixmp-"):
-                    # https://github.com/r888888888/danbooru/issues/4069
-                    content["src"] = re.sub(
-                        r"(/f/[^/]+/[^/]+)/v\d+/.*",
-                        r"/intermediary\1", content["src"])
+                if content["src"].startswith("https://images-wixmp-"):
+                    if deviation["index"] <= 790677560:
+                        # https://github.com/r888888888/danbooru/issues/4069
+                        content["src"] = re.sub(
+                            r"(/f/[^/]+/[^/]+)/v\d+/.*",
+                            r"/intermediary\1", content["src"])
+                    if self.quality:
+                        content["src"] = re.sub(
+                            r"q_\d+", self.quality, content["src"])
 
                 yield self.commit(deviation, content)
 
@@ -133,8 +140,16 @@ class DeviantartExtractor(Extractor):
     @staticmethod
     def commit(deviation, target):
         url = target["src"]
-        deviation["target"] = text.nameext_from_url(url, target.copy())
-        deviation["extension"] = deviation["target"]["extension"]
+        thumb = deviation["thumbs"][0]["src"] if "thumbs" in deviation else url
+        target = text.nameext_from_url(thumb, target.copy())
+        if target["filename"].endswith("-150"):
+            target["filename"] = target["filename"][:-4]
+        if not target["filename"].count("-"):
+            name, _, hid = target["filename"].rpartition("_")
+            target["filename"] = name + "-" + hid
+        deviation["target"] = target
+        deviation["filename"] = target["filename"]
+        deviation["extension"] = target["extension"] = text.ext_from_url(url)
         return Message.Url, url, deviation
 
     def _commit_journal_html(self, deviation, journal):
@@ -225,14 +240,6 @@ class DeviantartExtractor(Extractor):
         if mtype and mtype.startswith("image/"):
             content.update(data)
 
-    def _html_request(self, url, **kwargs):
-        cookies = {"userinfo": (
-            '__167217c8e6aac1a3331f;{"username":"","uniqueid":"ab2e8b184471bf0'
-            'e3f8ed3ee7a3220aa","vd":"Bc7vEx,BdC7Fy,A,J,A,,B,A,B,BdC7Fy,BdC7XU'
-            ',J,J,A,BdC7XU,13,A,B,A,,A,A,B,A,A,,A","attr":56}'
-        )}
-        return self.request(url, cookies=cookies, **kwargs)
-
 
 class DeviantartGalleryExtractor(DeviantartExtractor):
     """Extractor for all deviations from an artist's gallery"""
@@ -360,68 +367,6 @@ class DeviantartFolderExtractor(DeviantartExtractor):
         deviation["folder"] = self.folder
 
 
-class DeviantartDeviationExtractor(DeviantartExtractor):
-    """Extractor for single deviations"""
-    subcategory = "deviation"
-    archive_fmt = "{index}.{extension}"
-    pattern = BASE_PATTERN + r"/((?:art|journal)/[^/?&#]+-\d+)"
-    test = (
-        (("https://www.deviantart.com/shimoda7/art/"
-          "For-the-sake-of-a-memory-10073852"), {
-            "options": (("original", 0),),
-            "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
-        }),
-        ("https://www.deviantart.com/zzz/art/zzz-1234567890", {
-            "exception": exception.NotFoundError,
-        }),
-        (("https://www.deviantart.com/myria-moon/art/"
-          "Aime-Moi-part-en-vadrouille-261986576"), {
-            "pattern": (r"https?://s3\.amazonaws\.com/origin-orig\."
-                        r"deviantart\.net/a383/f/2013/135/e/7/[^.]+\.jpg\?"),
-        }),
-        # wixmp URL rewrite
-        (("https://www.deviantart.com/citizenfresh/art/"
-          "Hverarond-14-the-beauty-of-the-earth-789295466"), {
-            "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
-                        r"/intermediary/f/[^/]+/[^.]+\.jpg$")
-        }),
-        # non-download URL for GIFs (#242)
-        (("https://www.deviantart.com/skatergators/art/"
-          "COM-Monique-Model-781571783"), {
-            "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
-                        r"/f/[^/]+/[^.]+\.gif\?token="),
-        }),
-        # external URLs from description (#302)
-        (("https://www.deviantart.com/uotapo/art/"
-          "INANAKI-Memorial-Humane7-590297498"), {
-            "options": (("extra", 1), ("original", 0)),
-            "pattern": r"https?://sta\.sh/\w+$",
-            "range": "2-",
-            "count": 4,
-        }),
-        # old-style URLs
-        ("https://shimoda7.deviantart.com"
-         "/art/For-the-sake-of-a-memory-10073852"),
-        ("https://myria-moon.deviantart.com"
-         "/art/Aime-Moi-part-en-vadrouille-261986576"),
-        ("https://zzz.deviantart.com/art/zzz-1234567890"),
-    )
-
-    skip = Extractor.skip
-
-    def __init__(self, match):
-        DeviantartExtractor.__init__(self, match)
-        self.path = match.group(3)
-
-    def deviations(self):
-        url = "{}/{}/{}".format(self.root, self.user, self.path)
-        response = self._html_request(url, fatal=False)
-        deviation_id = text.extract(response.text, '//deviation/', '"')[0]
-        if response.status_code >= 400 or not deviation_id:
-            raise exception.NotFoundError("image")
-        return (self.api.deviation(deviation_id),)
-
-
 class DeviantartStashExtractor(DeviantartExtractor):
     """Extractor for sta.sh-ed deviations"""
     subcategory = "stash"
@@ -558,54 +503,6 @@ class DeviantartJournalExtractor(DeviantartExtractor):
         return self.api.browse_user_journals(self.user, self.offset)
 
 
-class DeviantartScrapsExtractor(DeviantartExtractor):
-    """Extractor for an artist's scraps"""
-    subcategory = "scraps"
-    directory_fmt = ("{category}", "{username}", "Scraps")
-    archive_fmt = "s_{username}_{index}.{extension}"
-    pattern = BASE_PATTERN + r"/gallery/\?catpath=scraps\b"
-    test = (
-        ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps", {
-            "count": 12,
-            "options": (("original", False),),
-        }),
-        ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"),
-    )
-
-    def deviations(self):
-        url = "{}/{}/gallery/?catpath=scraps".format(self.root, self.user)
-        page = self._html_request(url).text
-        csrf, pos = text.extract(page, '"csrf":"', '"')
-        iid , pos = text.extract(page, '"requestid":"', '"', pos)
-
-        url = "https://www.deviantart.com/dapi/v1/gallery/0"
-        data = {
-            "username": self.user,
-            "offset": self.offset,
-            "limit": "24",
-            "catpath": "scraps",
-            "_csrf": csrf,
-            "dapiIid": iid + "-jsok7403-1.1"
-        }
-
-        while True:
-            content = self.request(
-                url, method="POST", data=data).json()["content"]
-
-            for item in content["results"]:
-                if item["html"].startswith('<div class="ad-container'):
-                    continue
-                deviation_url = text.extract(item["html"], 'href="', '"')[0]
-                page = self._html_request(deviation_url).text
-                deviation_id = text.extract(page, '//deviation/', '"')[0]
-                if deviation_id:
-                    yield self.api.deviation(deviation_id)
-
-            if not content["has_more"]:
-                return
-            data["offset"] = content["next_offset"]
-
-
 class DeviantartPopularExtractor(DeviantartExtractor):
     """Extractor for popular deviations"""
     subcategory = "popular"
@@ -649,6 +546,247 @@ class DeviantartPopularExtractor(DeviantartExtractor):
         deviation["popular"] = self.popular
 
 
+class DeviantartExtractorV2(Extractor):
+    """Base class for deviantart extractors using the NAPI"""
+    category = "deviantart"
+    directory_fmt = ("{category}", "{author[username]!l}")
+    filename_fmt = "{category}_{index}_{title}.{extension}"
+    root = "https://www.deviantart.com"
+
+    def __init__(self, match=None):
+        Extractor.__init__(self, match)
+        self.offset = 0
+        self.extra = self.config("extra", False)
+        self.quality = self.config("quality", "100")
+        self.user = match.group(1) or match.group(2)
+
+        if self.quality:
+            self.quality = "q_{}".format(self.quality)
+
+    def items(self):
+        url = (
+            self.root + "/_napi/da-browse/shared_api/deviation/extended_fetch"
+        )
+        params = {
+            "deviationid"    : None,
+            "username"       : None,
+            "type"           : None,
+            "include_session": "false",
+        }
+        headers = {
+            "Referer": self.root,
+        }
+
+        yield Message.Version, 1
+        for deviation in self.deviations():
+            params["deviationid"] = deviation["deviationId"]
+            params["username"] = deviation["author"]["username"]
+            params["type"] = "journal" if deviation["isJournal"] else "art"
+            data = self.request(url, params=params, headers=headers).json()
+
+            if "deviation" not in data:
+                self.log.warning("Skipping %s", params["deviationid"])
+                continue
+            deviation = self._extract(data)
+
+            yield Message.Directory, deviation
+            yield Message.Url, deviation["target"]["src"], deviation
+            if self.extra:
+                for match in DeviantartStashExtractor.pattern.finditer(
+                        deviation["description"]):
+                    deviation["_extractor"] = DeviantartStashExtractor
+                    yield Message.Queue, match.group(0), deviation
+
+    def _extract(self, data):
+        deviation = data["deviation"]
+        extended = deviation["extended"]
+        files = deviation["files"]
+        del deviation["extended"]
+        del deviation["files"]
+
+        # prepare deviation metadata
+        deviation["description"] = extended.get("description", "")
+        deviation["username"] = self.user.lower()
+        deviation["stats"] = extended["stats"]
+        deviation["stats"]["comments"] = data["comments"]["total"]
+        deviation["index"] = deviation["deviationId"]
+        deviation["tags"] = [t["name"] for t in extended.get("tags") or ()]
+        deviation["date"] = text.parse_datetime(
+            deviation["publishedTime"])
+        deviation["category_path"] = "/".join(
+            extended[key]["displayNameEn"]
+            for key in ("typeFacet", "contentFacet", "categoryFacet")
+            if key in extended
+        )
+
+        # extract download target
+        target = files[-1]
+        name = files[0]["src"]
+
+        if target["type"] == "gif":
+            pass
+        elif target["type"] == "video":
+            # select largest video
+            target = max(
+                files, key=lambda x: text.parse_int(x.get("quality", "")[:-1]))
+            name = target["src"]
+        elif target["type"] == "flash":
+            if target["src"].startswith("https://sandbox.deviantart.com"):
+                # extract SWF file from "sandbox"
+                target["src"] = text.extract(
+                    self.request(target["src"]).text,
+                    'id="sandboxembed" src="', '"',
+                )[0]
+        elif "download" in extended:
+            target = extended["download"]
+            target["src"] = target["url"]
+            del target["url"]
+
+        # url rewrites
+        if target["src"].startswith("https://images-wixmp-"):
+            if deviation["index"] <= 790677560:
+                # https://github.com/r888888888/danbooru/issues/4069
+                target["src"] = re.sub(
+                    r"(/f/[^/]+/[^/]+)/v\d+/.*",
+                    r"/intermediary\1", target["src"])
+            if self.quality:
+                target["src"] = re.sub(
+                    r"q_\d+", self.quality, target["src"])
+
+        text.nameext_from_url(name, target)
+        if target["filename"].endswith("-150"):
+            target["filename"] = target["filename"][:-4]
+        if not target["filename"].count("-"):
+            name, _, hid = target["filename"].rpartition("_")
+            target["filename"] = name + "-" + hid
+        deviation["target"] = target
+        deviation["filename"] = target["filename"]
+        deviation["extension"] = target["extension"] = (
+            text.ext_from_url(target["src"]))
+        return deviation
+
+
+class DeviantartDeviationExtractor(DeviantartExtractorV2):
+    """Extractor for single deviations"""
+    subcategory = "deviation"
+    archive_fmt = "{index}.{extension}"
+    pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?&#]+-)?(\d+)"
+    test = (
+        (("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
+            "options": (("original", 0),),
+            "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
+        }),
+        ("https://www.deviantart.com/zzz/art/zzz-1234567890", {
+            "count": 0,
+        }),
+        (("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), {
+            "pattern": (r"https://www.deviantart.com/download/261986576"
+                        r"/[\w-]+\.jpg\?token=\w+&ts=\d+"),
+        }),
+        # wixmp URL rewrite
+        (("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
+            "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
+                        r"/intermediary/f/[^/]+/[^.]+\.jpg$")
+        }),
+        # wixmp URL rewrite v2 (#369)
+        (("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), {
+            "pattern": r"https://images-wixmp-\w+\.wixmp\.com/.*,q_100,"
+        }),
+        # non-download URL for GIFs (#242)
+        (("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), {
+            "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
+                        r"/f/[^/]+/[^.]+\.gif\?token="),
+        }),
+        # external URLs from description (#302)
+        (("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498"), {
+            "options": (("extra", 1), ("original", 0)),
+            "pattern": r"https?://sta\.sh/\w+$",
+            "range": "2-",
+            "count": 4,
+        }),
+        # video
+        ("https://www.deviantart.com/chi-u/art/-VIDEO-Brushes-330774593", {
+            "url": "3b6e6e761d2d393fa61a4dc3ed6e7db51b14d07b",
+            "keyword": {
+                "target": {
+                    "duration": 306,
+                    "extension": "mp4",
+                    "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5",
+                    "filesize": 9963639,
+                    "quality": "1080p",
+                    "src": str,
+                    "type": "video",
+                },
+            }
+        }),
+        # archive
+        ("https://www.deviantart.com/itsvenue/art/-brush-pngs-14-763300948", {
+            "pattern": r"https://.+deviantart.com/download/763300948/.*\.rar",
+        }),
+        # swf
+        ("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", {
+            "pattern": r"https://images-wixmp-.*wixmp.com/f/.*\.swf",
+        }),
+        # old-style URLs
+        ("https://shimoda7.deviantart.com"
+         "/art/For-the-sake-of-a-memory-10073852"),
+        ("https://myria-moon.deviantart.com"
+         "/art/Aime-Moi-part-en-vadrouille-261986576"),
+        ("https://zzz.deviantart.com/art/zzz-1234567890"),
+    )
+
+    skip = Extractor.skip
+
+    def __init__(self, match):
+        DeviantartExtractorV2.__init__(self, match)
+        self.type = match.group(3)
+        self.deviation_id = match.group(4)
+
+    def deviations(self):
+        return ({
+            "deviationId": self.deviation_id,
+            "author"     : {"username": self.user},
+            "isJournal"  : self.type == "journal",
+        },)
+
+
+class DeviantartScrapsExtractor(DeviantartExtractorV2):
+    """Extractor for an artist's scraps"""
+    subcategory = "scraps"
+    directory_fmt = ("{category}", "{username}", "Scraps")
+    archive_fmt = "s_{username}_{index}.{extension}"
+    pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
+    test = (
+        ("https://www.deviantart.com/shimoda7/gallery/scraps", {
+            "count": 12,
+        }),
+        ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"),
+        ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"),
+    )
+
+    def deviations(self):
+        url = self.root + "/_napi/da-user-profile/api/gallery/contents"
+        params = {
+            "username"     : self.user,
+            "offset"       : self.offset,
+            "limit"        : "24",
+            "scraps_folder": "true",
+        }
+        headers = {
+            "Referer": "{}/{}/gallery/scraps".format(self.root, self.user),
+        }
+
+        while True:
+            data = self.request(url, params=params, headers=headers).json()
+
+            for obj in data["results"]:
+                yield obj["deviation"]
+
+            if not data["hasMore"]:
+                return
+            params["offset"] = data["nextOffset"]
+
+
 class DeviantartAPI():
     """Minimal interface for the DeviantArt API
 
@@ -805,7 +943,7 @@ class DeviantartAPI():
 
             self.authenticate(None if public else self.refresh_token)
             response = self.extractor.request(
-                url, headers=self.headers, params=params, fatal=False)
+                url, headers=self.headers, params=params, fatal=None)
             data = response.json()
             status = response.status_code
 
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index ce2e83b..4ec7f00 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -33,16 +33,16 @@ class GelbooruExtractor(booru.XmlParserMixin,
             self.session.cookies["fringeBenefits"] = "yup"
 
     def items_noapi(self):
-        data = self.get_metadata()
-
         yield Message.Version, 1
-        yield Message.Directory, data
+        data = self.get_metadata()
 
         for post in self.get_posts():
             post = self.get_post_data(post)
             url = post["file_url"]
             post.update(data)
-            yield Message.Url, url, text.nameext_from_url(url, post)
+            text.nameext_from_url(url, post)
+            yield Message.Directory, post
+            yield Message.Url, url, post
 
     def get_posts(self):
         """Return an iterable containing all relevant post objects"""
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index c112465..e4f18b3 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -21,7 +21,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
     test = (
         ("https://hitomi.la/galleries/867789.html", {
             "url": "cb759868d090fe0e2655c3e29ebf146054322b6d",
-            "keyword": "067b5d9b9c0f98530cd5dd2444e0f5a5b4b00d38",
+            "keyword": "d097a8db8e810045131b4510c41714004f9eff3a",
         }),
         ("https://hitomi.la/galleries/1036181.html", {
             # "aa" subdomain for gallery-id ending in 1 (#142)
diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py
index 6980185..76b2c38 100644
--- a/gallery_dl/extractor/imagebam.py
+++ b/gallery_dl/extractor/imagebam.py
@@ -41,14 +41,14 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
     pattern = r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([0-9a-z]+)"
     test = (
         ("http://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
-            "url": "fb01925129a1ff1941762eaa3a2783a66de6847f",
+            "url": "76d976788ae2757ac81694736b07b72356f5c4c8",
             "keyword": "9e25b8827474ac93c54855e798d60aa3cbecbd7a",
             "content": "596e6bfa157f2c7169805d50075c2986549973a8",
         }),
         ("http://www.imagebam.com/gallery/op9dwcklwdrrguibnkoe7jxgvig30o5p", {
             #  more than 100 images; see issue #219
             "count": 107,
-            "url": "f92ce5b17676b6ea69288f0aef26f4cdbea7fd8d",
+            "url": "32ae6fe5dc3e4ca73ff6252e522d16473595d1d1",
         }),
         ("http://www.imagebam.com/gallery/gsl8teckymt4vbvx1stjkyk37j70va2c", {
             "exception": exception.NotFoundError,
@@ -108,7 +108,7 @@ class ImagebamImageExtractor(ImagebamExtractor):
                r"/(?:image/|(?:[0-9a-f]{2}/){3})([0-9a-f]+)")
     test = (
         ("http://www.imagebam.com/image/94d56c502511890", {
-            "url": "b384893c35a01a09c58018db71ddc4cf2480be95",
+            "url": "5e9ba3b1451f8ded0ae3a1b84402888893915d4a",
             "keyword": "4263d4840007524129792b8587a562b5d20c2687",
             "content": "0c8768055e4e20e7c7259608b67799171b691140",
         }),
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
index 442634b..4aa670b 100644
--- a/gallery_dl/extractor/imgbb.py
+++ b/gallery_dl/extractor/imgbb.py
@@ -17,6 +17,7 @@ import json
 class ImgbbExtractor(Extractor):
     """Base class for imgbb extractors"""
     category = "imgbb"
+    directory_fmt = ("{category}", "{user}")
     filename_fmt = "{title} {id}.{extension}"
     archive_fmt = "{id}"
     root = "https://imgbb.com"
@@ -145,7 +146,6 @@ class ImgbbAlbumExtractor(ImgbbExtractor):
 class ImgbbUserExtractor(ImgbbExtractor):
     """Extractor for user profiles in imgbb.com"""
     subcategory = "user"
-    directory_fmt = ("{category}", "{user}")
     pattern = r"(?:https?://)?([^.]+)\.imgbb\.com/?(?:\?([^#]+))?$"
     test = ("https://folkie.imgbb.com", {
         "range": "1-80",
@@ -177,3 +177,34 @@ class ImgbbUserExtractor(ImgbbExtractor):
             "params_hidden[userid]": user,
             "params_hidden[from]"  : "user",
         })
+
+
+class ImgbbImageExtractor(ImgbbExtractor):
+    subcategory = "image"
+    pattern = r"(?:https?://)?ibb\.co/(?!album/)([^/?&#]+)"
+    test = ("https://ibb.co/NLZHgqS", {
+        "url": "fbca86bac09de6fc0304054b2170b423ca1e84fa",
+        "keyword": "5d70e779bad03b2dc5273b627638045168671157",
+    })
+
+    def __init__(self, match):
+        ImgbbExtractor.__init__(self, match)
+        self.image_id = match.group(1)
+
+    def items(self):
+        url = "https://ibb.co/" + self.image_id
+        extr = text.extract_from(self.request(url).text)
+
+        image = {
+            "id"    : self.image_id,
+            "title" : text.unescape(extr('"og:title" content="', '"')),
+            "url"   : extr('"og:image" content="', '"'),
+            "width" : text.parse_int(extr('"og:image:width" content="', '"')),
+            "height": text.parse_int(extr('"og:image:height" content="', '"')),
+            "user"  : extr('rel="author">', '<').lower(),
+        }
+        image["extension"] = text.ext_from_url(image["url"])
+
+        yield Message.Version, 1
+        yield Message.Directory, image
+        yield Message.Url, image["url"], image
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index c5e3d17..8523523 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -20,13 +20,19 @@ class ImgurExtractor(Extractor):
 
     def __init__(self, match):
         Extractor.__init__(self, match)
-        self.item_id = match.group(1)
+        self.key = match.group(1)
         self.mp4 = self.config("mp4", True)
 
-    def _get_data(self, path):
+    def _extract_data(self, path):
         response = self.request(self.root + path, notfound=self.subcategory)
-        data = text.extract(response.text, "image               : ", ",\n")[0]
-        return self._clean(json.loads(data))
+        data = json.loads(text.extract(
+            response.text, "image               : ", ",\n")[0])
+        try:
+            del data["adConfig"]
+            del data["isAd"]
+        except KeyError:
+            pass
+        return data
 
     def _prepare(self, image):
         image["ext"] = image["ext"].partition("?")[0]
@@ -37,18 +43,9 @@ class ImgurExtractor(Extractor):
         image["extension"] = image["ext"][1:]
         return url
 
-    @staticmethod
-    def _clean(data):
-        try:
-            del data["adConfig"]
-            del data["isAd"]
-        except KeyError:
-            pass
-        return data
-
 
 class ImgurImageExtractor(ImgurExtractor):
-    """Extractor for individual images from imgur.com"""
+    """Extractor for individual images on imgur.com"""
     subcategory = "image"
     filename_fmt = "{category}_{hash}{title:?_//}.{extension}"
     archive_fmt = "{hash}"
@@ -101,22 +98,21 @@ class ImgurImageExtractor(ImgurExtractor):
     )
 
     def items(self):
-        image = self._get_data("/" + self.item_id)
+        image = self._extract_data("/" + self.key)
         url = self._prepare(image)
-
         yield Message.Version, 1
         yield Message.Directory, image
         yield Message.Url, url, image
 
 
 class ImgurAlbumExtractor(ImgurExtractor):
-    """Extractor for image albums from imgur.com"""
+    """Extractor for imgur albums"""
     subcategory = "album"
     directory_fmt = ("{category}", "{album[hash]}{album[title]:? - //}")
     filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}"
     archive_fmt = "{album[hash]}_{hash}"
     pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
-               r"/(?:a|gallery|t/unmuted)/(\w{7}|\w{5})")
+               r"/(?:a|t/unmuted)/(\w{7}|\w{5})")
     test = (
         ("https://imgur.com/a/TcBmP", {
             "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
@@ -147,7 +143,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
                 "width": int,
             },
         }),
-        ("https://imgur.com/gallery/eD9CT", {  # large album
+        ("https://imgur.com/a/eD9CT", {  # large album
             "url": "4ee94de31ff26be416271bc0b1ea27b9349c9937",
         }),
         ("https://imgur.com/a/RhJXhVT/all", {  # 7 character album hash
@@ -164,13 +160,13 @@ class ImgurAlbumExtractor(ImgurExtractor):
     )
 
     def items(self):
-        album = self._get_data("/a/" + self.item_id + "/all")
+        album = self._extract_data("/a/" + self.key + "/all")
         images = album["album_images"]["images"]
         del album["album_images"]
 
         if int(album["num_images"]) > len(images):
             url = "{}/ajaxalbums/getimages/{}/hit.json".format(
-                self.root, self.item_id)
+                self.root, self.key)
             images = self.request(url).json()["data"]["images"]
 
         yield Message.Version, 1
@@ -180,3 +176,32 @@ class ImgurAlbumExtractor(ImgurExtractor):
             image["num"] = num
             image["album"] = album
             yield Message.Url, url, image
+
+
+class ImgurGalleryExtractor(ImgurExtractor):
+    """Extractor for imgur galleries"""
+    subcategory = "gallery"
+    pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
+               r"/gallery/(\w{7}|\w{5})")
+    test = (
+        ("https://imgur.com/gallery/zf2fIms", {  # non-album gallery (#380)
+            "pattern": "https://imgur.com/zf2fIms",
+        }),
+        ("https://imgur.com/gallery/eD9CT", {
+            "pattern": "https://imgur.com/a/eD9CT",
+        }),
+    )
+
+    def items(self):
+        url = self.root + "/a/" + self.key
+        with self.request(url, method="HEAD", fatal=False) as response:
+            code = response.status_code
+
+        if code < 400:
+            extr = ImgurAlbumExtractor
+        else:
+            extr = ImgurImageExtractor
+            url = self.root + "/" + self.key
+
+        yield Message.Version, 1
+        yield Message.Queue, url, {"_extractor": extr}
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 475e24b..e5cfe8b 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -8,11 +8,10 @@
 
 """Extract images from https://www.instagram.com/"""
 
-import hashlib
-import json
 from .common import Extractor, Message
 from .. import text, exception
 from ..cache import cache
+import json
 
 
 class InstagramExtractor(Extractor):
@@ -37,10 +36,11 @@ class InstagramExtractor(Extractor):
             data.update(metadata)
             yield Message.Directory, data
 
-            if data['typename'] == 'GraphImage':
+            if data['typename'] in ('GraphImage', 'GraphStoryImage', 'GraphStoryVideo'):
                 yield Message.Url, data['display_url'], \
                     text.nameext_from_url(data['display_url'], data)
             elif data['typename'] == 'GraphVideo':
+                data["extension"] = None
                 yield Message.Url, \
                     'ytdl:{}/p/{}/'.format(self.root, data['shortcode']), data
 
@@ -140,33 +140,113 @@ class InstagramExtractor(Extractor):
 
         return medias
 
+    def _extract_stories(self, url):
+        if self.highlight_id:
+            user_id = ''
+            highlight_id = '"{}"'.format(self.highlight_id)
+            query_hash = '30a89afdd826d78a5376008a7b81c205'
+        else:
+            page = self.request(url).text
+            shared_data = self._extract_shared_data(page)
+
+            # If no stories are present the URL redirects to `ProfilePage'
+            if 'StoriesPage' not in shared_data['entry_data']:
+                return []
+
+            user_id = '"{}"'.format(
+                shared_data['entry_data']['StoriesPage'][0]['user']['id'])
+            highlight_id = ''
+            query_hash = 'cda12de4f7fd3719c0569ce03589f4c4'
+
+        variables = (
+            '{{'
+            '"reel_ids":[{}],"tag_names":[],"location_ids":[],'
+            '"highlight_reel_ids":[{}],"precomposed_overlay":true,'
+            '"show_story_viewer_list":true,'
+            '"story_viewer_fetch_count":50,"story_viewer_cursor":"",'
+            '"stories_video_dash_manifest":false}}'
+        ).format(user_id, highlight_id)
+        headers = {
+            "X-Requested-With": "XMLHttpRequest",
+        }
+        url = '{}/graphql/query/?query_hash={}&variables={}'.format(
+            self.root,
+            query_hash,
+            variables,
+        )
+        shared_data = self.request(url, headers=headers).json()
+
+        # If there are stories present but the user is not authenticated or
+        # does not have permissions no stories are returned.
+        if not shared_data['data']['reels_media']:
+            return []   # no stories present
+
+        medias = []
+        for media in shared_data['data']['reels_media'][0]['items']:
+            media_data = {
+                'owner_id': media['owner']['id'],
+                'username': media['owner']['username'],
+                'date': text.parse_timestamp(media['taken_at_timestamp']),
+                'expires': text.parse_timestamp(media['expiring_at_timestamp']),
+                'media_id': media['id'],
+                'typename': media['__typename'],
+            }
+            if media['__typename'] == 'GraphStoryImage':
+                media_data.update({
+                    'display_url': media['display_url'],
+                    'height': text.parse_int(media['dimensions']['height']),
+                    'width': text.parse_int(media['dimensions']['width']),
+                })
+            elif media['__typename'] == 'GraphStoryVideo':
+                vr = media['video_resources'][0]
+                media_data.update({
+                    'duration': text.parse_float(media['video_duration']),
+                    'display_url': vr['src'],
+                    'height': text.parse_int(vr['config_height']),
+                    'width': text.parse_int(vr['config_width']),
+                })
+            medias.append(media_data)
+
+        return medias
+
     def _extract_page(self, url, page_type):
         shared_data_fields = {
             'ProfilePage': {
+                'page': 'ProfilePage',
                 'node': 'user',
                 'node_id': 'id',
                 'edge_to_medias': 'edge_owner_to_timeline_media',
                 'variables_id': 'id',
-                'query_hash': '66eb9403e44cc12e5b5ecda48b667d41',
+                'query_hash': 'f2405b236d85e8296cf30347c9f08c2a',
+            },
+            'ProfileChannelPage': {
+                'page': 'ProfilePage',
+                'node': 'user',
+                'node_id': 'id',
+                'edge_to_medias': 'edge_felix_video_timeline',
+                'variables_id': 'id',
+                'query_hash': 'bc78b344a68ed16dd5d7f264681c4c76',
             },
             'TagPage': {
+                'page': 'TagPage',
                 'node': 'hashtag',
                 'node_id': 'name',
                 'edge_to_medias': 'edge_hashtag_to_media',
                 'variables_id': 'tag_name',
-                'query_hash': 'f92f56d47dc7a55b606908374b43a314',
+                'query_hash': 'f12c9ec5e46a3173b2969c712ad84744',
             },
         }
 
         page = self.request(url).text
         shared_data = self._extract_shared_data(page)
         psdf = shared_data_fields[page_type]
+        csrf = shared_data["config"]["csrf_token"]
 
         while True:
             # Deal with different structure of pages: the first page
             # has interesting data in `entry_data', next pages in `data'.
             if 'entry_data' in shared_data:
-                base_shared_data = shared_data['entry_data'][page_type][0]['graphql']
+                base_shared_data = shared_data['entry_data'][psdf['page']][0]['graphql']
 
                 # variables_id is available only in the first page
                 variables_id = base_shared_data[psdf['node']][psdf['node_id']]
@@ -192,7 +272,8 @@ class InstagramExtractor(Extractor):
             )
             headers = {
                 "X-Requested-With": "XMLHttpRequest",
-                "X-Instagram-GIS": hashlib.md5(variables.encode()).hexdigest(),
+                "X-CSRFToken": csrf,
+                "X-IG-App-ID": "936619743392459",
             }
             url = '{}/graphql/query/?query_hash={}&variables={}'.format(
                 self.root,
@@ -204,14 +285,20 @@ class InstagramExtractor(Extractor):
     def _extract_profilepage(self, url):
         yield from self._extract_page(url, 'ProfilePage')
 
+    def _extract_profilechannelpage(self, url):
+        yield from self._extract_page(url, 'ProfileChannelPage')
+
     def _extract_tagpage(self, url):
         yield from self._extract_page(url, 'TagPage')
 
+    def _extract_storiespage(self, url):
+        yield from self._extract_stories(url)
+
 
 class InstagramImageExtractor(InstagramExtractor):
     """Extractor for PostPage"""
     subcategory = "image"
-    pattern = r"(?:https?://)?(?:www\.)?instagram\.com/p/([^/?&#]+)"
+    pattern = r"(?:https?://)?(?:www\.)?instagram\.com/(?:p|tv)/([^/?&#]+)"
     test = (
         # GraphImage
         ("https://www.instagram.com/p/BqvsDleB3lV/", {
@@ -258,6 +345,22 @@ class InstagramImageExtractor(InstagramExtractor):
             }
         }),
 
+        # GraphVideo (IGTV)
+        ("https://www.instagram.com/tv/BkQjCfsBIzi/", {
+            "url": "64208f408e11cbbca86c2df4488e90262ae9d9ec",
+            "keyword": {
+                "date": "type:datetime",
+                "description": str,
+                "height": int,
+                "likes": int,
+                "media_id": "1806097553666903266",
+                "shortcode": "BkQjCfsBIzi",
+                "typename": "GraphVideo",
+                "username": "instagram",
+                "width": int,
+            }
+        }),
+
         # GraphSidecar with 2 embedded GraphVideo objects
         ("https://www.instagram.com/p/BtOvDOfhvRr/", {
             "count": 2,
@@ -283,10 +386,11 @@ class InstagramUserExtractor(InstagramExtractor):
     """Extractor for ProfilePage"""
     subcategory = "user"
     pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
-               r"/(?!p/|explore/|directory/|accounts/)([^/?&#]+)")
+               r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)"
+               r"([^/?&#]+)/?$")
     test = ("https://www.instagram.com/instagram/", {
-        "range": "1-12",
-        "count": ">= 12",
+        "range": "1-16",
+        "count": ">= 16",
     })
 
     def __init__(self, match):
@@ -298,6 +402,26 @@ class InstagramUserExtractor(InstagramExtractor):
         return self._extract_profilepage(url)
 
 
+class InstagramChannelExtractor(InstagramExtractor):
+    """Extractor for ProfilePage channel"""
+    subcategory = "channel"
+    pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
+               r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)"
+               r"([^/?&#]+)/channel")
+    test = ("https://www.instagram.com/instagram/channel/", {
+        "range": "1-16",
+        "count": ">= 16",
+    })
+
+    def __init__(self, match):
+        InstagramExtractor.__init__(self, match)
+        self.username = match.group(1)
+
+    def instagrams(self):
+        url = '{}/{}/channel/'.format(self.root, self.username)
+        return self._extract_profilechannelpage(url)
+
+
 class InstagramTagExtractor(InstagramExtractor):
     """Extractor for TagPage"""
     subcategory = "tag"
@@ -305,8 +429,8 @@ class InstagramTagExtractor(InstagramExtractor):
     pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
                r"/explore/tags/([^/?&#]+)")
     test = ("https://www.instagram.com/explore/tags/instagram/", {
-        "range": "1-12",
-        "count": ">= 12",
+        "range": "1-16",
+        "count": ">= 16",
     })
 
     def __init__(self, match):
@@ -319,3 +443,22 @@ class InstagramTagExtractor(InstagramExtractor):
     def instagrams(self):
         url = '{}/explore/tags/{}/'.format(self.root, self.tag)
         return self._extract_tagpage(url)
+
+
+class InstagramStoriesExtractor(InstagramExtractor):
+    """Extractor for StoriesPage"""
+    subcategory = "stories"
+    pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
+               r"/stories/([^/?&#]+)(?:/(\d+))?")
+    test = (
+        ("https://www.instagram.com/stories/instagram/"),
+        ("https://www.instagram.com/stories/highlights/18042509488170095/"),
+    )
+
+    def __init__(self, match):
+        InstagramExtractor.__init__(self, match)
+        self.username, self.highlight_id = match.groups()
+
+    def instagrams(self):
+        url = '{}/stories/{}/'.format(self.root, self.username)
+        return self._extract_storiespage(url)
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index 879d38b..a73eb86 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -62,7 +62,7 @@ class LusciousAlbumExtractor(LusciousBase, GalleryExtractor):
     test = (
         ("https://luscious.net/albums/okinami-no-koigokoro_277031/", {
             "url": "7e4984a271a1072ac6483e4228a045895aff86f3",
-            "keyword": "ab4e5b71583fd439b4c8012a642aa8b58d8d0758",
+            "keyword": "07c0b915f2ab1cc3bbf28b76e7950fccee1213f3",
             "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
         }),
         ("https://luscious.net/albums/virgin-killer-sweater_282582/", {
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 282c389..1ca1073 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -93,7 +93,7 @@ class NewgroundsUserExtractor(NewgroundsExtractor):
     test = (
         ("https://blitzwuff.newgrounds.com/art", {
             "url": "24b19c4a135a09889fac7b46a74e427e4308d02b",
-            "keyword": "98566e0c8096a8099b8d71962fea7e31c8b098d4",
+            "keyword": "62981f7bdd66e1f1c72ab1d9b932423c156bc9a1",
         }),
         ("https://blitzwuff.newgrounds.com/"),
     )
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 4884497..ab5932d 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -11,6 +11,8 @@
 from .common import Extractor, Message
 from .. import text
 from ..cache import memcache
+import collections
+import json
 
 
 class PatreonExtractor(Extractor):
@@ -33,70 +35,92 @@ class PatreonExtractor(Extractor):
         for post in self.posts():
             yield Message.Directory, post
 
+            ids = set()
             post["num"] = 0
             content = post.get("content")
             postfile = post.get("post_file")
 
-            for url in text.extract_iter(content or "", 'src="', '"'):
+            for image in post["images"]:
+                url = image.get("download_url")
+                if not url:
+                    continue
+                ids.add(url.split("/")[-2])
+                name = image.get("file_name") or self._filename(url) or url
+
                 post["num"] += 1
-                yield Message.Url, url, text.nameext_from_url(url, post)
+                post["type"] = "image"
+                yield Message.Url, url, text.nameext_from_url(name, post)
 
-            if postfile:
+            if postfile and postfile["url"].split("/")[-2] not in ids:
                 post["num"] += 1
+                post["type"] = "postfile"
                 text.nameext_from_url(postfile["name"], post)
                 yield Message.Url, postfile["url"], post
 
             for attachment in post["attachments"]:
                 post["num"] += 1
+                post["type"] = "attachment"
                 text.nameext_from_url(attachment["name"], post)
                 yield Message.Url, attachment["url"], post
 
+            if content:
+                for url in text.extract_iter(content, 'src="', '"'):
+                    post["num"] += 1
+                    post["type"] = "content"
+                    yield Message.Url, url, text.nameext_from_url(url, post)
+
     def posts(self):
         """Return all relevant post objects"""
 
     def _pagination(self, url):
         headers = {"Referer": self.root}
-        empty = []
 
         while url:
             posts = self.request(url, headers=headers).json()
 
-            if "included" not in posts:
-                return
-
-            # collect attachments
-            attachments = {}
-            for inc in posts["included"]:
-                if inc["type"] == "attachment":
-                    attachments[inc["id"]] = inc["attributes"]
-
-            # update posts
-            for post in posts["data"]:
-                attr = post["attributes"]
-                attr["id"] = text.parse_int(post["id"])
-                attr["date"] = text.parse_datetime(
-                    attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
-                attr["creator"] = self._user(
-                    post["relationships"]["user"]["links"]["related"])
-
-                # add attachments to post attributes
-                files = post["relationships"].get("attachments")
-                if files:
-                    attr["attachments"] = [
-                        attachments[f["id"]]
-                        for f in files["data"]
-                    ]
-                else:
-                    attr["attachments"] = empty
-
-                yield attr
+            if "included" in posts:
+                included = self._transform(posts["included"])
+                for post in posts["data"]:
+                    yield self._process(post, included)
 
             if "links" not in posts:
                 return
             url = posts["links"].get("next")
 
+    def _process(self, post, included):
+        """Process and extend a 'post' object"""
+        attr = post["attributes"]
+        attr["id"] = text.parse_int(post["id"])
+        attr["images"] = self._files(post, included, "images")
+        attr["attachments"] = self._files(post, included, "attachments")
+        attr["date"] = text.parse_datetime(
+            attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+        attr["creator"] = self._user(
+            post["relationships"]["user"]["links"]["related"])
+        return attr
+
+    @staticmethod
+    def _transform(included):
+        """Transform 'included' into an easier to handle format"""
+        result = collections.defaultdict(dict)
+        for inc in included:
+            result[inc["type"]][inc["id"]] = inc["attributes"]
+        return result
+
+    @staticmethod
+    def _files(post, included, key):
+        """Build a list of files"""
+        files = post["relationships"].get(key)
+        if files and files.get("data"):
+            return [
+                included[file["type"]][file["id"]]
+                for file in files["data"]
+            ]
+        return []
+
     @memcache(keyarg=1)
     def _user(self, url):
+        """Fetch user information"""
         user = self.request(url).json()["data"]
         attr = user["attributes"]
         attr["id"] = user["id"]
@@ -104,14 +128,21 @@ class PatreonExtractor(Extractor):
             attr["created"], "%Y-%m-%dT%H:%M:%S.%f%z")
         return attr
 
+    def _filename(self, url):
+        """Fetch filename from its Content-Disposition header"""
+        response = self.request(url, method="HEAD", fatal=False)
+        cd = response.headers.get("Content-Disposition")
+        return text.extract(cd, 'filename="', '"')[0]
+
     @staticmethod
     def _build_url(endpoint, query):
         return (
             "https://www.patreon.com/api/" + endpoint +
 
-            "?include=user,attachments,user_defined_tags,campaign,poll.choices"
-            ",poll.current_user_responses.user,poll.current_user_responses.cho"
-            "ice,poll.current_user_responses.poll,access_rules.tier.null"
+            "?include=user,images,attachments,user_defined_tags,campaign,poll."
+            "choices,poll.current_user_responses.user,poll.current_user_respon"
+            "ses.choice,poll.current_user_responses.poll,access_rules.tier.nul"
+            "l"
 
             "&fields[post]=change_visibility_at,comment_count,content,current_"
             "user_can_delete,current_user_can_view,current_user_has_liked,embe"
@@ -133,7 +164,8 @@ class PatreonCreatorExtractor(PatreonExtractor):
     """Extractor for a creator's works"""
     subcategory = "creator"
     pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
-               r"/(?!(?:home|join|login|signup)(?:$|[/?&#]))([^/?&#]+)/?")
+               r"/(?!(?:home|join|posts|login|signup)(?:$|[/?&#]))"
+               r"([^/?&#]+)/?")
     test = ("https://www.patreon.com/koveliana", {
         "range": "1-25",
         "count": ">= 25",
@@ -144,6 +176,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
             "creator": dict,
             "date": "type:datetime",
             "id": int,
+            "images": list,
             "like_count": int,
             "post_type": str,
             "published_at": str,
@@ -181,3 +214,26 @@ class PatreonUserExtractor(PatreonExtractor):
             "&filter[is_following]=true"
         ))
         return self._pagination(url)
+
+
+class PatreonPostExtractor(PatreonExtractor):
+    """Extractor for media from a single post"""
+    subcategory = "post"
+    pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
+               r"/posts/[^/?&#]*?(\d+)")
+    test = ("https://www.patreon.com/posts/precious-metal-23563293", {
+        "count": 4,
+    })
+
+    def __init__(self, match):
+        PatreonExtractor.__init__(self, match)
+        self.post_id = match.group(1)
+
+    def posts(self):
+        url = "{}/posts/{}".format(self.root, self.post_id)
+        page = self.request(url).text
+        data = text.extract(page, "window.patreon.bootstrap,", "\n});")[0]
+        post = json.loads(data + "}")["post"]
+
+        included = self._transform(post["included"])
+        return (self._process(post["data"], included),)
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 76d4dc4..4f8ee9c 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -18,8 +18,8 @@ class PixivExtractor(Extractor):
     """Base class for pixiv extractors"""
     category = "pixiv"
     directory_fmt = ("{category}", "{user[id]} {user[account]}")
-    filename_fmt = "{category}_{user[id]}_{id}{num}.{extension}"
-    archive_fmt = "{id}{num}.{extension}"
+    filename_fmt = "{id}_p{num}.{extension}"
+    archive_fmt = "{id}{suffix}.{extension}"
 
     def __init__(self, match):
         Extractor.__init__(self, match)
@@ -40,9 +40,10 @@ class PixivExtractor(Extractor):
             del work["meta_single_page"]
             del work["image_urls"]
             del work["meta_pages"]
-            work["num"] = ""
+            work["num"] = 0
             work["tags"] = [tag["name"] for tag in work["tags"]]
             work["date"] = text.parse_datetime(work["create_date"])
+            work["suffix"] = ""
             work.update(metadata)
 
             yield Message.Directory, work
@@ -55,20 +56,17 @@ class PixivExtractor(Extractor):
                 url = ugoira["zip_urls"]["medium"].replace(
                     "_ugoira600x600", "_ugoira1920x1080")
                 work["frames"] = ugoira["frames"]
-                work["extension"] = "zip"
-                yield Message.Url, url, work
+                yield Message.Url, url, text.nameext_from_url(url, work)
 
             elif work["page_count"] == 1:
                 url = meta_single_page["original_image_url"]
-                work["extension"] = url.rpartition(".")[2]
-                yield Message.Url, url, work
+                yield Message.Url, url, text.nameext_from_url(url, work)
 
             else:
-                for num, img in enumerate(meta_pages):
+                for work["num"], img in enumerate(meta_pages):
                     url = img["image_urls"]["original"]
-                    work["num"] = "_p{:02}".format(num)
-                    work["extension"] = url.rpartition(".")[2]
-                    yield Message.Url, url, work
+                    work["suffix"] = "_p{:02}".format(work["num"])
+                    yield Message.Url, url, text.nameext_from_url(url, work)
 
     def works(self):
         """Return an iterable containing all relevant 'work'-objects"""
diff --git a/gallery_dl/extractor/pururin.py b/gallery_dl/extractor/pururin.py
index fa4eb81..aa5c9c6 100644
--- a/gallery_dl/extractor/pururin.py
+++ b/gallery_dl/extractor/pururin.py
@@ -29,7 +29,7 @@ class PururinGalleryExtractor(GalleryExtractor):
                 "artist"    : ["Shoda Norihiro"],
                 "group"     : ["Obsidian Order"],
                 "parody"    : ["Kantai Collection"],
-                "characters": ["Iowa", "Teitoku"],
+                "characters": ["Admiral", "Iowa"],
                 "tags"      : list,
                 "type"      : "Doujinshi",
                 "collection": "",
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 59d502a..f97454b 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -117,6 +117,8 @@ class ReactorExtractor(SharedConfigMixin, Extractor):
             url = text.extract(image, ' src="', '"')[0]
             if not url:
                 continue
+            if url.startswith("//"):
+                url = "http:" + url
             width = text.extract(image, ' width="', '"')[0]
             height = text.extract(image, ' height="', '"')[0]
             image_id = url.rpartition("-")[2].partition(".")[0]
@@ -268,8 +270,8 @@ class JoyreactorPostExtractor(ReactorPostExtractor):
             "keyword": "dbe148d576f2fc9431020c557ddb78f449e48c47",
         }),
         ("http://joyreactor.com/post/3668724", {  # youtube embed
-            "url": "be2589e2e8f3ffcaf41b34bc28bfad850ccea34a",
-            "keyword": "da61b9e2887db95759950df5fb89c9d32f8e7651",
+            "url": "bf1666eddcff10c9b58f6be63fa94e4e13074214",
+            "keyword": "989112c7888e9cc80fd35870180c6c98165d953b",
         }),
         ("http://joyreactor.cc/post/1299", {  # "malformed" JSON
             "url": "ac900743ed7cf1baf3db3b531c3bc414bf1ffcde",
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 2ba4b99..94e95e8 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -234,7 +234,7 @@ class RedditAPI():
         url = "https://oauth.reddit.com" + endpoint
         params["raw_json"] = 1
         self.authenticate()
-        response = self.extractor.request(url, params=params, fatal=False)
+        response = self.extractor.request(url, params=params, fatal=None)
         remaining = response.headers.get("x-ratelimit-remaining")
         if remaining and float(remaining) < 2:
             wait = int(response.headers["x-ratelimit-reset"])
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index da9735e..bb8a2ae 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -40,17 +40,18 @@ class SankakuExtractor(SharedConfigMixin, Extractor):
 
     def items(self):
         self.login()
-        data = self.get_metadata()
 
         yield Message.Version, 1
-        yield Message.Directory, data
+        data = self.get_metadata()
 
         for post_id in util.advance(self.get_posts(), self.start_post):
             self.wait()
             post = self.get_post_data(post_id)
             url = post["file_url"]
             post.update(data)
-            yield Message.Url, url, text.nameext_from_url(url, post)
+            text.nameext_from_url(url, post)
+            yield Message.Directory, post
+            yield Message.Url, url, post
 
     def skip(self, num):
         self.start_post += num
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index afd4eaa..38b7813 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -78,6 +78,7 @@ class SexcomExtractor(Extractor):
                     path += "/hd"
                 data["url"] = self.root + path
             else:
+                data["extension"] = None
                 data["url"] = "ytdl:" + text.extract(
                     extr('<iframe', '>'), ' src="', '"')[0]
         else:
diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
index 5ad372d..8567155 100644
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -8,14 +8,16 @@
 
 """Extract hentai-manga from https://www.simply-hentai.com/"""
 
-from .common import GalleryExtractor, Extractor, Message
+from .common import GalleryExtractor
 from .. import text, util, exception
+import json
 
 
 class SimplyhentaiGalleryExtractor(GalleryExtractor):
     """Extractor for image galleries from simply-hentai.com"""
     category = "simplyhentai"
     archive_fmt = "{image_id}"
+    root = "https://www.simply-hentai.com"
     pattern = (r"(?:https?://)?(?!videos\.)([\w-]+\.simply-hentai\.com"
                r"(?!/(?:album|gifs?|images?|series)(?:/|$))"
                r"(?:/(?!(?:page|all-pages)(?:/|\.|$))[^/?&#]+)+)")
@@ -23,7 +25,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
         (("https://original-work.simply-hentai.com"
           "/amazon-no-hiyaku-amazon-elixir"), {
             "url": "258289249990502c3138719cb89e995a60861e49",
-            "keyword": "eba83ccdbab3022a2280c77aa747f9458196138b",
+            "keyword": "8b2400e4b466e8f46802fa5a6b917d2788bb7e8e",
         }),
         ("https://www.simply-hentai.com/notfound", {
             "exception": exception.GalleryDLException,
@@ -40,144 +42,30 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
         self.session.headers["Referer"] = url
 
     def metadata(self, page):
-        extr = text.extract_from(page)
-        split = text.split_html
-
-        title = extr('<meta property="og:title" content="', '"')
-        if not title:
+        path = text.extract(page, '<a class="preview" href="', '"')[0]
+        if not path:
             raise exception.NotFoundError("gallery")
-        data = {
-            "title"     : text.unescape(title),
-            "gallery_id": text.parse_int(extr('/Album/', '/')),
-            "parody"    : split(extr('box-title">Series</div>', '</div>')),
-            "language"  : text.remove_html(extr(
-                'box-title">Language</div>', '</div>')) or None,
-            "characters": split(extr('box-title">Characters</div>', '</div>')),
-            "tags"      : split(extr('box-title">Tags</div>', '</div>')),
-            "artist"    : split(extr('box-title">Artists</div>', '</div>')),
-            "date"      : text.parse_datetime(text.remove_html(
-                extr('Uploaded', '</div>')), "%d.%m.%Y"),
+        page = self.request(self.root + path).text
+        data = json.loads(text.unescape(text.extract(
+            page, 'data-react-class="Reader" data-react-props="', '"')[0]))
+        self.manga = manga = data["manga"]
+
+        return {
+            "title"     : manga["title"],
+            "parody"    : manga["series"]["title"],
+            "language"  : manga["language"]["name"],
+            "lang"      : util.language_to_code(manga["language"]["name"]),
+            "characters": [x["name"] for x in manga["characters"]],
+            "tags"      : [x["name"] for x in manga["tags"]],
+            "artist"    : [x["name"] for x in manga["artists"]],
+            "gallery_id": text.parse_int(text.extract(
+                manga["images"][0]["sizes"]["full"], "/Album/", "/")[0]),
+            "date"      : text.parse_datetime(
+                manga["publish_date"], "%Y-%m-%dT%H:%M:%S.%f%z"),
         }
-        data["lang"] = util.language_to_code(data["language"])
-        return data
 
     def images(self, _):
-        url = self.chapter_url + "/all-pages"
-        headers = {"Accept": "application/json"}
-        images = self.request(url, headers=headers).json()
         return [
-            (urls["full"], {"image_id": text.parse_int(image_id)})
-            for image_id, urls in sorted(images.items())
+            (image["sizes"]["full"], {"image_id": image["id"]})
+            for image in self.manga["images"]
         ]
-
-
-class SimplyhentaiImageExtractor(Extractor):
-    """Extractor for individual images from simply-hentai.com"""
-    category = "simplyhentai"
-    subcategory = "image"
-    directory_fmt = ("{category}", "{type}s")
-    filename_fmt = "{category}_{token}{title:?_//}.{extension}"
-    archive_fmt = "{token}"
-    pattern = (r"(?:https?://)?(?:www\.)?(simply-hentai\.com"
-               r"/(image|gif)/[^/?&#]+)")
-    test = (
-        (("https://www.simply-hentai.com/image"
-          "/pheromomania-vol-1-kanzenban-isao-3949d8b3-400c-4b6"), {
-            "url": "0338eb137830ab6f81e5f410d3936ef785d063d9",
-            "keyword": "e10e5588481cab68329ef6ec1e5325206b2079a2",
-        }),
-        ("https://www.simply-hentai.com/gif/8915dfcf-0b6a-47c", {
-            "url": "11c060d7ec4dfd0bd105300b6e1fd454674a5af1",
-            "keyword": "dd97a4bb449c397d6fec9f43a1303c0fb168ae65",
-        }),
-    )
-
-    def __init__(self, match):
-        Extractor.__init__(self, match)
-        self.page_url = "https://www." + match.group(1)
-        self.type = match.group(2)
-
-    def items(self):
-        extr = text.extract_from(self.request(self.page_url).text)
-        title = extr('"og:title" content="'      , '"')
-        descr = extr('"og:description" content="', '"')
-        url = extr('&quot;image&quot;:&quot;'  , '&')
-        url = extr("&quot;content&quot;:&quot;", "&") or url
-
-        tags = text.extract(descr, " tagged with ", " online for free ")[0]
-        if tags:
-            tags = tags.split(", ")
-            tags[-1] = tags[-1].partition(" ")[2]
-        else:
-            tags = []
-
-        data = text.nameext_from_url(url, {
-            "title": text.unescape(title) if title else "",
-            "tags": tags,
-            "type": self.type,
-        })
-        data["token"] = data["filename"].rpartition("_")[2]
-
-        yield Message.Version, 1
-        yield Message.Directory, data
-        yield Message.Url, url, data
-
-
-class SimplyhentaiVideoExtractor(Extractor):
-    """Extractor for hentai videos from simply-hentai.com"""
-    category = "simplyhentai"
-    subcategory = "video"
-    directory_fmt = ("{category}", "{type}s")
-    filename_fmt = "{title}{episode:?_//>02}.{extension}"
-    archive_fmt = "{title}_{episode}"
-    pattern = r"(?:https?://)?(videos\.simply-hentai\.com/[^/?&#]+)"
-    test = (
-        ("https://videos.simply-hentai.com/creamy-pie-episode-02", {
-            "pattern": r"https://www\.googleapis\.com/drive/v3/files"
-                       r"/0B1ecQ8ZVLm3JcHZzQzBnVy1ZUmc\?alt=media&key=[\w-]+",
-            "keyword": "706790708b14773efc1e075ddd3b738a375348a5",
-            "count": 1,
-        }),
-        (("https://videos.simply-hentai.com"
-          "/1715-tifa-in-hentai-gang-bang-3d-movie"), {
-            "url": "ad9a36ae06c601b6490e3c401834b4949d947eb0",
-            "keyword": "f9dad94fbde9c95859e631ff4f07297a9567b874",
-        }),
-    )
-
-    def __init__(self, match):
-        Extractor.__init__(self, match)
-        self.page_url = "https://" + match.group(1)
-
-    def items(self):
-        page = self.request(self.page_url).text
-
-        title, pos = text.extract(page, "<title>", "</title>")
-        tags , pos = text.extract(page, ">Tags</div>", "</div>", pos)
-        date , pos = text.extract(page, ">Upload Date</div>", "</div>", pos)
-        title = title.rpartition(" - ")[0]
-
-        if "<video" in page:
-            video_url = text.extract(page, '<source src="', '"', pos)[0]
-            episode = 0
-        else:
-            # video url from myhentai.tv embed
-            pos = page.index('<div class="video-frame-container">', pos)
-            embed_url = text.extract(page, 'src="', '"', pos)[0].replace(
-                "embedplayer.php?link=", "embed.php?name=")
-            embed_page = self.request(embed_url).text
-            video_url = text.extract(embed_page, '"file":"', '"')[0]
-            title, _, episode = title.rpartition(" Episode ")
-
-        data = text.nameext_from_url(video_url, {
-            "title": text.unescape(title),
-            "episode": text.parse_int(episode),
-            "tags": text.split_html(tags)[::2],
-            "type": "video",
-            "date": text.parse_datetime(text.remove_html(
-                date), "%B %d, %Y %H:%M"),
-        })
-
-        yield Message.Version, 1
-        yield Message.Directory, data
-        yield Message.Url, video_url, data
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ccba640..3672a6d 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -54,6 +54,7 @@ class TwitterExtractor(Extractor):
 
             if self.videos and "-videoContainer" in tweet:
                 data["num"] = 1
+                data["extension"] = None
                 url = "ytdl:{}/{}/status/{}".format(
                     self.root, data["user"], data["tweet_id"])
                 yield Message.Url, url, data
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index b9c223c..463733f 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -70,7 +70,7 @@ class WikiartArtistExtractor(WikiartExtractor):
     pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)"
     test = ("https://www.wikiart.org/en/thomas-cole", {
         "url": "f1eee8158f5b8b7380382ab730a8f53884715c8b",
-        "keyword": "b62678394ce645815963883d5c9642255307225f",
+        "keyword": "c61f5a4774b977106000e9554d19cfb9438a7032",
     })
 
     def __init__(self, match):
diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py
index 9699806..23750db 100644
--- a/gallery_dl/extractor/xhamster.py
+++ b/gallery_dl/extractor/xhamster.py
@@ -13,13 +13,16 @@ from .. import text
 import json
 
 
-BASE_PATTERN = r"(?:https?://)?(?:[^.]+\.)?xhamster\.(?:com|one|desi)"
+BASE_PATTERN = r"(?:https?://)?((?:[^.]+\.)?xhamster\d?\.(?:com|one|desi))"
 
 
 class XhamsterExtractor(Extractor):
     """Base class for xhamster extractors"""
     category = "xhamster"
-    root = "https://xhamster.com"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.root = "https://" + match.group(1)
 
 
 class XhamsterGalleryExtractor(XhamsterExtractor):
@@ -66,16 +69,21 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
                 },
             },
         }),
+        ("https://jp.xhamster2.com/photos/gallery/11748968", {
+            "pattern": r"https://thumb-p\d+.xhcdn.com/./[\w/-]+_1000.jpg$",
+            "count": ">= 144",
+        }),
         ("https://xhamster.com/photos/gallery/make-the-world-better-11748968"),
         ("https://xhamster.com/photos/gallery/11748968"),
         ("https://xhamster.one/photos/gallery/11748968"),
         ("https://xhamster.desi/photos/gallery/11748968"),
+        ("https://xhamster2.com/photos/gallery/11748968"),
         ("https://en.xhamster.com/photos/gallery/11748968"),
     )
 
     def __init__(self, match):
         XhamsterExtractor.__init__(self, match)
-        self.path = match.group(1)
+        self.path = match.group(2)
         self.data = None
 
     def items(self):
@@ -154,7 +162,7 @@ class XhamsterUserExtractor(XhamsterExtractor):
 
     def __init__(self, match):
         XhamsterExtractor.__init__(self, match)
-        self.user = match.group(1)
+        self.user = match.group(2)
 
     def items(self):
         yield Message.Version, 1
author	Unit 193 <unit193@ubuntu.com>	2019-08-26 19:34:45 -0400
committer	Unit 193 <unit193@ubuntu.com>	2019-08-26 19:34:45 -0400
commit	b75d158d014d6c43d7d785c46c9372a9cf84d144 (patch)
tree	7dca4a7e61fe8b6e2bff2142fc19891e783a7d6d /gallery_dl/extractor
parent	64ad8e7bd15df71ab1116eede414558631bcad32 (diff)