New upstream version 1.21.1. (ref: upstream/1.21.1)

author: Unit 193 <unit193@unit193.net> 2022-04-09 00:15:19 -0400
committer: Unit 193 <unit193@unit193.net> 2022-04-09 00:15:19 -0400
commit: 2fe1dfed848fc26b7419e3bfe91a62e686960429 (patch)
tree: 901cb64e2a1748df2bb8c7abc60ff6d72ae4bc27 /gallery_dl/extractor
parent: c2e774d3f5a4499b8beb5a12ab46a0099b16b1e7 (diff)
17 files changed, 561 insertions, 177 deletions
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 1bec48e..6d6c7ee 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -43,6 +43,7 @@ modules = [
     "gelbooru_v01",
     "gelbooru_v02",
     "gfycat",
+    "gofile",
     "hbrowse",
     "hentai2read",
     "hentaicosplays",
@@ -125,6 +126,7 @@ modules = [
     "speakerdeck",
     "subscribestar",
     "tapas",
+    "telegraph",
     "toyhouse",
     "tsumino",
     "tumblr",
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 06ec571..fa590b9 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2020-2021 Mike Fährmann
+# Copyright 2020-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -11,6 +11,8 @@
 from .common import Extractor, Message
 from .. import text, util, exception
 from ..cache import cache
+from email.utils import parsedate_tz
+from datetime import datetime
 
 BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4"
 
@@ -144,7 +146,8 @@ class AryionExtractor(Extractor):
 
         title, _, artist = text.unescape(extr(
             "<title>g4 :: ", "<")).rpartition(" by ")
-        data = {
+
+        return {
             "id"    : text.parse_int(post_id),
             "url"   : url,
             "user"  : self.user or artist,
@@ -152,7 +155,7 @@ class AryionExtractor(Extractor):
             "artist": artist,
             "path"  : text.split_html(extr(
                 "cookiecrumb'>", '</span'))[4:-1:2],
-            "date"  : extr("class='pretty-date' title='", "'"),
+            "date"  : datetime(*parsedate_tz(lmod)[:6]),
             "size"  : text.parse_int(clen),
             "views" : text.parse_int(extr("Views</b>:", "<").replace(",", "")),
             "width" : text.parse_int(extr("Resolution</b>:", "x")),
@@ -167,12 +170,6 @@ class AryionExtractor(Extractor):
             "_mtime"   : lmod,
         }
 
-        d1, _, d2 = data["date"].partition(",")
-        data["date"] = text.parse_datetime(
-            d1[:-2] + d2, "%b %d %Y %I:%M %p", -5)
-
-        return data
-
 
 class AryionGalleryExtractor(AryionExtractor):
     """Extractor for a user's gallery on eka's portal"""
@@ -249,7 +246,7 @@ class AryionPostExtractor(AryionExtractor):
                 "title"    : "I'm on subscribestar now too!",
                 "description": r"re:Doesn't hurt to have a backup, right\?",
                 "tags"     : ["Non-Vore", "subscribestar"],
-                "date"     : "dt:2019-02-16 19:30:00",
+                "date"     : "dt:2019-02-16 19:30:34",
                 "path"     : [],
                 "views"    : int,
                 "favorites": int,
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index e3559f9..ff49d89 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -183,7 +183,7 @@ class Extractor():
         elif until:
             if isinstance(until, datetime.datetime):
                 # convert to UTC timestamp
-                until = (until - util.EPOCH) / util.SECOND
+                until = util.datetime_to_timestamp(until)
             else:
                 until = float(until)
             seconds = until - now
@@ -373,7 +373,6 @@ class Extractor():
                         self.log.warning(
                             "Cookie '%s' will expire in less than %s hour%s",
                             cookie.name, hours + 1, "s" if hours else "")
-                        continue
 
                 names.discard(cookie.name)
                 if not names:
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 6a8744a..b63cfc1 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -165,22 +165,24 @@ class FuraffinityExtractor(Extractor):
     def _pagination_search(self, query):
         url = self.root + "/search/"
         data = {
-            "page"           : 0,
-            "next_page"      : "Next",
+            "page"           : 1,
             "order-by"       : "relevancy",
             "order-direction": "desc",
             "range"          : "all",
-            "rating-general" : "on",
-            "rating-mature"  : "on",
-            "rating-adult"   : "on",
-            "type-art"       : "on",
-            "type-music"     : "on",
-            "type-flash"     : "on",
-            "type-story"     : "on",
-            "type-photo"     : "on",
-            "type-poetry"    : "on",
+            "range_from"     : "",
+            "range_to"       : "",
+            "rating-general" : "1",
+            "rating-mature"  : "1",
+            "rating-adult"   : "1",
+            "type-art"       : "1",
+            "type-music"     : "1",
+            "type-flash"     : "1",
+            "type-story"     : "1",
+            "type-photo"     : "1",
+            "type-poetry"    : "1",
             "mode"           : "extended",
         }
+
         data.update(query)
         if "page" in query:
             data["page"] = text.parse_int(query["page"])
@@ -194,7 +196,11 @@ class FuraffinityExtractor(Extractor):
 
             if not post_id:
                 return
-            data["page"] += 1
+
+            if "next_page" in data:
+                data["page"] += 1
+            else:
+                data["next_page"] = "Next"
 
 
 class FuraffinityGalleryExtractor(FuraffinityExtractor):
@@ -255,9 +261,10 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
             "range": "45-50",
             "count": 6,
         }),
-        ("https://www.furaffinity.net/search/cute&rating-general=0", {
-            "range": "1",
-            "count": 1,
+        # first page of search results (#2402)
+        ("https://www.furaffinity.net/search/?q=leaf&range=1day", {
+            "range": "1-3",
+            "count": 3,
         }),
     )
 
diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py
new file mode 100644
index 0000000..37d2986
--- /dev/null
+++ b/gallery_dl/extractor/gofile.py
@@ -0,0 +1,124 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from .common import Extractor, Message
+from .. import exception
+from ..cache import memcache
+
+
+class GofileFolderExtractor(Extractor):
+    category = "gofile"
+    subcategory = "folder"
+    root = "https://gofile.io"
+    directory_fmt = ("{category}", "{name} ({code})")
+    archive_fmt = "{id}"
+    pattern = r"(?:https?://)?(?:www\.)?gofile\.io/d/([^/?#]+)"
+    test = (
+        ("https://gofile.io/d/5qHmQj", {
+            "pattern": r"https://file\d+\.gofile\.io/download"
+                       r"/\w{8}-\w{4}-\w{4}-\w{4}-\w{12}"
+                       r"/test-%E3%83%86%E3%82%B9%E3%83%88-%2522%26!\.png",
+            "keyword": {
+                "createTime": int,
+                "directLink": "re:https://store3.gofile.io/download/direct/.+",
+                "downloadCount": int,
+                "extension": "png",
+                "filename": "test-テスト-%22&!",
+                "folder": {
+                    "childs": [
+                        "346429cc-aee4-4996-be3f-e58616fe231f",
+                        "765b6b12-b354-4e14-9a45-f763fa455682",
+                        "2a44600a-4a59-4389-addc-4a0d542c457b"
+                    ],
+                    "code": "5qHmQj",
+                    "createTime": 1648536501,
+                    "id": "45cd45d1-dc78-4553-923f-04091c621699",
+                    "isRoot": True,
+                    "name": "root",
+                    "public": True,
+                    "totalDownloadCount": int,
+                    "totalSize": 364,
+                    "type": "folder"
+                },
+                "id": r"re:\w{8}-\w{4}-\w{4}-\w{4}-\w{12}",
+                "link": r"re:https://file17.gofile.io/download/.+\.png",
+                "md5": "re:[0-9a-f]{32}",
+                "mimetype": "image/png",
+                "name": "test-テスト-%22&!.png",
+                "num": int,
+                "parentFolder": "45cd45d1-dc78-4553-923f-04091c621699",
+                "serverChoosen": "file17",
+                "size": 182,
+                "thumbnail": r"re:https://store3.gofile.io/download/.+\.png",
+                "type": "file"
+            },
+        }),
+        ("https://gofile.io/d/346429cc-aee4-4996-be3f-e58616fe231f", {
+            "content": "0c8768055e4e20e7c7259608b67799171b691140",
+        }),
+    )
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.content_id = match.group(1)
+
+    def items(self):
+        recursive = self.config("recursive")
+
+        token = self.config("api-token")
+        if token is None:
+            self.log.debug("creating temporary account")
+            token = self._create_account()
+        self.session.cookies.set("accountToken", token, domain=".gofile.io")
+
+        folder = self._get_content(self.content_id, token)
+        yield Message.Directory, folder
+
+        num = 0
+        contents = folder.pop("contents")
+        for content_id in folder["childs"]:
+            content = contents[content_id]
+            content["folder"] = folder
+
+            if content["type"] == "file":
+                num += 1
+                content["num"] = num
+                content["filename"], _, content["extension"] = \
+                    content["name"].rpartition(".")
+                yield Message.Url, content["link"], content
+
+            elif content["type"] == "folder":
+                if recursive:
+                    url = "https://gofile.io/d/" + content["id"]
+                    content["_extractor"] = GofileFolderExtractor
+                    yield Message.Queue, url, content
+
+            else:
+                self.log.debug("'%s' is of unknown type (%s)",
+                               content.get("name"), content["type"])
+
+    @memcache()
+    def _create_account(self):
+        return self._api_request("createAccount")["token"]
+
+    def _get_content(self, content_id, token):
+        return self._api_request("getContent", {
+            "contentId"   : content_id,
+            "token"       : token,
+            "websiteToken": "websiteToken",
+        })
+
+    def _api_request(self, endpoint, params=None):
+        response = self.request(
+            "https://api.gofile.io/" + endpoint, params=params).json()
+
+        if response["status"] != "ok":
+            if response["status"] == "error-notFound":
+                raise exception.NotFoundError("content")
+            raise exception.StopExtraction(
+                "%s failed (Status: %s)", endpoint, response["status"])
+
+        return response["data"]
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 34eaaab..ca7e692 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -28,8 +28,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
         ("https://hitomi.la/galleries/867789.html", {
             "pattern": r"https://[a-c]a\.hitomi\.la/webp/\d+/\d+"
                        r"/[0-9a-f]{64}\.webp",
-            "keyword": "4b584d09d535694d7d757c47daf5c15d116420d2",
-            "options": (("metadata", True),),
+            "keyword": "86af5371f38117a07407f11af689bdd460b09710",
             "count": 16,
         }),
         # download test
@@ -77,23 +76,18 @@ class HitomiGalleryExtractor(GalleryExtractor):
 
     def metadata(self, page):
         self.info = info = json.loads(page.partition("=")[2])
+        iget = info.get
 
-        data = self._data_from_gallery_info(info)
-        if self.config("metadata", False):
-            data.update(self._data_from_gallery_page(info))
-        return data
-
-    def _data_from_gallery_info(self, info):
-        language = info.get("language")
+        language = iget("language")
         if language:
             language = language.capitalize()
 
-        date = info.get("date")
+        date = iget("date")
         if date:
             date += ":00"
 
         tags = []
-        for tinfo in info.get("tags") or ():
+        for tinfo in iget("tags") or ():
             tag = string.capwords(tinfo["tag"])
             if tinfo.get("female"):
                 tag += " ♀"
@@ -109,35 +103,10 @@ class HitomiGalleryExtractor(GalleryExtractor):
             "lang"      : util.language_to_code(language),
             "date"      : text.parse_datetime(date, "%Y-%m-%d %H:%M:%S%z"),
             "tags"      : tags,
-        }
-
-    def _data_from_gallery_page(self, info):
-        url = "{}/galleries/{}.html".format(self.root, info["id"])
-
-        # follow redirects
-        while True:
-            response = self.request(url, fatal=False)
-            if b"<title>Redirect</title>" not in response.content:
-                break
-            url = text.extract(
-                response.text, 'http-equiv="refresh" content="', '"',
-            )[0].partition("=")[2]
-
-        if response.status_code >= 400:
-            return {}
-
-        def prep(value):
-            return [
-                text.unescape(string.capwords(v))
-                for v in text.extract_iter(value or "", '.html">', '<')
-            ]
-
-        extr = text.extract_from(response.text)
-        return {
-            "artist"    : prep(extr('<h2>', '</h2>')),
-            "group"     : prep(extr('<td>Group</td><td>', '</td>')),
-            "parody"    : prep(extr('<td>Series</td><td>', '</td>')),
-            "characters": prep(extr('<td>Characters</td><td>', '</td>')),
+            "artist"    : [o["artist"] for o in iget("artists") or ()],
+            "group"     : [o["group"] for o in iget("groups") or ()],
+            "parody"    : [o["parody"] for o in iget("parodys") or ()],
+            "characters": [o["character"] for o in iget("characters") or ()]
         }
 
     def images(self, _):
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 20a4c1a..e07b64e 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 # Copyright 2018-2020 Leonardo Taccari
-# Copyright 2018-2021 Mike Fährmann
+# Copyright 2018-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -43,6 +43,7 @@ class InstagramExtractor(Extractor):
         self.login()
         data = self.metadata()
         videos = self.config("videos", True)
+        previews = self.config("previews", False)
         video_headers = {"User-Agent": "Mozilla/5.0"}
 
         for post in self.posts():
@@ -56,14 +57,18 @@ class InstagramExtractor(Extractor):
 
             yield Message.Directory, post
             for file in files:
-                url = file.get("video_url")
-                if not url:
-                    url = file["display_url"]
-                elif not videos:
-                    continue
-                else:
-                    file["_http_headers"] = video_headers
                 file.update(post)
+
+                url = file.get("video_url")
+                if url:
+                    if videos:
+                        file["_http_headers"] = video_headers
+                        text.nameext_from_url(url, file)
+                        yield Message.Url, url, file
+                    if not previews:
+                        continue
+
+                url = file["display_url"]
                 yield Message.Url, url, text.nameext_from_url(url, file)
 
     def metadata(self):
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 9537263..7287c38 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -42,6 +42,7 @@ class KemonopartyExtractor(Extractor):
             r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
         find_hash = re.compile("/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})").match
         generators = self._build_file_generators(self.config("files"))
+        duplicates = self.config("duplicates")
         comments = self.config("comments")
         username = dms = None
 
@@ -84,7 +85,7 @@ class KemonopartyExtractor(Extractor):
                 match = find_hash(url)
                 if match:
                     post["hash"] = hash = match.group(1)
-                    if hash in hashes:
+                    if hash in hashes and not duplicates:
                         self.log.debug("Skipping %s (duplicate)", url)
                         continue
                     hashes.add(hash)
@@ -273,6 +274,11 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
         ("https://kemono.party/patreon/user/4158582/post/32099982", {
             "count": 2,
         }),
+        # allow duplicates (#2440)
+        ("https://kemono.party/patreon/user/4158582/post/32099982", {
+            "options": (("duplicates", True),),
+            "count": 3,
+        }),
         # DMs (#2008)
         ("https://kemono.party/patreon/user/34134344/post/38129255", {
             "options": (("dms", True),),
@@ -323,8 +329,9 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
         }),
         (("https://kemono.party/discord"
           "/server/256559665620451329/channel/462437519519383555#"), {
-            "pattern": r"https://kemono\.party/data/attachments/discord"
-                       r"/256559665620451329/\d+/\d+/.+",
+            "pattern": r"https://kemono\.party/data/("
+                       r"e3/77/e377e3525164559484ace2e64425b0cec1db08.*\.png|"
+                       r"51/45/51453640a5e0a4d23fbf57fb85390f9c5ec154.*\.gif)",
             "count": ">= 2",
         }),
         # 'inline' files
diff --git a/gallery_dl/extractor/kissgoddess.py b/gallery_dl/extractor/kissgoddess.py
index 85ec806..6e66772 100644
--- a/gallery_dl/extractor/kissgoddess.py
+++ b/gallery_dl/extractor/kissgoddess.py
@@ -20,7 +20,7 @@ class KissgoddessGalleryExtractor(GalleryExtractor):
     test = ("https://kissgoddess.com/album/18285.html", {
         "pattern": r"https://pic\.kissgoddess\.com"
                    r"/gallery/16473/18285/s/\d+\.jpg",
-        "count": 8,
+        "count": 19,
         "keyword": {
             "gallery_id": 18285,
             "title": "[Young Champion Extra] 2016.02 No.03 菜乃花 安枝瞳 葉月あや",
@@ -45,6 +45,8 @@ class KissgoddessGalleryExtractor(GalleryExtractor):
         while page:
             for url in text.extract_iter(page, "<img src='", "'"):
                 yield url, None
+            for url in text.extract_iter(page, "<img data-original='", "'"):
+                yield url, None
 
             pnum += 1
             url = "{}/album/{}_{}.html".format(
diff --git a/gallery_dl/extractor/mangasee.py b/gallery_dl/extractor/mangasee.py
index 1b3dd18..0b0da65 100644
--- a/gallery_dl/extractor/mangasee.py
+++ b/gallery_dl/extractor/mangasee.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -64,7 +64,7 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
         self.slug = extr('vm.IndexName = "', '"')
 
         data = self._transform_chapter(data)
-        data["manga"] = extr('vm.SeriesName = "', '"')
+        data["manga"] = text.unescape(extr('vm.SeriesName = "', '"'))
         return data
 
     def images(self, page):
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 6d0e94b..e9fde97 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -113,10 +113,16 @@ class NewgroundsExtractor(Extractor):
             if self.flash:
                 url += "/format/flash"
 
-        response = self.request(url, fatal=False)
-        if response.status_code >= 400:
-            return {}
-        page = response.text
+        with self.request(url, fatal=False) as response:
+            if response.status_code >= 400:
+                return {}
+            page = response.text
+
+        pos = page.find('id="adults_only"')
+        if pos >= 0:
+            msg = text.extract(page, 'class="highlight">', '<', pos)[0]
+            self.log.warning('"%s"', msg)
+
         extr = text.extract_from(page)
         data = extract_data(extr, post_url)
 
@@ -230,16 +236,20 @@ class NewgroundsExtractor(Extractor):
             yield fmt[1][0]["src"]
 
     def _pagination(self, kind):
-        root = self.user_root
+        url = "{}/{}".format(self.user_root, kind)
+        params = {
+            "page": 1,
+            "isAjaxRequest": "1",
+        }
         headers = {
-            "Accept": "application/json, text/javascript, */*; q=0.01",
+            "Referer": url,
             "X-Requested-With": "XMLHttpRequest",
-            "Referer": root,
         }
-        url = "{}/{}/page/1".format(root, kind)
 
         while True:
-            with self.request(url, headers=headers, fatal=False) as response:
+            with self.request(
+                    url, params=params, headers=headers,
+                    fatal=False) as response:
                 try:
                     data = response.json()
                 except ValueError:
@@ -250,14 +260,17 @@ class NewgroundsExtractor(Extractor):
                     msg = ", ".join(text.unescape(e) for e in data["errors"])
                     raise exception.StopExtraction(msg)
 
-            for year in data["sequence"]:
-                for item in data["years"][str(year)]["items"]:
+            for year, items in data["items"].items():
+                for item in items:
                     page_url = text.extract(item, 'href="', '"')[0]
-                    yield text.urljoin(root, page_url)
+                    if page_url[0] == "/":
+                        page_url = self.root + page_url
+                    yield page_url
 
-            if not data["more"]:
+            more = data.get("load_more")
+            if not more or len(more) < 8:
                 return
-            url = text.urljoin(root, data["more"])
+            params["page"] += 1
 
 
 class NewgroundsImageExtractor(NewgroundsExtractor):
@@ -293,7 +306,12 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
         ("https://www.newgrounds.com/art/view/sailoryon/yon-dream-buster", {
             "url": "84eec95e663041a80630df72719f231e157e5f5d",
             "count": 2,
-        })
+        }),
+        # "adult" rated (#2456)
+        ("https://www.newgrounds.com/art/view/kekiiro/red", {
+            "options": (("username", None),),
+            "count": 1,
+        }),
     )
 
     def __init__(self, match):
@@ -360,6 +378,11 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
             "pattern": r"https://uploads\.ungrounded\.net/alternate/1482000"
                        r"/1482860_alternate_102516\.720p\.mp4\?\d+",
         }),
+        # "adult" rated (#2456)
+        ("https://www.newgrounds.com/portal/view/717744", {
+            "options": (("username", None),),
+            "count": 1,
+        }),
     )
 
     def __init__(self, match):
@@ -454,25 +477,28 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
         )
 
     def _pagination(self, kind):
-        num = 1
+        url = "{}/favorites/{}".format(self.user_root, kind)
+        params = {
+            "page": 1,
+            "isAjaxRequest": "1",
+        }
         headers = {
-            "Accept": "application/json, text/javascript, */*; q=0.01",
+            "Referer": url,
             "X-Requested-With": "XMLHttpRequest",
-            "Referer": self.user_root,
         }
 
         while True:
-            url = "{}/favorites/{}/{}".format(self.user_root, kind, num)
-            response = self.request(url, headers=headers)
+            response = self.request(url, params=params, headers=headers)
             if response.history:
                 return
 
-            favs = self._extract_favorites(response.text)
+            data = response.json()
+            favs = self._extract_favorites(data.get("component") or "")
             yield from favs
 
             if len(favs) < 24:
                 return
-            num += 1
+            params["page"] += 1
 
     def _extract_favorites(self, page):
         return [
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 25344e8..2079b73 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2016-2021 Mike Fährmann
+# Copyright 2016-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -20,8 +20,8 @@ BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
 class PinterestExtractor(Extractor):
     """Base class for pinterest extractors"""
     category = "pinterest"
-    filename_fmt = "{category}_{id}.{extension}"
-    archive_fmt = "{id}"
+    filename_fmt = "{category}_{id}{media_id:?_//}.{extension}"
+    archive_fmt = "{id}{media_id}"
     root = "https://www.pinterest.com"
 
     def __init__(self, match):
@@ -35,28 +35,39 @@ class PinterestExtractor(Extractor):
 
         yield Message.Directory, data
         for pin in self.pins():
+            pin.update(data)
 
-            try:
-                media = self._media_from_pin(pin)
-            except Exception:
-                self.log.debug("Unable to fetch download URL for pin %s",
-                               pin.get("id"))
-                continue
+            carousel_data = pin.get("carousel_data")
+            if carousel_data:
+                for num, slot in enumerate(carousel_data["carousel_slots"], 1):
+                    slot["media_id"] = slot.pop("id")
+                    pin.update(slot)
+                    pin["num"] = num
+                    size, image = next(iter(slot["images"].items()))
+                    url = image["url"].replace("/" + size + "/", "/originals/")
+                    yield Message.Url, url, text.nameext_from_url(url, pin)
 
-            if not videos and media.get("duration") is not None:
-                continue
+            else:
+                try:
+                    media = self._media_from_pin(pin)
+                except Exception:
+                    self.log.debug("Unable to fetch download URL for pin %s",
+                                   pin.get("id"))
+                    continue
 
-            pin.update(data)
-            pin.update(media)
-            url = media["url"]
-            text.nameext_from_url(url, pin)
+                if videos or media.get("duration") is None:
+                    pin.update(media)
+                    pin["num"] = 0
+                    pin["media_id"] = ""
+
+                    url = media["url"]
+                    text.nameext_from_url(url, pin)
 
-            if pin["extension"] == "m3u8":
-                url = "ytdl:" + url
-                pin["extension"] = "mp4"
-                pin["_ytdl_extra"] = {"protocol": "m3u8_native"}
+                    if pin["extension"] == "m3u8":
+                        url = "ytdl:" + url
+                        pin["extension"] = "mp4"
 
-            yield Message.Url, url, pin
+                    yield Message.Url, url, pin
 
     def metadata(self):
         """Return general metadata"""
@@ -124,7 +135,8 @@ class PinterestBoardExtractor(PinterestExtractor):
     subcategory = "board"
     directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
     archive_fmt = "{board[id]}_{id}"
-    pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/(?!_saved)([^/?#&]+)/?$"
+    pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#&]+)"
+               "/(?!_saved|_created)([^/?#&]+)/?$")
     test = (
         ("https://www.pinterest.com/g1952849/test-/", {
             "pattern": r"https://i\.pinimg\.com/originals/",
@@ -192,6 +204,28 @@ class PinterestUserExtractor(PinterestExtractor):
                 yield Message.Queue, self.root + url, board
 
 
+class PinterestCreatedExtractor(PinterestExtractor):
+    """Extractor for a user's created pins"""
+    subcategory = "created"
+    directory_fmt = ("{category}", "{user}")
+    pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/_created/?$"
+    test = ("https://www.pinterest.com/amazon/_created", {
+        "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}"
+                   r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg",
+        "count": 10,
+    })
+
+    def __init__(self, match):
+        PinterestExtractor.__init__(self, match)
+        self.user = text.unquote(match.group(1))
+
+    def metadata(self):
+        return {"user": self.user}
+
+    def pins(self):
+        return self.api.user_activity_pins(self.user)
+
+
 class PinterestSectionExtractor(PinterestExtractor):
     """Extractor for board sections on pinterest.com"""
     subcategory = "section"
@@ -385,6 +419,16 @@ class PinterestAPI():
         options = {"board_id": board_id, "add_vase": True}
         return self._pagination("BoardRelatedPixieFeed", options)
 
+    def user_activity_pins(self, user):
+        """Yield pins created by 'user'"""
+        options = {
+            "exclude_add_pin_rep": True,
+            "field_set_key"      : "grid_item",
+            "is_own_profile_pins": False,
+            "username"           : user,
+        }
+        return self._pagination("UserActivityPins", options)
+
     def search(self, query):
         """Yield pins from searches"""
         options = {"query": query, "scope": "pins", "rs": "typed"}
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 965391c..2af917d 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -8,6 +8,7 @@
 
 from .common import Extractor, Message
 from .. import text
+import itertools
 
 
 class SkebExtractor(Extractor):
@@ -22,7 +23,6 @@ class SkebExtractor(Extractor):
         Extractor.__init__(self, match)
         self.user_name = match.group(1)
         self.thumbnails = self.config("thumbnails", False)
-        self.sent_requests = self.config("sent-requests", False)
 
     def items(self):
         for user_name, post_num in self.posts():
@@ -35,18 +35,18 @@ class SkebExtractor(Extractor):
     def posts(self):
         """Return post number"""
 
-    def _pagination(self):
-        url = "{}/api/users/{}/works".format(self.root, self.user_name)
-        params = {"role": "creator", "sort": "date", "offset": 0}
+    def _pagination(self, url, params):
         headers = {"Referer": self.root, "Authorization": "Bearer null"}
-        do_requests = self.sent_requests
+        params["offset"] = 0
 
         while True:
             posts = self.request(url, params=params, headers=headers).json()
 
             for post in posts:
-                post_num = post["path"].rpartition("/")[2]
-                user_name = post["path"].split("/")[1][1:]
+                parts = post["path"].split("/")
+                user_name = parts[1][1:]
+                post_num = parts[3]
+
                 if post["private"]:
                     self.log.debug("Skipping @%s/%s (private)",
                                    user_name, post_num)
@@ -54,13 +54,7 @@ class SkebExtractor(Extractor):
                 yield user_name, post_num
 
             if len(posts) < 30:
-                if do_requests:
-                    params["offset"] = 0
-                    params['role'] = "client"
-                    do_requests = False
-                    continue
-                else:
-                    return
+                return
             params["offset"] += 30
 
     def _get_post_data(self, user_name, post_num):
@@ -134,6 +128,54 @@ class SkebPostExtractor(SkebExtractor):
     """Extractor for a single skeb post"""
     subcategory = "post"
     pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)"
+    test = ("https://skeb.jp/@kanade_cocotte/works/38", {
+        "count": 2,
+        "keyword": {
+            "anonymous": False,
+            "body": "re:はじめまして。私はYouTubeにてVTuberとして活動をしている湊ラ",
+            "client": {
+                "avatar_url": "https://pbs.twimg.com/profile_images"
+                              "/1471184042791895042/f0DcWFGl.jpg",
+                "header_url": None,
+                "id": 1196514,
+                "name": "湊ラギ",
+                "screen_name": "minato_ragi",
+            },
+            "completed_at": "2022-02-27T14:03:45.442Z",
+            "content_category": "preview",
+            "creator": {
+                "avatar_url": "https://pbs.twimg.com/profile_images"
+                              "/1225470417063645184/P8_SiB0V.jpg",
+                "header_url": "https://pbs.twimg.com/profile_banners"
+                              "/71243217/1647958329/1500x500",
+                "id": 159273,
+                "name": "イチノセ奏",
+                "screen_name": "kanade_cocotte",
+            },
+            "date": "dt:2022-02-27 14:03:45",
+            "file_id": int,
+            "file_url": str,
+            "genre": "art",
+            "nsfw": False,
+            "original": {
+                "byte_size": int,
+                "duration": None,
+                "extension": "re:psd|png",
+                "frame_rate": None,
+                "height": 3727,
+                "is_movie": False,
+                "width": 2810,
+            },
+            "post_num": "38",
+            "post_url": "https://skeb.jp/@kanade_cocotte/works/38",
+            "source_body": None,
+            "source_thanks": None,
+            "tags": list,
+            "thanks": None,
+            "translated_body": False,
+            "translated_thanks": None,
+        }
+    })
 
     def __init__(self, match):
         SkebExtractor.__init__(self, match)
@@ -146,7 +188,23 @@ class SkebPostExtractor(SkebExtractor):
 class SkebUserExtractor(SkebExtractor):
     """Extractor for all posts from a skeb user"""
     subcategory = "user"
-    pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)"
+    pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/?$"  # end-anchored
+    test = ("https://skeb.jp/@kanade_cocotte", {
+        "pattern": r"https://skeb\.imgix\.net/uploads/origins/[\w-]+"
+                   r"\?bg=%23fff&auto=format&txtfont=bold&txtshad=70"
+                   r"&txtclr=BFFFFFFF&txtalign=middle%2Ccenter&txtsize=150"
+                   r"&txt=SAMPLE&w=800&s=\w+",
+        "range": "1-5",
+    })
 
     def posts(self):
-        return self._pagination()
+        url = "{}/api/users/{}/works".format(self.root, self.user_name)
+
+        params = {"role": "creator", "sort": "date"}
+        posts = self._pagination(url, params)
+        # Optionally chain a second, lazy pass using role "client".
+        if self.config("sent-requests", False):
+            params = {"role": "client", "sort": "date"}
+            posts = itertools.chain(posts, self._pagination(url, params))
+
+        return posts
diff --git a/gallery_dl/extractor/telegraph.py b/gallery_dl/extractor/telegraph.py
new file mode 100644
index 0000000..8e9bf2c
--- /dev/null
+++ b/gallery_dl/extractor/telegraph.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractor for https://telegra.ph/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class TelegraphGalleryExtractor(GalleryExtractor):
+    """Extractor for articles from telegra.ph"""
+    category = "telegraph"
+    root = "https://telegra.ph"
+    directory_fmt = ("{category}", "{slug}")
+    filename_fmt = "{num_formatted}_{filename}.{extension}"
+    archive_fmt = "{slug}_{num}"
+    pattern = r"(?:https?://)(?:www\.)??telegra\.ph(/[^/?#]+)"
+    test = (
+        ("https://telegra.ph/Telegraph-Test-03-28", {
+            "pattern": r"https://telegra\.ph/file/[0-9a-f]+\.png",
+            "keyword": {
+                "author": "mikf",
+                "caption": r"re:test|",
+                "count": 2,
+                "date": "dt:2022-03-28 16:01:36",
+                "description": "Just a test",
+                "post_url": "https://telegra.ph/Telegraph-Test-03-28",
+                "slug": "Telegraph-Test-03-28",
+                "title": "Telegra.ph Test",
+            },
+        }),
+        ("https://telegra.ph/森-03-28", {
+            "pattern": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg",
+            "count": 1,
+            "keyword": {
+                "author": "&",
+                "caption": "kokiri",
+                "count": 1,
+                "date": "dt:2022-03-28 16:31:26",
+                "description": "コキリの森",
+                "extension": "jpg",
+                "filename": "3ea79d23b0dd0889f215a",
+                "num": 1,
+                "num_formatted": "1",
+                "post_url": "https://telegra.ph/森-03-28",
+                "slug": "森-03-28",
+                "title": '"森"',
+                "url": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg",
+            },
+        }),
+    )
+
+    def metadata(self, page):
+        """Return a dict of article metadata extracted from 'page'"""
+        extr = text.extract_from(page)
+        data = {
+            "title": text.unescape(extr(
+                'property="og:title" content="', '"')),
+            "description": text.unescape(extr(
+                'property="og:description" content="', '"')),
+            "date": text.parse_datetime(extr(
+                'property="article:published_time" content="', '"'),
+                "%Y-%m-%dT%H:%M:%S%z"),
+            "author": text.unescape(extr(
+                'property="article:author" content="', '"')),
+            "post_url": text.unescape(extr(
+                'rel="canonical" href="', '"')),
+        }
+        data["slug"] = data["post_url"][len(self.root) + 1:]  # drop "<root>/"
+        return data
+
+    def images(self, page):
+        """Return a list of (url, metadata) pairs for non-embed figures"""
+        figures = tuple(text.extract_iter(page, "<figure>", "</figure>"))
+        num_zeroes = len(str(len(figures)))  # pad width from figure count
+        num = 0
+        result = []
+        for figure in figures:
+            src, pos = text.extract(figure, 'src="', '"')
+            if not src or src.startswith("/embed/"):  # no media or embed
+                continue
+            caption, pos = text.extract(figure, "<figcaption>", "<", pos)
+            url = self.root + src if src[0] == "/" else src  # absolute: as-is
+            num += 1
+
+            result.append((url, {
+                "url"          : url,
+                "caption"      : text.unescape(caption or ""),
+                "num"          : num,
+                "num_formatted": str(num).zfill(num_zeroes),
+            }))
+        return result
diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py
index ec8ab35..355ca21 100644
--- a/gallery_dl/extractor/twibooru.py
+++ b/gallery_dl/extractor/twibooru.py
@@ -36,8 +36,9 @@ class TwibooruExtractor(BooruExtractor):
         post["date"] = text.parse_datetime(
             post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
 
-        name, sep, rest = post["name"].rpartition(".")
-        post["filename"] = name if sep else rest
+        if "name" in post:
+            name, sep, rest = post["name"].rpartition(".")
+            post["filename"] = name if sep else rest
 
 
 class TwibooruPostExtractor(TwibooruExtractor):
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 6d51834..4c46170 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -15,7 +15,7 @@ import json
 
 BASE_PATTERN = (
     r"(?:https?://)?(?:www\.|mobile\.)?"
-    r"(?:twitter\.com|nitter\.net)"
+    r"(?:(?:fx)?twitter\.com|nitter\.net)"
 )
 
 
@@ -217,23 +217,24 @@ class TwitterExtractor(Extractor):
         if "legacy" in tweet:
             tweet = tweet["legacy"]
 
+        tget = tweet.get
         entities = tweet["entities"]
         tdata = {
             "tweet_id"      : text.parse_int(tweet["id_str"]),
             "retweet_id"    : text.parse_int(
-                tweet.get("retweeted_status_id_str")),
+                tget("retweeted_status_id_str")),
             "quote_id"      : text.parse_int(
-                tweet.get("quoted_status_id_str")),
+                tget("quoted_status_id_str")),
             "reply_id"      : text.parse_int(
-                tweet.get("in_reply_to_status_id_str")),
+                tget("in_reply_to_status_id_str")),
             "date"          : text.parse_datetime(
                 tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
             "user"          : user,
             "lang"          : tweet["lang"],
-            "favorite_count": tweet["favorite_count"],
-            "quote_count"   : tweet["quote_count"],
-            "reply_count"   : tweet["reply_count"],
-            "retweet_count" : tweet["retweet_count"],
+            "favorite_count": tget("favorite_count"),
+            "quote_count"   : tget("quote_count"),
+            "reply_count"   : tget("reply_count"),
+            "retweet_count" : tget("retweet_count"),
         }
 
         hashtags = entities.get("hashtags")
@@ -248,7 +249,7 @@ class TwitterExtractor(Extractor):
                 "nick": u["name"],
             } for u in mentions]
 
-        content = tweet["full_text"]
+        content = tget("full_text") or tget("text") or ""
         urls = entities.get("urls")
         if urls:
             for url in urls:
@@ -269,33 +270,36 @@ class TwitterExtractor(Extractor):
         return tdata
 
     def _transform_user(self, user):
+        uid = user.get("rest_id") or user["id_str"]
+
         try:
-            return self._user_cache[user.get("rest_id") or user["id_str"]]
+            return self._user_cache[uid]
         except KeyError:
             pass
 
-        uid = user.get("rest_id") or user["id_str"]
         if "legacy" in user:
             user = user["legacy"]
+
+        uget = user.get
         entities = user["entities"]
 
         self._user_cache[uid] = udata = {
             "id"              : text.parse_int(uid),
             "name"            : user["screen_name"],
             "nick"            : user["name"],
-            "location"        : user["location"],
+            "location"        : uget("location"),
             "date"            : text.parse_datetime(
-                user["created_at"], "%a %b %d %H:%M:%S %z %Y"),
-            "verified"        : user.get("verified", False),
-            "profile_banner"  : user.get("profile_banner_url", ""),
-            "profile_image"   : user.get(
+                uget("created_at"), "%a %b %d %H:%M:%S %z %Y"),
+            "verified"        : uget("verified", False),
+            "profile_banner"  : uget("profile_banner_url", ""),
+            "profile_image"   : uget(
                 "profile_image_url_https", "").replace("_normal.", "."),
-            "favourites_count": user["favourites_count"],
-            "followers_count" : user["followers_count"],
-            "friends_count"   : user["friends_count"],
-            "listed_count"    : user["listed_count"],
-            "media_count"     : user["media_count"],
-            "statuses_count"  : user["statuses_count"],
+            "favourites_count": uget("favourites_count"),
+            "followers_count" : uget("followers_count"),
+            "friends_count"   : uget("friends_count"),
+            "listed_count"    : uget("listed_count"),
+            "media_count"     : uget("media_count"),
+            "statuses_count"  : uget("statuses_count"),
         }
 
         descr = user["description"]
@@ -653,6 +657,11 @@ class TwitterTweetExtractor(TwitterExtractor):
         ("https://twitter.com/i/web/status/1486373748911575046", {
             "count": 4,
         }),
+        # age-restricted (#2354)
+        ("https://twitter.com/mightbecursed/status/1492954264909479936", {
+            "options": (("syndication", True),),
+            "count": 1,
+        }),
     )
 
     def __init__(self, match):
@@ -770,6 +779,7 @@ class TwitterAPI():
         }
 
         self._nsfw_warning = True
+        self._syndication = extractor.config("syndication")
         self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
         self._user = None
 
@@ -1153,9 +1163,10 @@ class TwitterAPI():
                 elif esw("conversationthread-"):
                     tweets.extend(entry["content"]["items"])
                 elif esw("tombstone-"):
-                    self._report_tombstone(
-                        entry,
-                        entry["content"]["itemContent"]["tombstoneInfo"])
+                    item = entry["content"]["itemContent"]
+                    item["tweet_results"] = \
+                        {"result": {"tombstone": item["tombstoneInfo"]}}
+                    tweets.append(entry)
                 elif esw("cursor-bottom-"):
                     cursor = entry["content"]
                     if not cursor.get("stopOnEmptyResponse", True):
@@ -1168,8 +1179,10 @@ class TwitterAPI():
                     tweet = ((entry.get("content") or entry["item"])
                              ["itemContent"]["tweet_results"]["result"])
                     if "tombstone" in tweet:
-                        self._report_tombstone(entry, tweet["tombstone"])
-                        continue
+                        tweet = self._process_tombstone(
+                            entry, tweet["tombstone"])
+                        if not tweet:
+                            continue
                     if "tweet" in tweet:
                         tweet = tweet["tweet"]
                     legacy = tweet["legacy"]
@@ -1259,10 +1272,45 @@ class TwitterAPI():
                 return
             variables["cursor"] = cursor
 
-    def _report_tombstone(self, entry, tombstone):
+    def _process_tombstone(self, entry, tombstone):  # -> tweet dict or None
         text = (tombstone.get("richText") or tombstone["text"])["text"]
-        if text.startswith("Age-restricted") and self._nsfw_warning:
-            self.extractor.log.warning(text)
-            self._nsfw_warning = False
-        self.extractor.log.debug(
-            "Skipping %s (%s)", entry["entryId"].rpartition("-")[2], text)
+        tweet_id = entry["entryId"].rpartition("-")[2]
+        # Age-restricted: refetch via syndication if enabled, else warn once.
+        if text.startswith("Age-restricted"):
+            if self._syndication:
+                return self._syndication_tweet(tweet_id)
+            elif self._nsfw_warning:
+                self._nsfw_warning = False  # suppress repeat warnings
+                self.extractor.log.warning('"%s"', text)
+
+        self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text)
+
+    def _syndication_tweet(self, tweet_id):  # -> {rest_id, legacy, user}
+        tweet = self.extractor.request(
+            "https://cdn.syndication.twimg.com/tweet?id=" + tweet_id).json()
+        # Stubs for keys that _transform_user() indexes unconditionally.
+        tweet["user"]["description"] = ""
+        tweet["user"]["entities"] = {"description": {}}
+        # Rebuild "extended_entities" from the video or photo fields.
+        if "video" in tweet:
+            video = tweet["video"]
+            del video["variants"][:-1]  # keep only the last variant
+            video["variants"][0]["url"] = video["variants"][0]["src"]
+            tweet["extended_entities"] = {"media": [{
+                "video_info"   : video,
+                "original_info": {"width" : 0, "height": 0},
+            }]}
+        elif "photos" in tweet:
+            for p in tweet["photos"]:
+                p["media_url_https"] = p["url"]
+                p["original_info"] = {
+                    "width" : p["width"],
+                    "height": p["height"],
+                }
+            tweet["extended_entities"] = {"media": tweet["photos"]}
+
+        return {
+            "rest_id": tweet["id_str"],
+            "legacy" : tweet,
+            "user"   : tweet["user"],
+        }
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index 2405dc3..6036322 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -193,7 +193,7 @@ class UnsplashSearchExtractor(UnsplashExtractor):
     """Extractor for unsplash search results"""
     subcategory = "search"
     pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^/?#]+))?"
-    test = ("https://unsplash.com/s/photos/nature", {
+    test = ("https://unsplash.com/s/photos/hair-style", {
         "pattern": r"https://images\.unsplash\.com/((flagged/)?photo-\d+-\w+"
                    r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
         "range": "1-30",
@@ -206,7 +206,7 @@ class UnsplashSearchExtractor(UnsplashExtractor):
 
     def photos(self):
         url = self.root + "/napi/search/photos"
-        params = {"query": text.unquote(self.item)}
+        params = {"query": text.unquote(self.item.replace('-', ' '))}
         if self.query:
             params.update(text.parse_query(self.query))
         return self._pagination(url, params, True)
author	Unit 193 <unit193@unit193.net>	2022-04-09 00:15:19 -0400
committer	Unit 193 <unit193@unit193.net>	2022-04-09 00:15:19 -0400
commit	2fe1dfed848fc26b7419e3bfe91a62e686960429 (patch)
tree	901cb64e2a1748df2bb8c7abc60ff6d72ae4bc27 /gallery_dl/extractor
parent	c2e774d3f5a4499b8beb5a12ab46a0099b16b1e7 (diff)