Diffstat (limited to 'gallery_dl')
27 files changed, 833 insertions, 222 deletions
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 7504fa4..b64fa2f 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -7,7 +7,6 @@ # published by the Free Software Foundation. import sys -import json import logging from . import version, config, option, output, extractor, job, util, exception @@ -32,81 +31,6 @@ def progress(urls, pformat): yield pinfo["url"] -def parse_inputfile(file, log): - """Filter and process strings from an input file. - - Lines starting with '#' and empty lines will be ignored. - Lines starting with '-' will be interpreted as a key-value pair separated - by an '='. where 'key' is a dot-separated option name and 'value' is a - JSON-parsable value. These configuration options will be applied while - processing the next URL. - Lines starting with '-G' are the same as above, except these options will - be applied for *all* following URLs, i.e. they are Global. - Everything else will be used as a potential URL. - - Example input file: - - # settings global options - -G base-directory = "/tmp/" - -G skip = false - - # setting local options for the next URL - -filename="spaces_are_optional.jpg" - -skip = true - - https://example.org/ - - # next URL uses default filename and 'skip' is false. - https://example.com/index.htm # comment1 - https://example.com/404.htm # comment2 - """ - gconf = [] - lconf = [] - - for line in file: - line = line.strip() - - if not line or line[0] == "#": - # empty line or comment - continue - - elif line[0] == "-": - # config spec - if len(line) >= 2 and line[1] == "G": - conf = gconf - line = line[2:] - else: - conf = lconf - line = line[1:] - - key, sep, value = line.partition("=") - if not sep: - log.warning("input file: invalid <key>=<value> pair: %s", line) - continue - - try: - value = json.loads(value.strip()) - except ValueError as exc: - log.warning("input file: unable to parse '%s': %s", value, exc) - continue - - key = key.strip().split(".") - conf.append((key[:-1], key[-1], value)) - - else: - # url - if " #" in line: - line = line.partition(" #")[0].rstrip() - elif "\t#" in line: - line = line.partition("\t#")[0].rstrip() - if gconf or lconf: - yield util.ExtendedUrl(line, gconf, lconf) - gconf = [] - lconf = [] - else: - yield line - - def main(): try: if sys.stdout and sys.stdout.encoding.lower() != "utf-8": @@ -275,12 +199,12 @@ def main(): try: if inputfile == "-": if sys.stdin: - urls += parse_inputfile(sys.stdin, log) + urls += util.parse_inputfile(sys.stdin, log) else: log.warning("input file: stdin is not readable") else: with open(inputfile, encoding="utf-8") as file: - urls += parse_inputfile(file, log) + urls += util.parse_inputfile(file, log) except OSError as exc: log.warning("input file: %s", exc) diff --git a/gallery_dl/extractor/2chen.py b/gallery_dl/extractor/2chen.py new file mode 100644 index 0000000..8fffeb0 --- /dev/null +++ b/gallery_dl/extractor/2chen.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://2chen.moe/""" + +from .common import Extractor, Message +from .. 
import text + + +class _2chenThreadExtractor(Extractor): + """Extractor for 2chen threads""" + category = "2chen" + subcategory = "thread" + directory_fmt = ("{category}", "{board}", "{thread} {title}") + filename_fmt = "{time} {filename}.{extension}" + archive_fmt = "{board}_{thread}_{hash}" + root = "https://2chen.moe" + pattern = r"(?:https?://)?2chen\.moe/([^/?#]+)/(\d+)" + test = ( + ("https://2chen.moe/jp/303786", { + "count": ">= 10", + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.board, self.thread = match.groups() + + def items(self): + url = "{}/{}/{}".format(self.root, self.board, self.thread) + page = self.request(url, encoding="utf-8").text + data = self.metadata(page) + yield Message.Directory, data + for post in self.posts(page): + if not post["url"]: + continue + post.update(data) + post["url"] = self.root + post["url"] + post["time"] = text.parse_int(post["date"].timestamp()) + yield Message.Url, post["url"], text.nameext_from_url( + post["filename"], post) + + def metadata(self, page): + board, pos = text.extract(page, 'class="board">/', '/<') + title = text.extract(page, "<h3>", "</h3>", pos)[0] + return { + "board" : board, + "thread": self.thread, + "title" : text.unescape(title), + } + + def posts(self, page): + """Return iterable with relevant posts""" + return map(self.parse, text.extract_iter( + page, 'class="glass media', '</article>')) + + def parse(self, post): + extr = text.extract_from(post) + return { + "name" : text.unescape(extr("<span>", "</span>")), + "date" : text.parse_datetime( + extr("<time", "<").partition(">")[2], + "%d %b %Y (%a) %H:%M:%S" + ), + "no" : extr('href="#p', '"'), + "url" : extr('</span><a href="', '"'), + "filename": text.unescape(extr('download="', '"')), + "hash" : extr('data-hash="', '"'), + } + + +class _2chenBoardExtractor(Extractor): + """Extractor for 2chen boards""" + category = "2chen" + subcategory = "board" + root = "https://2chen.moe" + pattern = r"(?:https?://)?2chen\.moe/([^/?#]+)(?:/catalog)?/?$" + test = ( + ("https://2chen.moe/co/", { + "pattern": _2chenThreadExtractor.pattern + }), + ("https://2chen.moe/co"), + ("https://2chen.moe/co/catalog") + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.board = match.group(1) + + def items(self): + url = "{}/{}/catalog".format(self.root, self.board) + page = self.request(url).text + data = {"_extractor": _2chenThreadExtractor} + for thread in text.extract_iter( + page, '<figure><a href="', '"'): + yield Message.Queue, self.root + thread, data diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py new file mode 100644 index 0000000..1e020c2 --- /dev/null +++ b/gallery_dl/extractor/8chan.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://8chan.moe/""" + +from .common import Extractor, Message +from .. import text +from ..cache import memcache +from datetime import datetime, timedelta +import itertools + +BASE_PATTERN = r"(?:https?://)?8chan\.(moe|se|cc)" + + +class _8chanExtractor(Extractor): + """Base class for 8chan extractors""" + category = "8chan" + root = "https://8chan.moe" + + def __init__(self, match): + self.root = "https://8chan." 
+ match.group(1) + Extractor.__init__(self, match) + + @memcache() + def _prepare_cookies(self): + # fetch captcha cookies + # (necessary to download without getting interrupted) + now = datetime.utcnow() + url = self.root + "/captcha.js" + params = {"d": now.strftime("%a %b %d %Y %H:%M:%S GMT+0000 (UTC)")} + self.request(url, params=params).content + + # adjust cookies + # - remove 'expires' timestamp + # - move 'captchaexpiration' value forward by 1 month) + domain = self.root.rpartition("/")[2] + for cookie in self.session.cookies: + if cookie.domain.endswith(domain): + cookie.expires = None + if cookie.name == "captchaexpiration": + cookie.value = (now + timedelta(30, 300)).strftime( + "%a, %d %b %Y %H:%M:%S GMT") + + return self.session.cookies + + +class _8chanThreadExtractor(_8chanExtractor): + """Extractor for 8chan threads""" + subcategory = "thread" + directory_fmt = ("{category}", "{boardUri}", + "{threadId} {subject[:50]}") + filename_fmt = "{postId}{num:?-//} {filename[:200]}.{extension}" + archive_fmt = "{boardUri}_{postId}_{num}" + pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)" + test = ( + ("https://8chan.moe/vhs/res/4.html", { + "pattern": r"https://8chan\.moe/\.media/[0-9a-f]{64}\.\w+$", + "count": 14, + "keyword": { + "archived": False, + "autoSage": False, + "boardDescription": "Film and Cinema", + "boardMarkdown": None, + "boardName": "Movies", + "boardUri": "vhs", + "creation": r"re:\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{3}Z", + "cyclic": False, + "email": None, + "id": "re:^[0-9a-f]{6}$", + "locked": False, + "markdown": str, + "maxFileCount": 5, + "maxFileSize": "32.00 MB", + "maxMessageLength": 8001, + "message": str, + "mime": str, + "name": "Anonymous", + "num": int, + "originalName": str, + "path": r"re:/.media/[0-9a-f]{64}\.\w+$", + "pinned": False, + "postId": int, + "signedRole": None, + "size": int, + "threadId": 4, + "thumb": r"re:/.media/t_[0-9a-f]{64}$", + "uniquePosters": 9, + "usesCustomCss": True, + "usesCustomJs": False, + "wsPort": 8880, + "wssPort": 2087, + }, + }), + ("https://8chan.se/vhs/res/4.html"), + ("https://8chan.cc/vhs/res/4.html"), + ) + + def __init__(self, match): + _8chanExtractor.__init__(self, match) + _, self.board, self.thread = match.groups() + + def items(self): + # fetch thread data + url = "{}/{}/res/{}.".format(self.root, self.board, self.thread) + self.session.headers["Referer"] = url + "html" + thread = self.request(url + "json").json() + thread["postId"] = thread["threadId"] + thread["_http_headers"] = {"Referer": url + "html"} + + try: + self.session.cookies = self._prepare_cookies() + except Exception as exc: + self.log.debug("Failed to fetch captcha cookies: %s: %s", + exc.__class__.__name__, exc, exc_info=True) + + # download files + posts = thread.pop("posts", ()) + yield Message.Directory, thread + for post in itertools.chain((thread,), posts): + files = post.pop("files", ()) + if not files: + continue + thread.update(post) + for num, file in enumerate(files): + file.update(thread) + file["num"] = num + text.nameext_from_url(file["originalName"], file) + yield Message.Url, self.root + file["path"], file + + +class _8chanBoardExtractor(_8chanExtractor): + """Extractor for 8chan boards""" + subcategory = "board" + pattern = BASE_PATTERN + r"/([^/?#]+)/(?:(\d+)\.html)?$" + test = ( + ("https://8chan.moe/vhs/"), + ("https://8chan.moe/vhs/2.html", { + "pattern": _8chanThreadExtractor.pattern, + "count": 23, + }), + ("https://8chan.se/vhs/"), + ("https://8chan.cc/vhs/"), + ) + + def __init__(self, match): + 
_8chanExtractor.__init__(self, match) + _, self.board, self.page = match.groups() + self.session.headers["Referer"] = self.root + "/" + + def items(self): + page = text.parse_int(self.page, 1) + url = "{}/{}/{}.json".format(self.root, self.board, page) + board = self.request(url).json() + threads = board["threads"] + + while True: + for thread in threads: + thread["_extractor"] = _8chanThreadExtractor + url = "{}/{}/res/{}.html".format( + self.root, self.board, thread["threadId"]) + yield Message.Queue, url, thread + + page += 1 + if page > board["pageCount"]: + return + url = "{}/{}/{}.json".format(self.root, self.board, page) + threads = self.request(url).json()["threads"] diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index fed6998..851f660 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -10,11 +10,13 @@ import re modules = [ "2chan", + "2chen", "35photo", "3dbooru", "420chan", "4chan", "500px", + "8chan", "8kun", "8muses", "adultempire", @@ -90,6 +92,7 @@ modules = [ "mememuseum", "myhentaigallery", "myportfolio", + "nana", "naver", "naverwebtoon", "newgrounds", diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index 62626a1..14d1e6b 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -76,7 +76,12 @@ class ArtstationExtractor(Extractor): def get_project_assets(self, project_id): """Return all assets associated with 'project_id'""" url = "{}/projects/{}.json".format(self.root, project_id) - data = self.request(url).json() + + try: + data = self.request(url).json() + except exception.HttpError as exc: + self.log.warning(exc) + return data["title"] = text.unescape(data["title"]) data["description"] = text.unescape(text.remove_html( @@ -406,6 +411,10 @@ class ArtstationImageExtractor(ArtstationExtractor): "options": (("external", True),), "pattern": "ytdl:https://www.youtube.com/embed/JNFfJtwwrU0", }), + # 404 (#3016) + ("https://www.artstation.com/artwork/3q3mXB", { + "count": 0, + }), # alternate URL patterns ("https://sungchoi.artstation.com/projects/LQVJr"), ("https://artstn.co/p/LQVJr"), @@ -419,7 +428,10 @@ class ArtstationImageExtractor(ArtstationExtractor): def metadata(self): self.assets = list(ArtstationExtractor.get_project_assets( self, self.project_id)) - self.user = self.assets[0]["user"]["username"] + try: + self.user = self.assets[0]["user"]["username"] + except IndexError: + self.user = "" return ArtstationExtractor.metadata(self) def projects(self): diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index f7ee51f..e304717 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -122,8 +122,7 @@ class Extractor(): seconds = (self._interval() - (time.time() - Extractor.request_timestamp)) if seconds > 0.0: - self.log.debug("Sleeping for %.5s seconds", seconds) - time.sleep(seconds) + self.sleep(seconds, "request") while True: try: @@ -169,8 +168,9 @@ class Extractor(): self.log.debug("%s (%s/%s)", msg, tries, retries+1) if tries > retries: break - time.sleep( - max(tries, self._interval()) if self._interval else tries) + self.sleep( + max(tries, self._interval()) if self._interval else tries, + "retry") tries += 1 raise exception.HttpError(msg, response) @@ -202,6 +202,11 @@ class Extractor(): self.log.info("Waiting until %s for %s.", isotime, reason) time.sleep(seconds) + def sleep(self, seconds, reason): + self.log.debug("Sleeping %.2f seconds (%s)", + seconds, reason) + 
time.sleep(seconds) + def _get_auth_info(self): """Return authentication information as (username, password) tuple""" username = self.config("username") diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 8c2ed53..c455ce1 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -88,10 +88,7 @@ class DanbooruExtractor(BaseExtractor): if post["extension"] == "zip": if self.ugoira: - post["frames"] = self.request( - "{}/posts/{}.json?only=pixiv_ugoira_frame_data".format( - self.root, post["id"]) - ).json()["pixiv_ugoira_frame_data"]["data"] + post["frames"] = self._ugoira_frames(post) post["_http_adjust_extension"] = False else: url = post["large_file_url"] @@ -139,6 +136,18 @@ class DanbooruExtractor(BaseExtractor): else: return + def _ugoira_frames(self, post): + data = self.request("{}/posts/{}.json?only=media_metadata".format( + self.root, post["id"]) + ).json()["media_metadata"]["metadata"] + + ext = data["ZIP:ZipFileName"].rpartition(".")[2] + print(post["id"], ext) + fmt = ("{:>06}." + ext).format + delays = data["Ugoira:FrameDelays"] + return [{"file": fmt(index), "delay": delay} + for index, delay in enumerate(delays)] + INSTANCES = { "danbooru": { diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 6897476..cb2aa24 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -72,7 +72,7 @@ class DeviantartExtractor(Extractor): def items(self): self.api = DeviantartOAuthAPI(self) - if self.user: + if self.user and self.config("group", True): profile = self.api.user_profile(self.user) self.group = not profile if self.group: @@ -938,11 +938,11 @@ class DeviantartDeviationExtractor(DeviantartExtractor): def deviations(self): url = "{}/{}/{}/{}".format( self.root, self.user, self.type, self.deviation_id) - appurl = text.extract(self._limited_request(url).text, - 'property="da:appurl" content="', '"')[0] - if not appurl: + uuid = text.extract(self._limited_request(url).text, + '"deviationUuid\\":\\"', '\\')[0] + if not uuid: raise exception.NotFoundError("deviation") - return (self.api.deviation(appurl.rpartition("/")[2]),) + return (self.api.deviation(uuid),) class DeviantartScrapsExtractor(DeviantartExtractor): diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py index 6ddf2ec..8b90250 100644 --- a/gallery_dl/extractor/directlink.py +++ b/gallery_dl/extractor/directlink.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2021 Mike Fährmann +# Copyright 2017-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -19,7 +19,7 @@ class DirectlinkExtractor(Extractor): archive_fmt = filename_fmt pattern = (r"(?i)https?://(?P<domain>[^/?#]+)/(?P<path>[^?#]+\." 
r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))" - r"(?:\?(?P<query>[^/?#]*))?(?:#(?P<fragment>.*))?$") + r"(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?$") test = ( (("https://en.wikipedia.org/static/images/project-logos/enwiki.png"), { "url": "18c5d00077332e98e53be9fed2ee4be66154b88d", @@ -31,9 +31,9 @@ class DirectlinkExtractor(Extractor): "keyword": "29dad729c40fb09349f83edafa498dba1297464a", }), # more complex example - ("https://example.org/path/to/file.webm?que=1&ry=2#fragment", { - "url": "114b8f1415cc224b0f26488ccd4c2e7ce9136622", - "keyword": "06014abd503e3b2b58aa286f9bdcefdd2ae336c0", + ("https://example.org/path/to/file.webm?que=1?&ry=2/#fragment", { + "url": "6fb1061390f8aada3db01cb24b51797c7ee42b31", + "keyword": "3d7abc31d45ba324e59bc599c3b4862452d5f29c", }), # percent-encoded characters ("https://example.org/%27%3C%23/%23%3E%27.jpg?key=%3C%26%3E", { diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index 8481248..f692a90 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -68,6 +68,16 @@ class FanboxExtractor(Extractor): post["html"] = content_body["html"] if post["type"] == "article": post["articleBody"] = content_body.copy() + if "blocks" in content_body: + content = [] + append = content.append + for block in content_body["blocks"]: + if "text" in block: + append(block["text"]) + if "links" in block: + for link in block["links"]: + append(link["url"]) + post["content"] = "\n".join(content) post["date"] = text.parse_datetime(post["publishedDatetime"]) post["text"] = content_body.get("text") if content_body else None @@ -271,6 +281,19 @@ class FanboxPostExtractor(FanboxExtractor): "hasAdultContent": True }, }), + # 'content' metadata (#3020) + ("https://www.fanbox.cc/@official-en/posts/4326303", { + "keyword": { + "content": r"re:(?s)^Greetings from FANBOX.\n \nAs of Monday, " + r"September 5th, 2022, we are happy to announce " + r"the start of the FANBOX hashtag event " + r"#MySetupTour ! \nAbout the event\nTo join this " + r"event .+ \nPlease check this page for further " + r"details regarding the Privacy & Terms.\n" + r"https://fanbox.pixiv.help/.+/10184952456601\n\n\n" + r"Thank you for your continued support of FANBOX.$", + }, + }), ) def __init__(self, match): diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py index bece905..69c07d0 100644 --- a/gallery_dl/extractor/generic.py +++ b/gallery_dl/extractor/generic.py @@ -27,9 +27,9 @@ class GenericExtractor(Extractor): pattern += r""" (?P<scheme>https?://)? # optional http(s) scheme (?P<domain>[-\w\.]+) # required domain - (?P<path>/[^?&#]*)? # optional path - (?:\?(?P<query>[^/?#]*))? # optional query - (?:\#(?P<fragment>.*))?$ # optional fragment + (?P<path>/[^?#]*)? # optional path + (?:\?(?P<query>[^#]*))? # optional query + (?:\#(?P<fragment>.*))? 
# optional fragment """ def __init__(self, match): diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index f8b0c3b..cc110aa 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -115,12 +115,16 @@ class HitomiGalleryExtractor(GalleryExtractor): fmt = self.config("format") or "webp" if fmt == "original": - subdomain, fmt, ext = "b", "images", None + subdomain, fmt, ext, check = "b", "images", None, False else: - subdomain, ext = "a", fmt + subdomain, ext, check = "a", fmt, True result = [] for image in self.info["files"]: + if check: + if not image.get("has" + fmt): + fmt = ext = "webp" + check = False ihash = image["hash"] idata = text.nameext_from_url(image["name"]) if ext: diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index b1c0e9e..2c899eb 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -44,7 +44,9 @@ class ImagefapGalleryExtractor(ImagefapExtractor): ("https://www.imagefap.com/gallery/5486966", { "pattern": r"https://cdnh?\.imagefap\.com" r"/images/full/\d+/\d+/\d+\.jpg", - "keyword": "3e24eace5b09639b881ebd393165862feb46adde", + "keyword": "8d2e562df7a0bc9e8eecb9d1bb68d32b4086bf98", + "archive": False, + "count": 62, }), ("https://www.imagefap.com/gallery.php?gid=7102714"), ("https://beta.imagefap.com/gallery.php?gid=7102714"), @@ -73,32 +75,42 @@ class ImagefapGalleryExtractor(ImagefapExtractor): title, _, descr = descr.partition(" porn picture gallery by ") uploader, _, tags = descr.partition(" to see hottest ") + self._count = text.parse_int(count) return { "gallery_id": text.parse_int(self.gid), "title": text.unescape(title), "uploader": uploader, "tags": tags[:-11].split(", "), - "count": text.parse_int(count), + "count": self._count, } def get_images(self): """Collect image-urls and -metadata""" - num = 0 url = "{}/photo/{}/".format(self.root, self.image_id) params = {"gid": self.gid, "idx": 0, "partial": "true"} + headers = { + "Content-Type": "application/x-www-form-urlencoded", + "X-Requested-With": "XMLHttpRequest", + "Referer": "{}?pgid=&gid={}&page=0".format(url, self.image_id) + } + + num = 0 + total = self._count while True: - pos = 0 - page = self.request(url, params=params).text - for _ in range(24): - imgurl, pos = text.extract(page, '<a href="', '"', pos) - if not imgurl: - return + page = self.request(url, params=params, headers=headers).text + + cnt = 0 + for image_url in text.extract_iter(page, '<a href="', '"'): num += 1 - data = text.nameext_from_url(imgurl) + cnt += 1 + data = text.nameext_from_url(image_url) data["num"] = num data["image_id"] = text.parse_int(data["filename"]) - yield imgurl, data - params["idx"] += 24 + yield image_url, data + + if cnt < 24 and num >= total: + return + params["idx"] += cnt class ImagefapImageExtractor(ImagefapExtractor): @@ -170,40 +182,49 @@ class ImagefapUserExtractor(ImagefapExtractor): self.user, self.user_id = match.groups() def items(self): - for gid, name in self.get_gallery_data(): - url = "{}/gallery/{}".format(self.root, gid) - data = { - "gallery_id": text.parse_int(gid), - "title": text.unescape(name), - "_extractor": ImagefapGalleryExtractor, - } - yield Message.Queue, url, data - - def get_gallery_data(self): - """Yield all gallery_ids of a specific user""" - folders = self.get_gallery_folders() - url = "{}/ajax_usergallery_folder.php".format(self.root) - params = {"userid": self.user_id} - for folder_id in folders: - params["id"] = folder_id - page = self.request(url, 
params=params).text - - pos = 0 - while True: - gid, pos = text.extract(page, '<a href="/gallery/', '"', pos) - if not gid: - break - name, pos = text.extract(page, "<b>", "<", pos) - yield gid, name - - def get_gallery_folders(self): - """Create a list of all folder_ids of a specific user""" + for folder_id in self.folders(): + for gallery_id, name in self.galleries(folder_id): + url = "{}/gallery/{}".format(self.root, gallery_id) + data = { + "gallery_id": text.parse_int(gallery_id), + "title" : text.unescape(name), + "_extractor": ImagefapGalleryExtractor, + } + yield Message.Queue, url, data + + def folders(self): + """Return a list of folder_ids of a specific user""" if self.user: url = "{}/profile/{}/galleries".format(self.root, self.user) else: url = "{}/usergallery.php?userid={}".format( self.root, self.user_id) - page = self.request(url).text - self.user_id, pos = text.extract(page, '?userid=', '"') - folders, pos = text.extract(page, ' id="tgl_all" value="', '"', pos) - return folders.split("|")[:-1] + + response = self.request(url) + self.user = response.url.split("/")[-2] + folders = text.extract(response.text, ' id="tgl_all" value="', '"')[0] + return folders.rstrip("|").split("|") + + def galleries(self, folder_id): + """Yield gallery_ids of a folder""" + if folder_id == "-1": + url = "{}/profile/{}/galleries?folderid=-1".format( + self.root, self.user) + else: + url = "{}/organizer/{}/".format(self.root, folder_id) + params = {"page": 0} + + while True: + extr = text.extract_from(self.request(url, params=params).text) + cnt = 0 + + while True: + gid = extr('<a href="/gallery/', '"') + if not gid: + break + yield gid, extr("<b>", "<") + cnt += 1 + + if cnt < 25: + break + params["page"] += 1 diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 425d541..4775613 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -338,6 +338,14 @@ class InstagramExtractor(Extractor): "username" : user["username"], "full_name": user["full_name"]}) + def _init_cursor(self): + return self.config("cursor") or None + + def _update_cursor(self, cursor): + self.log.debug("Cursor: %s", cursor) + self._cursor = cursor + return cursor + class InstagramUserExtractor(InstagramExtractor): """Extractor for an Instagram user profile""" @@ -409,8 +417,8 @@ class InstagramTaggedExtractor(InstagramExtractor): self.user_id = self.item[3:] return {"tagged_owner_id": self.user_id} + self.user_id = self.api.user_id(self.item) user = self.api.user(self.item) - self.user_id = user["id"] return { "tagged_owner_id" : user["id"], @@ -693,7 +701,15 @@ class InstagramRestAPI(): def user_id(self, screen_name): if screen_name.startswith("id:"): return screen_name[3:] - return self.user(screen_name)["id"] + user = self.user(screen_name) + if user is None: + raise exception.AuthorizationError( + "Login required to access this profile") + if user["is_private"] and not user["followed_by_viewer"]: + name = user["username"] + s = "" if name.endswith("s") else "s" + raise exception.StopExtraction("%s'%s posts are private", name, s) + return user["id"] def user_clips(self, user_id): endpoint = "/v1/clips/user/" @@ -741,6 +757,9 @@ class InstagramRestAPI(): def _pagination(self, endpoint, params=None, media=False): if params is None: params = {} + extr = self.extractor + params["max_id"] = extr._init_cursor() + while True: data = self._call(endpoint, params=params) @@ -752,9 +771,12 @@ class InstagramRestAPI(): if not data.get("more_available"): return - 
params["max_id"] = data["next_max_id"] + params["max_id"] = extr._update_cursor(data["next_max_id"]) def _pagination_post(self, endpoint, params): + extr = self.extractor + params["max_id"] = extr._init_cursor() + while True: data = self._call(endpoint, method="POST", data=params) @@ -764,9 +786,12 @@ class InstagramRestAPI(): info = data["paging_info"] if not info.get("more_available"): return - params["max_id"] = info["max_id"] + params["max_id"] = extr._update_cursor(info["max_id"]) def _pagination_sections(self, endpoint, params): + extr = self.extractor + params["max_id"] = extr._init_cursor() + while True: info = self._call(endpoint, method="POST", data=params) @@ -774,19 +799,22 @@ class InstagramRestAPI(): if not info.get("more_available"): return - params["max_id"] = info["next_max_id"] params["page"] = info["next_page"] + params["max_id"] = extr._update_cursor(info["next_max_id"]) class InstagramGraphqlAPI(): def __init__(self, extractor): self.extractor = extractor - self.user = InstagramRestAPI(extractor).user self.user_collection = self.user_saved = self.reels_media = \ self.highlights_media = self._login_required self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode + api = InstagramRestAPI(extractor) + self.user = api.user + self.user_id = api.user_id + @staticmethod def _login_required(_=None): raise exception.AuthorizationError("Login required") @@ -824,11 +852,6 @@ class InstagramGraphqlAPI(): return self._pagination(query_hash, variables, "hashtag", "edge_hashtag_to_media") - def user_id(self, screen_name): - if screen_name.startswith("id:"): - return screen_name[3:] - return self.user(screen_name)["id"] - def user_clips(self, user_id): query_hash = "bc78b344a68ed16dd5d7f264681c4c76" variables = {"id": user_id, "first": 50} @@ -871,9 +894,8 @@ class InstagramGraphqlAPI(): def _pagination(self, query_hash, variables, key_data="user", key_edge=None): - cursor = self.extractor.config("cursor") - if cursor: - variables["after"] = cursor + extr = self.extractor + variables["after"] = extr._init_cursor() while True: data = self._call(query_hash, variables)[key_data] @@ -890,35 +912,55 @@ class InstagramGraphqlAPI(): raise exception.StopExtraction( "%s'%s posts are private", self.item, s) - variables["after"] = self._cursor = info["end_cursor"] - self.extractor.log.debug("Cursor: %s", self._cursor) + variables["after"] = extr._update_cursor(info["end_cursor"]) -@cache(maxage=360*24*3600, keyarg=1) +@cache(maxage=90*24*3600, keyarg=1) def _login_impl(extr, username, password): extr.log.info("Logging in as %s", username) + user_agent = ("Mozilla/5.0 (Linux; Android 13) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/106.0.5249.79 Mobile " + "Safari/537.36 Instagram 255.1.0.17.102") + + headers = { + "User-Agent" : user_agent, + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + } url = extr.root + "/accounts/login/" - page = extr.request(url).text + response = extr.request(url, headers=headers) + + extract = text.extract_from(response.text) + csrf_token = extract('"csrf_token":"', '"') + device_id = extract('"device_id":"', '"') + rollout_hash = extract('"rollout_hash":"', '"') + + cset = extr.session.cookies.set + cset("csrftoken", csrf_token, domain=extr.cookiedomain) + cset("ig_did", device_id, domain=extr.cookiedomain) headers = { - "X-Web-Device-Id" : text.extract(page, '"device_id":"', '"')[0], + "User-Agent" : user_agent, + "Accept" : "*/*", + "X-CSRFToken" : csrf_token, + "X-Instagram-AJAX": 
rollout_hash, "X-IG-App-ID" : "936619743392459", - "X-ASBD-ID" : "437806", + "X-ASBD-ID" : "198387", "X-IG-WWW-Claim" : "0", "X-Requested-With": "XMLHttpRequest", + "Origin" : extr.root, "Referer" : url, + "Sec-Fetch-Dest" : "empty", + "Sec-Fetch-Mode" : "cors", + "Sec-Fetch-Site" : "same-origin", } - url = extr.root + "/data/shared_data/" - data = extr.request(url, headers=headers).json() - - headers["X-CSRFToken"] = data["config"]["csrf_token"] - headers["X-Instagram-AJAX"] = data["rollout_hash"] - headers["Origin"] = extr.root data = { - "username" : username, - "enc_password" : "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format( + "enc_password" : "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format( int(time.time()), password), + "username" : username, "queryParams" : "{}", "optIntoOneTap" : "false", "stopDeletionNonce" : "", @@ -930,11 +972,8 @@ def _login_impl(extr, username, password): if not response.json().get("authenticated"): raise exception.AuthenticationError() - cget = extr.session.cookies.get - return { - name: cget(name) - for name in ("sessionid", "mid", "ig_did") - } + return {cookie.name: cookie.value + for cookie in extr.session.cookies} def id_from_shortcode(shortcode): diff --git a/gallery_dl/extractor/nana.py b/gallery_dl/extractor/nana.py new file mode 100644 index 0000000..6062418 --- /dev/null +++ b/gallery_dl/extractor/nana.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://nana.my.id/""" + +from .common import GalleryExtractor, Extractor, Message +from .. import text, exception +import json + + +class NanaGalleryExtractor(GalleryExtractor): + """Extractor for image galleries from nana.my.id""" + category = "nana" + directory_fmt = ("{category}", "{title}") + pattern = r"(?:https?://)?nana\.my\.id/reader/([^/?#]+)" + test = ( + (("https://nana.my.id/reader/" + "059f7de55a4297413bfbd432ce7d6e724dd42bae"), { + "pattern": r"https://nana\.my\.id/reader/" + r"\w+/image/page\?path=.*\.\w+", + "title" : "Everybody Loves Shion", + "artist" : "fuzui", + "tags" : list, + "count" : 29, + }), + (("https://nana.my.id/reader/" + "77c8712b67013e427923573379f5bafcc0c72e46"), { + "pattern": r"https://nana\.my\.id/reader/" + r"\w+/image/page\?path=.*\.\w+", + "title" : "Lovey-Dovey With an Otaku-Friendly Gyaru", + "artist" : "Sueyuu", + "tags" : ["Sueyuu"], + "count" : 58, + }), + ) + + def __init__(self, match): + self.gallery_id = match.group(1) + url = "https://nana.my.id/reader/" + self.gallery_id + GalleryExtractor.__init__(self, match, url) + + def metadata(self, page): + title = text.unescape( + text.extract(page, '</a> ', '</div>')[0]) + artist = text.unescape(text.extract( + page, '<title>', '</title>')[0])[len(title):-10] + tags = text.extract(page, 'Reader.tags = "', '"')[0] + + return { + "gallery_id": self.gallery_id, + "title" : title, + "artist" : artist[4:] if artist.startswith(" by ") else "", + "tags" : tags.split(", ") if tags else (), + "lang" : "en", + "language" : "English", + } + + def images(self, page): + data = json.loads(text.extract(page, "Reader.pages = ", ".pages")[0]) + return [ + ("https://nana.my.id" + image, None) + for image in data["pages"] + ] + + +class NanaSearchExtractor(Extractor): + """Extractor for nana search results""" + category = "nana" + subcategory = "search" + pattern = r"(?:https?://)?nana\.my\.id(?:/?\?([^#]+))" + test = ( + 
('https://nana.my.id/?q=+"elf"&sort=desc', { + "pattern": NanaGalleryExtractor.pattern, + "range": "1-100", + "count": 100, + }), + ("https://nana.my.id/?q=favorites%3A", { + "pattern": NanaGalleryExtractor.pattern, + "count": ">= 2", + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.params = text.parse_query(match.group(1)) + self.params["p"] = text.parse_int(self.params.get("p"), 1) + self.params["q"] = self.params.get("q") or "" + + def items(self): + if "favorites:" in self.params["q"]: + favkey = self.config("favkey") + if not favkey: + raise exception.AuthenticationError( + "'Favorite key' not provided. " + "Please see 'https://nana.my.id/tutorial'") + self.session.cookies.set("favkey", favkey, domain="nana.my.id") + + data = {"_extractor": NanaGalleryExtractor} + while True: + try: + page = self.request( + "https://nana.my.id", params=self.params).text + except exception.HttpError: + return + + for gallery in text.extract_iter( + page, '<div class="id3">', '</div>'): + url = "https://nana.my.id" + text.extract( + gallery, '<a href="', '"')[0] + yield Message.Queue, url, data + + self.params["p"] += 1 diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index 2c8e72c..73911b2 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -326,6 +326,55 @@ class NijieNuitaExtractor(NijieExtractor): page, "<title>", "さんの抜いた")[0] or "") +class NijieFeedExtractor(NijieExtractor): + """Extractor for nijie liked user feed""" + subcategory = "feed" + pattern = BASE_PATTERN + r"/like_user_view\.php" + test = ( + ("https://nijie.info/like_user_view.php", { + "range": "1-10", + "count": 10, + }), + ("https://horne.red/like_user_view.php"), + ) + + def image_ids(self): + return self._pagination("like_user_view") + + @staticmethod + def _extract_user_name(page): + return "" + + +class NijiefollowedExtractor(NijieExtractor): + """Extractor for followed nijie users""" + subcategory = "followed" + pattern = BASE_PATTERN + r"/like_my\.php" + test = ( + ("https://nijie.info/like_my.php"), + ("https://horne.red/like_my.php"), + ) + + def items(self): + self.login() + + url = self.root + "/like_my.php" + params = {"p": 1} + data = {"_extractor": NijieUserExtractor} + + while True: + page = self.request(url, params=params).text + + for user_id in text.extract_iter( + page, '"><a href="/members.php?id=', '"'): + user_url = "{}/members.php?id={}".format(self.root, user_id) + yield Message.Queue, user_url, data + + if '<a rel="next"' not in page: + return + params["p"] += 1 + + class NijieImageExtractor(NijieExtractor): """Extractor for a nijie work/image""" subcategory = "image" diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py index 713330d..f381f12 100644 --- a/gallery_dl/extractor/nozomi.py +++ b/gallery_dl/extractor/nozomi.py @@ -62,10 +62,11 @@ class NozomiExtractor(Extractor): yield Message.Directory, post for post["num"], image in enumerate(images, 1): - post["url"] = url = text.urljoin(self.root, image["imageurl"]) - text.nameext_from_url(url, post) - post["is_video"] = bool(image.get("is_video")) - post["dataid"] = post["filename"] + post["filename"] = post["dataid"] = did = image["dataid"] + post["extension"] = ext = image["type"] + post["is_video"] = video = bool(image.get("is_video")) + post["url"] = url = "https://{}.nozomi.la/{}/{}/{}.{}".format( + "v" if video else "i", did[-1], did[-3:-1], did, ext) yield Message.Url, url, post def posts(self): @@ -109,7 +110,6 @@ class 
NozomiPostExtractor(NozomiExtractor): "height" : 768, "is_video" : False, "postid" : 3649262, - "source" : "danbooru", "tags" : list, "type" : "jpg", "url" : str, @@ -119,7 +119,7 @@ class NozomiPostExtractor(NozomiExtractor): # multiple images per post ("https://nozomi.la/post/25588032.html", { "url": "6aa3b7db385abcc9d374bdffd19187bccbf8f228", - "keyword": "f60e048df36308b6b25dfaac419b586895d360bc", + "keyword": "2a2998af93c6438863c4077bd386b613b8bc2957", "count": 7, }), # empty 'date' (#1163) @@ -160,7 +160,7 @@ class NozomiTagExtractor(NozomiExtractor): archive_fmt = "t_{search_tags}_{dataid}" pattern = r"(?:https?://)?nozomi\.la/tag/([^/?#]+)-(\d+)\." test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", { - "pattern": r"^https://i.nozomi.la/\w/\w\w/\w+\.\w+$", + "pattern": r"^https://[iv]\.nozomi\.la/\w/\w\w/\w+\.\w+$", "count": ">= 25", "range": "1-25", }) diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py index 3a4fb0e..1111c3a 100644 --- a/gallery_dl/extractor/redgifs.py +++ b/gallery_dl/extractor/redgifs.py @@ -10,6 +10,7 @@ from .common import Extractor, Message from .. import text +from ..cache import cache class RedgifsExtractor(Extractor): @@ -88,7 +89,7 @@ class RedgifsSearchExtractor(RedgifsExtractor): pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/browse/?\?([^#]+)" test = ( ("https://www.redgifs.com/browse?tags=JAV", { - "pattern": r"https://\w+\.redgifs\.com/[A-Za-z]+\.mp4", + "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.mp4", "range": "1-10", "count": 10, }), @@ -131,6 +132,12 @@ class RedgifsAPI(): def __init__(self, extractor): self.extractor = extractor + self.headers = { + "Referer" : extractor.root + "/", + "authorization": "Bearer " + self._fetch_bearer_token(extractor), + "content-type" : "application/json", + "Origin" : extractor.root, + } def gif(self, gif_id): endpoint = "/v2/gifs/" + gif_id.lower() @@ -149,7 +156,8 @@ class RedgifsAPI(): def _call(self, endpoint, params=None): url = self.API_ROOT + endpoint - return self.extractor.request(url, params=params).json() + return self.extractor.request( + url, params=params, headers=self.headers).json() def _pagination(self, endpoint, params): params["page"] = 1 @@ -161,3 +169,17 @@ class RedgifsAPI(): if params["page"] >= data["pages"]: return params["page"] += 1 + + @cache(maxage=3600) + def _fetch_bearer_token(self, extr): + extr.log.debug("Retrieving Bearer token") + + page = extr.request(extr.root + "/").text + index = text.extract(page, "/assets/js/index", ".js")[0] + + url = extr.root + "/assets/js/index" + index + ".js" + page = extr.request(url, encoding="utf-8").text + token = "ey" + text.extract(page, '="ey', '"')[0] + + extr.log.debug("Token: '%s'", token) + return token diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 447ce00..324a3c6 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -17,7 +17,7 @@ import re BASE_PATTERN = ( r"(?:tumblr:(?:https?://)?([^/]+)|" r"(?:https?://)?" 
- r"(?:www\.tumblr\.com/blog/(?:view/)?([\w-]+)|" + r"(?:www\.tumblr\.com/(?:blog/(?:view/)?)?([\w-]+)|" r"([\w-]+\.tumblr\.com)))" ) @@ -250,9 +250,9 @@ class TumblrExtractor(Extractor): return updated, (resized == updated) def _original_image_fallback(self, url, post_id): - yield self._update_image_token(url)[0] - yield self._update_image_token(url)[0] - yield self._update_image_token(url)[0] + for _ in range(3): + self.sleep(120, "image token") + yield self._update_image_token(url)[0] self.log.warning("Unable to fetch higher-resolution " "version of %s (%s)", url, post_id) @@ -298,6 +298,7 @@ class TumblrUserExtractor(TumblrExtractor): ("tumblr:www.b-authentique.com"), ("https://www.tumblr.com/blog/view/smarties-art"), ("https://www.tumblr.com/blog/smarties-art"), + ("https://www.tumblr.com/smarties-art"), ) def posts(self): @@ -354,6 +355,8 @@ class TumblrPostExtractor(TumblrExtractor): }), ("http://demo.tumblr.com/image/459265350"), ("https://www.tumblr.com/blog/view/smarties-art/686047436641353728"), + ("https://www.tumblr.com/blog/smarties-art/686047436641353728"), + ("https://www.tumblr.com/smarties-art/686047436641353728"), ) def __init__(self, match): @@ -381,6 +384,8 @@ class TumblrTagExtractor(TumblrExtractor): "count": 1, }), ("https://www.tumblr.com/blog/view/smarties-art/tagged/undertale"), + ("https://www.tumblr.com/blog/smarties-art/tagged/undertale"), + ("https://www.tumblr.com/smarties-art/tagged/undertale"), ) def __init__(self, match): @@ -402,6 +407,8 @@ class TumblrLikesExtractor(TumblrExtractor): "count": 1, }), ("https://www.tumblr.com/blog/view/mikf123/likes"), + ("https://www.tumblr.com/blog/mikf123/likes"), + ("https://www.tumblr.com/mikf123/likes"), ) def posts(self): @@ -435,11 +442,15 @@ class TumblrAPI(oauth.OAuth1API): def posts(self, blog, params): """Retrieve published posts""" - params.update({"offset": 0, "limit": 50, "reblog_info": "true"}) + params["offset"] = self.extractor.config("offset") or 0 + params["limit"] = 50 + params["reblog_info"] = "true" + if self.posts_type: params["type"] = self.posts_type if self.before: params["before"] = self.before + while True: data = self._call(blog, "posts", params) self.BLOG_CACHE[blog] = data["blog"] diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py index 623ed94..8bea18c 100644 --- a/gallery_dl/extractor/unsplash.py +++ b/gallery_dl/extractor/unsplash.py @@ -210,7 +210,7 @@ class UnsplashCollectionExtractor(UnsplashExtractor): class UnsplashSearchExtractor(UnsplashExtractor): """Extractor for unsplash search results""" subcategory = "search" - pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^/?#]+))?" + pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^#]+))?" 
test = ("https://unsplash.com/s/photos/hair-style", { "pattern": r"https://images\.unsplash\.com/((flagged/)?photo-\d+-\w+" r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$", diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py index 25b00fe..9b6831b 100644 --- a/gallery_dl/extractor/vk.py +++ b/gallery_dl/extractor/vk.py @@ -79,7 +79,8 @@ class VkExtractor(Extractor): if len(payload) < 4: self.log.debug(payload) - raise exception.AuthorizationError(payload[0]) + raise exception.AuthorizationError( + text.unescape(payload[0]) if payload[0] else None) total = payload[1] photos = payload[3] @@ -103,7 +104,7 @@ class VkPhotosExtractor(VkExtractor): subcategory = "photos" pattern = (BASE_PATTERN + r"/(?:" r"(?:albums|photos|id)(-?\d+)" - r"|(?!album-?\d+_)([^/?#]+))") + r"|(?!(?:album|tag)-?\d+_?)([^/?#]+))") test = ( ("https://vk.com/id398982326", { "pattern": r"https://sun\d+-\d+\.userapi\.com/s/v1/if1" @@ -182,9 +183,6 @@ class VkAlbumExtractor(VkExtractor): directory_fmt = ("{category}", "{user[id]}", "{album[id]}") pattern = BASE_PATTERN + r"/album(-?\d+)_(\d+)$" test = ( - ("https://vk.com/album232175027_00", { - "count": 8, - }), ("https://vk.com/album-165740836_281339889", { "count": 12, }), @@ -192,6 +190,9 @@ class VkAlbumExtractor(VkExtractor): ("https://vk.com/album-53775183_00", { "exception": exception.AuthorizationError, }), + ("https://vk.com/album232175027_00", { + "exception": exception.AuthorizationError, + }), ) def __init__(self, match): @@ -207,3 +208,25 @@ class VkAlbumExtractor(VkExtractor): "user": {"id": self.user_id}, "album": {"id": self.album_id}, } + + +class VkTaggedExtractor(VkExtractor): + """Extractor for a vk tagged photos""" + subcategory = "tagged" + directory_fmt = ("{category}", "{user[id]}", "tags") + pattern = BASE_PATTERN + r"/tag(-?\d+)$" + test = ( + ("https://vk.com/tag304303884", { + "count": 44, + }), + ) + + def __init__(self, match): + VkExtractor.__init__(self, match) + self.user_id = match.group(1) + + def photos(self): + return self._pagination("tag{}".format(self.user_id)) + + def metadata(self): + return {"user": {"id": self.user_id}} diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py index 0ad8523..47451bd 100644 --- a/gallery_dl/extractor/wallhaven.py +++ b/gallery_dl/extractor/wallhaven.py @@ -52,7 +52,7 @@ class WallhavenSearchExtractor(WallhavenExtractor): subcategory = "search" directory_fmt = ("{category}", "{search[q]}") archive_fmt = "s_{search[q]}_{id}" - pattern = r"(?:https?://)?wallhaven\.cc/search(?:/?\?([^/?#]+))?" + pattern = r"(?:https?://)?wallhaven\.cc/search(?:/?\?([^#]+))?" 
test = ( ("https://wallhaven.cc/search?q=touhou"), (("https://wallhaven.cc/search?q=id%3A87" diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 7b22b1d..2f48ffd 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -8,7 +8,6 @@ import sys import json -import time import errno import logging import functools @@ -74,9 +73,10 @@ class Job(): log = extractor.log msg = None - sleep = util.build_duration_func(extractor.config("sleep-extractor")) + sleep = util.build_duration_func( + extractor.config("sleep-extractor")) if sleep: - time.sleep(sleep()) + extractor.sleep(sleep(), "extractor") try: for msg in extractor: @@ -238,7 +238,7 @@ class DownloadJob(Job): return if self.sleep: - time.sleep(self.sleep()) + self.extractor.sleep(self.sleep(), "download") # download from URL if not self.download(url): @@ -527,11 +527,11 @@ class SimulationJob(DownloadJob): if not kwdict["extension"]: kwdict["extension"] = "jpg" self.pathfmt.set_filename(kwdict) - self.out.skip(self.pathfmt.path) if self.sleep: - time.sleep(self.sleep()) + self.extractor.sleep(self.sleep(), "download") if self.archive: self.archive.add(kwdict) + self.out.skip(self.pathfmt.path) def handle_directory(self, kwdict): if not self.pathfmt: @@ -697,17 +697,18 @@ class DataJob(Job): self.ascii = config.get(("output",), "ascii", ensure_ascii) private = config.get(("output",), "private") - self.filter = util.identity if private else util.filter_dict + self.filter = dict.copy if private else util.filter_dict def run(self): + extractor = self.extractor sleep = util.build_duration_func( - self.extractor.config("sleep-extractor")) + extractor.config("sleep-extractor")) if sleep: - time.sleep(sleep()) + extractor.sleep(sleep(), "extractor") # collect data try: - for msg in self.extractor: + for msg in extractor: self.dispatch(msg) except exception.StopExtraction: pass diff --git a/gallery_dl/path.py b/gallery_dl/path.py index 84ee7af..28c07c3 100644 --- a/gallery_dl/path.py +++ b/gallery_dl/path.py @@ -105,6 +105,9 @@ class PathFormat(): strip = ". 
" self.strip = strip + if WINDOWS: + self.extended = config("path-extended", True) + basedir = extractor._parentdir if not basedir: basedir = config("base-directory") @@ -178,7 +181,7 @@ class PathFormat(): else: self.directory = directory = self.basedirectory - if WINDOWS: + if WINDOWS and self.extended: # Enable longer-than-260-character paths directory = os.path.abspath(directory) if directory.startswith("\\\\"): diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py index d9baed3..b21e483 100644 --- a/gallery_dl/postprocessor/metadata.py +++ b/gallery_dl/postprocessor/metadata.py @@ -19,15 +19,9 @@ class MetadataPP(PostProcessor): def __init__(self, job, options): PostProcessor.__init__(self, job) - mode = options.get("mode", "json") - if mode == "custom": - self.write = self._write_custom - cfmt = options.get("content-format") or options.get("format") - if isinstance(cfmt, list): - cfmt = "\n".join(cfmt) + "\n" - self._content_fmt = formatter.parse(cfmt).format_map - ext = "txt" - elif mode == "tags": + mode = options.get("mode") + cfmt = options.get("content-format") or options.get("format") + if mode == "tags": self.write = self._write_tags ext = "txt" elif mode == "modify": @@ -41,6 +35,12 @@ class MetadataPP(PostProcessor): self.run = self._run_delete self.fields = options.get("fields") ext = None + elif mode == "custom" or not mode and cfmt: + self.write = self._write_custom + if isinstance(cfmt, list): + cfmt = "\n".join(cfmt) + "\n" + self._content_fmt = formatter.parse(cfmt).format_map + ext = "txt" else: self.write = self._write_json self.indent = options.get("indent", 4) diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 4ba1cba..1650b0a 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -418,6 +418,82 @@ CODES = { } +def parse_inputfile(file, log): + """Filter and process strings from an input file. + + Lines starting with '#' and empty lines will be ignored. + Lines starting with '-' will be interpreted as a key-value pair separated + by an '='. where 'key' is a dot-separated option name and 'value' is a + JSON-parsable value. These configuration options will be applied while + processing the next URL. + Lines starting with '-G' are the same as above, except these options will + be applied for *all* following URLs, i.e. they are Global. + Everything else will be used as a potential URL. + + Example input file: + + # settings global options + -G base-directory = "/tmp/" + -G skip = false + + # setting local options for the next URL + -filename="spaces_are_optional.jpg" + -skip = true + + https://example.org/ + + # next URL uses default filename and 'skip' is false. 
+ https://example.com/index.htm # comment1 + https://example.com/404.htm # comment2 + """ + gconf = [] + lconf = [] + strip_comment = None + + for line in file: + line = line.strip() + + if not line or line[0] == "#": + # empty line or comment + continue + + elif line[0] == "-": + # config spec + if len(line) >= 2 and line[1] == "G": + conf = gconf + line = line[2:] + else: + conf = lconf + line = line[1:] + + key, sep, value = line.partition("=") + if not sep: + log.warning("input file: invalid <key>=<value> pair: %s", line) + continue + + try: + value = json.loads(value.strip()) + except ValueError as exc: + log.warning("input file: unable to parse '%s': %s", value, exc) + continue + + key = key.strip().split(".") + conf.append((key[:-1], key[-1], value)) + + else: + # url + if " #" in line or "\t#" in line: + if strip_comment is None: + strip_comment = re.compile(r"\s+#.*").sub + line = strip_comment("", line) + if gconf or lconf: + yield ExtendedUrl(line, gconf, lconf) + gconf = [] + lconf = [] + else: + yield line + + class UniversalNone(): """None-style object that supports more operations than None itself""" __slots__ = () diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 13cb9a0..f758857 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.23.2" +__version__ = "1.23.3"
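
For illustration, a minimal sketch of what the lazily-compiled comment regex in the relocated util.parse_inputfile() does to trailing URL comments; the URLs are the ones from its own docstring example:

import re

# compiled on first use inside parse_inputfile(); shown standalone here
strip_comment = re.compile(r"\s+#.*").sub

print(strip_comment("", "https://example.com/index.htm  # comment1"))
# https://example.com/index.htm
print(strip_comment("", "https://example.com/404.htm\t# comment2"))
# https://example.com/404.htm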
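
The nozomi.la change above builds media URLs directly from a post's dataid instead of the API's imageurl field. A sketch of the resulting URL layout, assuming a made-up dataid value:

# dataid values are hex digests taken from the API; this one is made up
dataid, ext, is_video = "49d69d5a7d8e32cb", "jpg", False
url = "https://{}.nozomi.la/{}/{}/{}.{}".format(
    "v" if is_video else "i",    # videos are served from v.nozomi.la
    dataid[-1],                  # first path segment: last hex digit
    dataid[-3:-1],               # second segment: the two digits before it
    dataid, ext)
print(url)  # https://i.nozomi.la/b/2c/49d69d5a7d8e32cb.jpg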
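
Likewise, the new danbooru _ugoira_frames() helper maps the frame-delay list from the media_metadata endpoint onto zero-padded frame filenames. A sketch with made-up delay values:

ext = "jpg"                     # from "ZIP:ZipFileName" in practice
delays = [100, 100, 150]        # "Ugoira:FrameDelays"; values made up
fmt = ("{:>06}." + ext).format  # six-digit, zero-padded frame names
frames = [{"file": fmt(index), "delay": delay}
          for index, delay in enumerate(delays)]
print(frames)
# [{'file': '000000.jpg', 'delay': 100}, {'file': '000001.jpg', 'delay': 100},
#  {'file': '000002.jpg', 'delay': 150}]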
