| author | 2021-06-05 20:55:36 -0400 |
|---|---|
| committer | 2021-06-05 20:55:36 -0400 |
| commit | 8a644b7a06c504263a478d3681eed10b4161b5be (patch) |
| tree | b3d668588e5c0be8c75467e50499f73ff9ec7c05 /gallery_dl/extractor |
| parent | e7eb1f9779f2e223575ab23a6bc1abf2222e7d27 (diff) |
New upstream version 1.17.5 (tag: upstream/1.17.5)
Diffstat (limited to 'gallery_dl/extractor')
27 files changed, 460 insertions, 233 deletions
```diff
diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py
index edb9d46..27634de 100644
--- a/gallery_dl/extractor/35photo.py
+++ b/gallery_dl/extractor/35photo.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -104,7 +104,8 @@ class _35photoUserExtractor(_35photoExtractor):
                r"/(?!photo_|genre_|tags/|rating/)([^/?#]+)")
     test = (
         ("https://35photo.pro/liya", {
-            "pattern": r"https://m\d+.35photo.pro/photos_(main|series)/.*.jpg",
+            "pattern": r"https://([a-z][0-9]\.)?35photo\.pro"
+                       r"/photos_(main|series)/.*\.jpg",
             "count": 9,
         }),
         ("https://35photo.pro/suhoveev", {
@@ -214,7 +215,7 @@ class _35photoImageExtractor(_35photoExtractor):
     test = ("https://35photo.pro/photo_753340/", {
         "count": 1,
         "keyword": {
-            "url" : r"re:https://m\d+.35photo.pro/photos_main/.*.jpg",
+            "url" : r"re:https://35photo\.pro/photos_main/.*\.jpg",
             "id" : 753340,
             "title" : "Winter walk",
             "description": str,
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index 0583eb9..c2c5a66 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -146,7 +146,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
         }),
         # unavailable photos (#1335)
         ("https://500px.com/p/Light_Expression_Photography/galleries/street", {
-            "count": ">= 7",
+            "count": 0,
         }),
         ("https://500px.com/fashvamp/galleries/lera"),
     )
@@ -172,7 +172,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
         }
         gallery = self._request_graphql(
             "GalleriesDetailQueryRendererQuery", variables,
-            "fb8bb66d31b58903e2f01ebe66bbe7937b982753be3211855b7bce4e286c1a49",
+            "eda3c77ca4efe4b3347ec9c08befe3bd2c58099ebfb1f680d829fcd26d34f12d",
         )["gallery"]
         self._photos = gallery["photos"]
 
@@ -200,8 +200,8 @@ class _500pxGalleryExtractor(_500pxExtractor):
             variables["cursor"] = photos["pageInfo"]["endCursor"]
             photos = self._request_graphql(
                 "GalleriesDetailPaginationContainerQuery", variables,
-                "457c66d976f56863c81795f03e98cb54"
-                "3c7c6cdae7abeab8fe9e8e8a67479fa9",
+                "466cf6661a07e7fdca465edb39118efb"
+                "80fb157c6d3f620c7f518cdae0832c78",
             )["galleryByOwnerIdAndSlugOrToken"]["photos"]
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index ded2ae3..0d0ad70 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2020 Mike Fährmann
+# Copyright 2020-2021 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -12,7 +12,6 @@ from .common import Extractor, Message
 from .. import text, util, exception
 from ..cache import cache
 
-
 BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4"
@@ -33,6 +32,8 @@ class AryionExtractor(Extractor):
         self._needle = "class='gallery-item' id='"
 
     def login(self):
+        if self._check_cookies(self.cookienames):
+            return
         username, password = self._get_auth_info()
         if username:
             self._update_cookies(self._login_impl(username, password))
@@ -73,8 +74,7 @@ class AryionExtractor(Extractor):
     def _pagination(self, url):
         while True:
             page = self.request(url).text
-            yield from text.extract_iter(
-                page, self._needle, "'")
+            yield from text.extract_iter(page, self._needle, "'")
 
             pos = page.find("Next &gt;&gt;")
             if pos < 0:
@@ -173,7 +173,7 @@ class AryionGalleryExtractor(AryionExtractor):
 
     def skip(self, num):
         if self.recursive:
-            num = 0
+            return 0
         self.offset += num
         return num
 
@@ -182,7 +182,7 @@ class AryionGalleryExtractor(AryionExtractor):
             url = "{}/g4/gallery/{}".format(self.root, self.user)
             return self._pagination(url)
         else:
-            self._needle = "class='thumb' href='/g4/view/"
+            self._needle = "thumb' href='/g4/view/"
             url = "{}/g4/latest.php?name={}".format(self.root, self.user)
             return util.advance(self._pagination(url), self.offset)
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 1f86ea5..3b96a4e 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -61,6 +61,7 @@ class DanbooruExtractor(Extractor):
                     "{}/posts/{}.json?only=pixiv_ugoira_frame_data".format(
                         self.root, post["id"])
                 ).json()["pixiv_ugoira_frame_data"]["data"]
+                post["_http_adjust_extension"] = False
             else:
                 url = post["large_file_url"]
                 post["extension"] = "webm"
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 47f589a..9a461a4 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -930,10 +930,12 @@ class DeviantartOAuthAPI():
         self.folders = extractor.config("folders", False)
         self.metadata = extractor.extra or extractor.config("metadata", False)
 
-        self.client_id = extractor.config(
-            "client-id", self.CLIENT_ID)
-        self.client_secret = extractor.config(
-            "client-secret", self.CLIENT_SECRET)
+        self.client_id = extractor.config("client-id")
+        if self.client_id:
+            self.client_secret = extractor.config("client-secret")
+        else:
+            self.client_id = self.CLIENT_ID
+            self.client_secret = self.CLIENT_SECRET
 
         token = extractor.config("refresh-token")
         if token is None or token == "cache":
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 910da7d..64a6cb7 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -128,7 +128,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             "gid": 1200119,
             "height": int,
             "image_token": "re:[0-9a-f]{10}",
-            "lang": "jp",
+            "lang": "ja",
             "language": "Japanese",
             "parent": "",
             "rating": r"re:\d\.\d+",
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 0bcec2b..5962b9e 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -135,7 +135,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
             "url": "61896d9d9a2edb556b619000a308a984307b6d30",
         }),
         ("https://thebarchive.com/b/thread/739772332/", {
-            "url": "e8b18001307d130d67db31740ce57c8561b5d80c",
+            "url": "07d39d2cb48f40fb337dc992993d965b0cd5f7cd",
         }),
     )
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 863cead..df45d0d 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -8,8 +8,10 @@
 
 """Extractors for https://gelbooru.com/"""
 
+from .common import Extractor, Message
 from . import gelbooru_v02
 from .. import text, exception
+import binascii
 
 
 class GelbooruBase():
@@ -131,3 +133,23 @@ class GelbooruPostExtractor(GelbooruBase,
             }
         }),
     )
+
+
+class GelbooruRedirectExtractor(GelbooruBase, Extractor):
+    subcategory = "redirect"
+    pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com"
+               r"/redirect\.php\?s=([^&#]+)")
+    test = (("https://gelbooru.com/redirect.php?s=Ly9nZWxib29ydS5jb20vaW5kZXgu"
+             "cGhwP3BhZ2U9cG9zdCZzPXZpZXcmaWQ9MTgzMDA0Ng=="), {
+        "pattern": r"https://gelbooru.com/index.php"
+                   r"\?page=post&s=view&id=1830046"
+    })
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.redirect_url = text.ensure_http_scheme(
+            binascii.a2b_base64(match.group(1)).decode())
+
+    def items(self):
+        data = {"_extractor": GelbooruPostExtractor}
+        yield Message.Queue, self.redirect_url, data
diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py
index 76b2c38..9370840 100644
--- a/gallery_dl/extractor/imagebam.py
+++ b/gallery_dl/extractor/imagebam.py
@@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2014-2019 Mike Fährmann
+# Copyright 2014-2021 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract images from http://www.imagebam.com/"""
+"""Extractors for https://www.imagebam.com/"""
 
 from .common import Extractor, Message
 from .. import text, exception
@@ -15,34 +15,44 @@ from .. import text, exception
 class ImagebamExtractor(Extractor):
     """Base class for imagebam extractors"""
     category = "imagebam"
-    root = "http://www.imagebam.com"
+    root = "https://www.imagebam.com"
+    cookies = None
 
-    def get_image_data(self, page_url, data):
-        """Fill 'data' and return image URL"""
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.key = match.group(1)
+        if self.cookies:
+            self.session.cookies = self.cookies
+
+    def get_image_data(self, data):
+        page_url = "{}/image/{}".format(self.root, data["image_key"])
         page = self.request(page_url).text
-        image_url = text.extract(page, 'property="og:image" content="', '"')[0]
-        data["extension"] = image_url.rpartition(".")[2]
-        data["image_key"] = page_url.rpartition("/")[2]
-        data["image_id"] = data["image_key"][6:]
-        return image_url
+        image_url, pos = text.extract(page, '<img src="https://images', '"')
+
+        if not image_url:
+            # cache cookies
+            ImagebamExtractor.cookies = self.session.cookies
+            # repeat request to get past "Continue to your image" pages
+            page = self.request(page_url).text
+            image_url, pos = text.extract(
+                page, '<img src="https://images', '"')
 
-    def request_page(self, url):
-        """Retrive the main part of a gallery page"""
-        page = self.request(text.urljoin(self.root, url)).text
-        return text.extract(page, "<fieldset>", "</fieldset>")[0]
+        filename = text.unescape(text.extract(page, 'alt="', '"', pos)[0])
+        data["url"] = "https://images" + image_url
+        data["filename"], _, data["extension"] = filename.rpartition(".")
 
 
 class ImagebamGalleryExtractor(ImagebamExtractor):
     """Extractor for image galleries from imagebam.com"""
     subcategory = "gallery"
-    directory_fmt = ("{category}", "{title} - {gallery_key}")
-    filename_fmt = "{num:>03}-{image_key}.{extension}"
+    directory_fmt = ("{category}", "{title} {gallery_key}")
+    filename_fmt = "{num:>03} {filename}.{extension}"
     archive_fmt = "{gallery_key}_{image_key}"
     pattern = r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([0-9a-z]+)"
     test = (
-        ("http://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
+        ("https://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
             "url": "76d976788ae2757ac81694736b07b72356f5c4c8",
-            "keyword": "9e25b8827474ac93c54855e798d60aa3cbecbd7a",
+            "keyword": "b048478b1bbba3072a7fa9fcc40630b3efad1f6c",
             "content": "596e6bfa157f2c7169805d50075c2986549973a8",
         }),
         ("http://www.imagebam.com/gallery/op9dwcklwdrrguibnkoe7jxgvig30o5p", {
@@ -51,78 +61,67 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
             "url": "32ae6fe5dc3e4ca73ff6252e522d16473595d1d1",
         }),
         ("http://www.imagebam.com/gallery/gsl8teckymt4vbvx1stjkyk37j70va2c", {
-            "exception": exception.NotFoundError,
+            "exception": exception.HttpError,
         }),
     )
 
-    def __init__(self, match):
-        ImagebamExtractor.__init__(self, match)
-        self.gallery_key = match.group(1)
-
     def items(self):
-        url = "{}/gallery/{}".format(self.root, self.gallery_key)
-        page = self.request_page(url)
-        if not page or ">Error<" in page:
-            raise exception.NotFoundError("gallery")
+        url = "{}/gallery/{}".format(self.root, self.key)
+        page = self.request(url).text
 
         data = self.get_metadata(page)
-        imgs = self.get_image_pages(page)
-        data["count"] = len(imgs)
-        data["gallery_key"] = self.gallery_key
+        keys = self.get_image_keys(page)
+        keys.reverse()
+        data["count"] = len(keys)
+        data["gallery_key"] = self.key
 
-        yield Message.Version, 1
         yield Message.Directory, data
-        for data["num"], page_url in enumerate(imgs, 1):
-            image_url = self.get_image_data(page_url, data)
-            yield Message.Url, image_url, data
+        for data["num"], data["image_key"] in enumerate(keys, 1):
+            self.get_image_data(data)
+            yield Message.Url, data["url"], data
 
     @staticmethod
     def get_metadata(page):
         """Return gallery metadata"""
-        return text.extract_all(page, (
-            ("title"      , "'> ", " <span "),
-            (None         , "'>", "</span>"),
-            ("description", ":#FCFCFC;'>", "</div>"),
-        ))[0]
-
-    def get_image_pages(self, page):
-        """Return a list of all image pages"""
-        pages = []
+        title = text.extract(page, 'id="gallery-name">', '<')[0]
+        return {"title": text.unescape(title.strip())}
+
+    def get_image_keys(self, page):
+        """Return a list of all image keys"""
+        keys = []
         while True:
-            pages.extend(text.extract_iter(page, "\n<a href='", "'"))
-            pos = page.find('"pagination_current"')
+            keys.extend(text.extract_iter(
+                page, '<a href="https://www.imagebam.com/image/', '"'))
+            pos = page.find('rel="next" aria-label="Next')
             if pos > 0:
-                url = text.extract(page, "<a href='", "'", pos)[0]
+                url = text.rextract(page, 'href="', '"', pos)[0]
                 if url:
-                    page = self.request_page(url)
+                    page = self.request(url).text
                     continue
-            return pages
+            return keys
 
 
 class ImagebamImageExtractor(ImagebamExtractor):
     """Extractor for single images from imagebam.com"""
     subcategory = "image"
-    filename_fmt = "{image_key}.{extension}"
     archive_fmt = "{image_key}"
     pattern = (r"(?:https?://)?(?:\w+\.)?imagebam\.com"
               r"/(?:image/|(?:[0-9a-f]{2}/){3})([0-9a-f]+)")
     test = (
-        ("http://www.imagebam.com/image/94d56c502511890", {
+        ("https://www.imagebam.com/image/94d56c502511890", {
             "url": "5e9ba3b1451f8ded0ae3a1b84402888893915d4a",
-            "keyword": "4263d4840007524129792b8587a562b5d20c2687",
+            "keyword": "2a4380d4b57554ff793898c2d6ec60987c86d1a1",
             "content": "0c8768055e4e20e7c7259608b67799171b691140",
         }),
         ("http://images3.imagebam.com/1d/8c/44/94d56c502511890.png"),
+        # NSFW (#1534)
+        ("https://www.imagebam.com/image/0850951366904951", {
+            "url": "d37297b17ed1615b4311c8ed511e50ce46e4c748",
+        }),
     )
 
-    def __init__(self, match):
-        ImagebamExtractor.__init__(self, match)
-        self.image_key = match.group(1)
-
     def items(self):
-        page_url = "{}/image/{}".format(self.root, self.image_key)
-        data = {}
-        image_url = self.get_image_data(page_url, data)
-        yield Message.Version, 1
+        data = {"image_key": self.key}
+        self.get_image_data(data)
         yield Message.Directory, data
-        yield Message.Url, image_url, data
+        yield Message.Url, data["url"], data
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 7009c7a..f925c9e 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -57,7 +57,8 @@ class ImgurImageExtractor(ImgurExtractor):
     subcategory = "image"
     filename_fmt = "{category}_{id}{title:?_//}.{extension}"
     archive_fmt = "{id}"
-    pattern = BASE_PATTERN + r"/(?!gallery|search)(\w{7}|\w{5})[sbtmlh]?\.?"
+    pattern = (BASE_PATTERN + r"/(?!gallery|search)"
+               r"(?:r/\w+/)?(\w{7}|\w{5})[sbtmlh]?")
     test = (
         ("https://imgur.com/21yMxCS", {
             "url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
@@ -110,6 +111,7 @@ class ImgurImageExtractor(ImgurExtractor):
         ("https://imgur.com/zzzzzzz", {  # not found
             "exception": exception.HttpError,
         }),
+        ("https://m.imgur.com/r/Celebs/iHJ7tsM"),
         ("https://www.imgur.com/21yMxCS"),  # www
         ("https://m.imgur.com/21yMxCS"),  # mobile
         ("https://imgur.com/zxaY6"),  # 5 character key
@@ -289,7 +291,7 @@ class ImgurFavoriteExtractor(ImgurExtractor):
 class ImgurSubredditExtractor(ImgurExtractor):
     """Extractor for a subreddits's imgur links"""
     subcategory = "subreddit"
-    pattern = BASE_PATTERN + r"/r/([^/?#]+)"
+    pattern = BASE_PATTERN + r"/r/([^/?#]+)/?$"
     test = ("https://imgur.com/r/pics", {
         "range": "1-100",
         "count": 100,
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index 9b5331a..2f7935b 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -64,7 +64,7 @@ class InkbunnyExtractor(Extractor):
 class InkbunnyUserExtractor(InkbunnyExtractor):
     """Extractor for inkbunny user profiles"""
     subcategory = "user"
-    pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?([^/?#]+)"
+    pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?(\w+)(?:$|[/?#])"
     test = (
         ("https://inkbunny.net/soina", {
             "pattern": r"https://[\w.]+\.metapix\.net/files/full"
@@ -138,6 +138,33 @@ class InkbunnyUserExtractor(InkbunnyExtractor):
         return self.api.search(params)
 
 
+class InkbunnyFavoriteExtractor(InkbunnyExtractor):
+    """Extractor for inkbunny user favorites"""
+    subcategory = "favorite"
+    pattern = BASE_PATTERN + r"/userfavorites_process\.php\?favs_user_id=(\d+)"
+    test = (
+        ("https://inkbunny.net/userfavorites_process.php?favs_user_id=20969", {
+            "pattern": r"https://[\w.]+\.metapix\.net/files/full"
+                       r"/\d+/\d+_\w+_.+",
+            "range": "20-50",
+        }),
+    )
+
+    def __init__(self, match):
+        InkbunnyExtractor.__init__(self, match)
+        self.user_id = match.group(1)
+
+    def posts(self):
+        orderby = self.config("orderby", "fav_datetime")
+        params = {
+            "favs_user_id": self.user_id,
+            "orderby" : orderby,
+        }
+        if orderby and orderby.startswith("unread_"):
+            params["unread_submissions"] = "yes"
+        return self.api.search(params)
+
+
 class InkbunnyPostExtractor(InkbunnyExtractor):
     """Extractor for individual Inkbunny posts"""
     subcategory = "post"
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index a027be1..e3db789 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -408,7 +408,7 @@ class InstagramPostsExtractor(InstagramExtractor):
         url = "{}/{}/".format(self.root, self.item)
         user = self._extract_profile_page(url)
 
-        query_hash = "42d2750e44dbac713ff30130659cd891"
+        query_hash = "32b14723a678bd4628d70c1f877b94c9"
         variables = {"id": user["id"], "first": 50}
         edge = self._get_edge_data(user, "edge_owner_to_timeline_media")
         return self._pagination_graphql(query_hash, variables, edge)
@@ -613,7 +613,7 @@ class InstagramPostExtractor(InstagramExtractor):
     )
 
     def posts(self):
-        query_hash = "cf28bf5eb45d62d4dc8e77cdb99d750d"
+        query_hash = "d4e8ae69cb68f66329dcebe82fb69f6d"
         variables = {
             "shortcode" : self.item,
             "child_comment_count" : 3,
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 377e00b..1b5e5e9 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -26,24 +26,41 @@ class KemonopartyExtractor(Extractor):
     def items(self):
         find_inline = re.compile(r'src="(/inline/[^"]+)').findall
 
+        if self.config("metadata"):
+            username = text.unescape(text.extract(
+                self.request(self.user_url).text, "<title>", " | Kemono<")[0])
+        else:
+            username = None
+
         for post in self.posts():
             files = []
-            if post["file"]:
-                files.append(post["file"])
-            if post["attachments"]:
-                files.extend(post["attachments"])
+            append = files.append
+            file = post["file"]
+
+            if file:
+                file["type"] = "file"
+                append(file)
+            for attachment in post["attachments"]:
+                attachment["type"] = "attachment"
+                append(attachment)
             for path in find_inline(post["content"] or ""):
-                files.append({"path": path, "name": path})
+                append({"path": path, "name": path, "type": "inline"})
 
             post["date"] = text.parse_datetime(
                 post["published"], "%a, %d %b %Y %H:%M:%S %Z")
+            if username:
+                post["username"] = username
             yield Message.Directory, post
 
             for post["num"], file in enumerate(files, 1):
+                post["type"] = file["type"]
                 url = file["path"]
                 if url[0] == "/":
-                    url = self.root + url
+                    url = "https://data.kemono.party" + url
+                elif url.startswith("https://kemono.party/"):
+                    url = "https://data.kemono.party" + url[20:]
+
                 text.nameext_from_url(file["name"], post)
                 yield Message.Url, url, post
@@ -64,6 +81,7 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
         KemonopartyExtractor.__init__(self, match)
         service, user_id = match.groups()
         self.api_url = "{}/api/{}/user/{}".format(self.root, service, user_id)
+        self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
 
     def posts(self):
         url = self.api_url
@@ -84,7 +102,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
     pattern = BASE_PATTERN + r"/post/([^/?#]+)"
     test = (
         ("https://kemono.party/fanbox/user/6993449/post/506575", {
-            "pattern": r"https://kemono\.party/files/fanbox"
+            "pattern": r"https://data\.kemono\.party/files/fanbox"
                        r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
             "keyword": {
                 "added": "Wed, 06 May 2020 20:28:02 GMT",
@@ -101,16 +119,21 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
                 "shared_file": False,
                 "subcategory": "post",
                 "title": "c96取り置き",
+                "type": "file",
                 "user": "6993449",
             },
         }),
         # inline image (#1286)
         ("https://kemono.party/fanbox/user/7356311/post/802343", {
-            "pattern": r"https://kemono\.party/inline/fanbox"
+            "pattern": r"https://data\.kemono\.party/inline/fanbox"
                        r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
         }),
+        # kemono.party -> data.kemono.party
+        ("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
+            "pattern": r"https://data\.kemono\.party/(file|attachment)s"
+                       r"/gumroad/trylsc/IURjT/",
+        }),
         ("https://kemono.party/subscribestar/user/alcorart/post/184330"),
-        ("https://kemono.party/gumroad/user/trylsc/post/IURjT"),
     )
 
     def __init__(self, match):
@@ -118,6 +141,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
         service, user_id, post_id = match.groups()
         self.api_url = "{}/api/{}/user/{}/post/{}".format(
             self.root, service, user_id, post_id)
+        self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
 
     def posts(self):
         posts = self.request(self.api_url).json()
diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py
index f8e1473..833d18e 100644
--- a/gallery_dl/extractor/manganelo.py
+++ b/gallery_dl/extractor/manganelo.py
@@ -4,35 +4,23 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract manga-chapters and entire manga from https://manganelo.com/"""
+"""Extractors for https://manganato.com/"""
 
 from .common import ChapterExtractor, MangaExtractor
 from .. import text
 import re
 
+BASE_PATTERN = \
+    r"(?:https?://)?((?:(?:read)?manganato|(?:www\.)?manganelo)\.com)"
 
-class ManganeloBase():
-    """Base class for manganelo extractors"""
-    category = "manganelo"
-    root = "https://manganelo.com"
-
-    @staticmethod
-    def parse_page(page, data):
-        """Parse metadata on 'page' and add it to 'data'"""
-        text.extract_all(page, (
-            ("manga"  , '<h1>', '</h1>'),
-            ('author' , '</i>Author(s) :</td>', '</tr>'),
-        ), values=data)
-        data["author"] = text.remove_html(data["author"])
-        return data
-
-
-class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
+
+class ManganeloChapterExtractor(ChapterExtractor):
     """Extractor for manga-chapters from manganelo.com"""
-    pattern = (r"(?:https?://)?(?:www\.)?manganelo\.com"
-               r"(/chapter/\w+/chapter_[^/?#]+)")
+    category = "manganelo"
+    root = "https://readmanganato.com"
+    pattern = BASE_PATTERN + r"(/(?:manga-\w+|chapter/\w+)/chapter[-_][^/?#]+)"
     test = (
-        ("https://manganelo.com/chapter/gq921227/chapter_23", {
+        ("https://readmanganato.com/manga-gn983696/chapter-23", {
             "pattern": r"https://s\d+\.\w+\.com/mangakakalot/g\d+/gq921227/"
                        r"vol3_chapter_23_24_yen/\d+\.jpg",
             "keyword": "3748087cf41abc97f991530e6fd53b291490d6d0",
@@ -43,11 +31,12 @@ class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
             "content": "fbec629c71f66b246bfa0604204407c0d1c8ae38",
             "count": 39,
         }),
+        ("https://manganelo.com/chapter/gq921227/chapter_23"),
     )
 
     def __init__(self, match):
-        self.path = match.group(1)
-        ChapterExtractor.__init__(self, match, self.root + self.path)
+        domain, path = match.groups()
+        ChapterExtractor.__init__(self, match, "https://" + domain + path)
         self.session.headers['Referer'] = self.root
 
     def metadata(self, page):
@@ -85,21 +74,29 @@ class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
         ]
 
 
-class ManganeloMangaExtractor(ManganeloBase, MangaExtractor):
+class ManganeloMangaExtractor(MangaExtractor):
     """Extractor for manga from manganelo.com"""
+    category = "manganelo"
+    root = "https://readmanganato.com"
     chapterclass = ManganeloChapterExtractor
-    pattern = (r"(?:https?://)?(?:www\.)?manganelo\.com"
-               r"(/(?:manga/|read_)\w+)")
+    pattern = BASE_PATTERN + r"(/(?:manga[-/]|read_)\w+)/?$"
     test = (
-        ("https://manganelo.com/manga/ol921234", {
-            "url": "6ba7f083a6944e414ad8214b74a0a40cb60d4562",
+        ("https://manganato.com/manga-gu983703", {
+            "pattern": ManganeloChapterExtractor.pattern,
+            "count": ">= 70",
         }),
         ("https://manganelo.com/manga/read_otome_no_teikoku", {
             "pattern": ManganeloChapterExtractor.pattern,
-            "count": ">= 40"
+            "count": ">= 40",
         }),
+        ("https://manganelo.com/manga/ol921234/"),
     )
 
+    def __init__(self, match):
+        domain, path = match.groups()
+        MangaExtractor.__init__(self, match, "https://" + domain + path)
+        self.session.headers['Referer'] = self.root
+
     def chapters(self, page):
         results = []
         data = self.parse_page(page, {"lang": "en", "language": "English"})
@@ -117,3 +114,13 @@ class ManganeloMangaExtractor(ManganeloBase, MangaExtractor):
             data["chapter"] = text.parse_int(chapter)
             data["chapter_minor"] = sep + minor
             results.append((url, data.copy()))
+
+    @staticmethod
+    def parse_page(page, data):
+        """Parse metadata on 'page' and add it to 'data'"""
+        text.extract_all(page, (
+            ("manga"  , '<h1>', '</h1>'),
+            ('author' , '</i>Author(s) :</td>', '</tr>'),
+        ), values=data)
+        data["author"] = text.remove_html(data["author"])
+        return data
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index 558e682..9b6d4ba 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -17,7 +17,7 @@ import re
 class MangaparkBase():
     """Base class for mangapark extractors"""
     category = "mangapark"
-    root_fmt = "https://mangapark.{}"
+    root_fmt = "https://v2.mangapark.{}"
     browser = "firefox"
 
     @staticmethod
@@ -51,7 +51,7 @@ class MangaparkBase():
 
 class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
     """Extractor for manga-chapters from mangapark.net"""
-    pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
+    pattern = (r"(?:https?://)?(?:www\.|v2\.)?mangapark\.(me|net|com)"
                r"/manga/([^?#]+/i\d+)")
     test = (
         ("https://mangapark.net/manga/gosu/i811653/c055/1", {
@@ -117,7 +117,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
 class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
     """Extractor for manga from mangapark.net"""
     chapterclass = MangaparkChapterExtractor
-    pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
+    pattern = (r"(?:https?://)?(?:www\.|v2\.)?mangapark\.(me|net|com)"
                r"(/manga/[^/?#]+)/?$")
     test = (
         ("https://mangapark.net/manga/aria", {
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index e1081da..b74355d 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -158,7 +158,7 @@ class NozomiTagExtractor(NozomiExtractor):
     """Extractor for posts from tag searches on nozomi.la"""
     subcategory = "tag"
     directory_fmt = ("{category}", "{search_tags}")
-    archive_fmt = "t_{search_tags}_{postid}"
+    archive_fmt = "t_{search_tags}_{dataid}"
     pattern = r"(?:https?://)?nozomi\.la/tag/([^/?#]+)-(\d+)\."
     test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", {
         "pattern": r"^https://i.nozomi.la/\w/\w\w/\w+\.\w+$",
@@ -180,7 +180,7 @@ class NozomiSearchExtractor(NozomiExtractor):
     """Extractor for search results on nozomi.la"""
     subcategory = "search"
     directory_fmt = ("{category}", "{search_tags:J }")
-    archive_fmt = "t_{search_tags}_{postid}"
+    archive_fmt = "t_{search_tags}_{dataid}"
     pattern = r"(?:https?://)?nozomi\.la/search\.html\?q=([^&#]+)"
     test = ("https://nozomi.la/search.html?q=hibiscus%203:4_ratio#1", {
         "count": ">= 5",
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 839e0b8..9c32d7a 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -117,12 +117,22 @@ class PatreonExtractor(Extractor):
             attr = post["attributes"]
             attr["id"] = text.parse_int(post["id"])
 
-            if post.get("current_user_can_view", True):
+            if attr.get("current_user_can_view", True):
+
+                relationships = post["relationships"]
                 attr["images"] = self._files(post, included, "images")
                 attr["attachments"] = self._files(post, included, "attachments")
                 attr["date"] = text.parse_datetime(
                     attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
-                user = post["relationships"]["user"]
+
+                tags = relationships.get("user_defined_tags")
+                attr["tags"] = [
+                    tag["id"].replace("user_defined;", "")
+                    for tag in tags["data"]
+                    if tag["type"] == "post_tag"
+                ] if tags else []
+
+                user = relationships["user"]
                 attr["creator"] = (
                     self._user(user["links"]["related"]) or
                     included["user"][user["data"]["id"]])
@@ -299,6 +309,10 @@ class PatreonPostExtractor(PatreonExtractor):
         ("https://www.patreon.com/posts/19987002", {
             "count": 4,
         }),
+        # tags (#1539)
+        ("https://www.patreon.com/posts/free-post-12497641", {
+            "keyword": {"tags": ["AWMedia"]},
+        }),
         ("https://www.patreon.com/posts/not-found-123", {
             "exception": exception.NotFoundError,
         }),
diff --git a/gallery_dl/extractor/pillowfort.py b/gallery_dl/extractor/pillowfort.py
index cbd65d7..3c3fcd4 100644
--- a/gallery_dl/extractor/pillowfort.py
+++ b/gallery_dl/extractor/pillowfort.py
@@ -9,7 +9,9 @@
 """Extractors for https://www.pillowfort.social/"""
 
 from .common import Extractor, Message
-from .. import text
+from ..cache import cache
+from .. import text, exception
+import re
 
 BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social"
 
@@ -19,94 +21,171 @@ class PillowfortExtractor(Extractor):
     category = "pillowfort"
     root = "https://www.pillowfort.social"
     directory_fmt = ("{category}", "{username}")
-    filename_fmt = ("{post_id} {title|original_post[title]} "
+    filename_fmt = ("{post_id} {title|original_post[title]:?/ /}"
                     "{num:>02}.{extension}")
     archive_fmt = "{id}"
+    cookiedomain = "www.pillowfort.social"
 
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.item = match.group(1)
-        self.reblogs = self.config("reblogs", False)
 
     def items(self):
-        for post in self.posts():
+        self.login()
+        inline = self.config("inline", True)
+        reblogs = self.config("reblogs", False)
+        external = self.config("external", False)
+
+        if inline:
+            inline = re.compile(r'src="(https://img\d+\.pillowfort\.social'
+                                r'/posts/[^"]+)').findall
 
-            if "original_post" in post and not self.reblogs:
+        for post in self.posts():
+            if "original_post" in post and not reblogs:
                 continue
 
-            files = post["media"]
-            del post["media"]
+            files = post.pop("media")
+            if inline:
+                for url in inline(post["content"]):
+                    files.append({"url": url})
 
             post["date"] = text.parse_datetime(
                 post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+            post["post_id"] = post.pop("id")
             yield Message.Directory, post
 
             post["num"] = 0
             for file in files:
                 url = file["url"]
-                if url:
-                    post.update(file)
+                if not url:
+                    continue
+
+                if file.get("embed_code"):
+                    if not external:
+                        continue
+                    msgtype = Message.Queue
+                else:
                     post["num"] += 1
+                    msgtype = Message.Url
+
+                post.update(file)
+                text.nameext_from_url(url, post)
+                post["hash"], _, post["filename"] = \
+                    post["filename"].partition("_")
+
+                if "id" not in file:
+                    post["id"] = post["hash"]
+                if "created_at" in file:
                     post["date"] = text.parse_datetime(
                         file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
-                    yield Message.Url, url, text.nameext_from_url(url, post)
+
+                yield msgtype, url, post
+
+    def login(self):
+        cget = self.session.cookies.get
+        if cget("_Pf_new_session", domain=self.cookiedomain) \
+                or cget("remember_user_token", domain=self.cookiedomain):
+            return
+
+        username, password = self._get_auth_info()
+        if username:
+            cookies = self._login_impl(username, password)
+            self._update_cookies(cookies)
+
+    @cache(maxage=14*24*3600, keyarg=1)
+    def _login_impl(self, username, password):
+        self.log.info("Logging in as %s", username)
+
+        url = "https://www.pillowfort.social/users/sign_in"
+        page = self.request(url).text
+        auth = text.extract(page, 'name="authenticity_token" value="', '"')[0]
+
+        headers = {"Origin": self.root, "Referer": url}
+        data = {
+            "utf8"              : "✓",
+            "authenticity_token": auth,
+            "user[email]"       : username,
+            "user[password]"    : password,
+            "user[remember_me]" : "1",
+        }
+        response = self.request(url, method="POST", headers=headers, data=data)
+
+        if not response.history:
+            raise exception.AuthenticationError()
+
+        return {
+            cookie.name: cookie.value
+            for cookie in response.history[0].cookies
+        }
 
 
 class PillowfortPostExtractor(PillowfortExtractor):
     """Extractor for a single pillowfort post"""
     subcategory = "post"
     pattern = BASE_PATTERN + r"/posts/(\d+)"
-    test = ("https://www.pillowfort.social/posts/27510", {
-        "pattern": r"https://img\d+\.pillowfort\.social/posts/\w+_out\d+\.png",
-        "count": 4,
-        "keyword": {
-            "avatar_url": str,
-            "col": 0,
-            "commentable": True,
-            "comments_count": int,
-            "community_id": None,
-            "content": str,
-            "created_at": str,
-            "date": "type:datetime",
-            "deleted": None,
-            "deleted_at": None,
-            "deleted_by_mod": None,
-            "deleted_for_flag_id": None,
-            "embed_code": None,
-            "id": int,
-            "last_activity": str,
-            "last_activity_elapsed": str,
-            "last_edited_at": None,
-            "likes_count": int,
-            "media_type": "picture",
-            "nsfw": False,
-            "num": int,
-            "original_post_id": None,
-            "original_post_user_id": None,
-            "picture_content_type": None,
-            "picture_file_name": None,
-            "picture_file_size": None,
-            "picture_updated_at": None,
-            "post_id": 27510,
-            "post_type": "picture",
-            "privacy": "public",
-            "reblog_copy_info": list,
-            "rebloggable": True,
-            "reblogged_from_post_id": None,
-            "reblogged_from_user_id": None,
-            "reblogs_count": int,
-            "row": int,
-            "small_image_url": None,
-            "tags": list,
-            "time_elapsed": str,
-            "timestamp": str,
-            "title": "What is Pillowfort.io? ",
-            "updated_at": str,
-            "url": r"re:https://img3.pillowfort.social/posts/.*\.png",
-            "user_id": 5,
-            "username": "Staff"
-        },
-    })
+    test = (
+        ("https://www.pillowfort.social/posts/27510", {
+            "pattern": r"https://img\d+\.pillowfort\.social"
+                       r"/posts/\w+_out\d+\.png",
+            "count": 4,
+            "keyword": {
+                "avatar_url": str,
+                "col": 0,
+                "commentable": True,
+                "comments_count": int,
+                "community_id": None,
+                "content": str,
+                "created_at": str,
+                "date": "type:datetime",
+                "deleted": None,
+                "deleted_at": None,
+                "deleted_by_mod": None,
+                "deleted_for_flag_id": None,
+                "embed_code": None,
+                "id": int,
+                "last_activity": str,
+                "last_activity_elapsed": str,
+                "last_edited_at": None,
+                "likes_count": int,
+                "media_type": "picture",
+                "nsfw": False,
+                "num": int,
+                "original_post_id": None,
+                "original_post_user_id": None,
+                "picture_content_type": None,
+                "picture_file_name": None,
+                "picture_file_size": None,
+                "picture_updated_at": None,
+                "post_id": 27510,
+                "post_type": "picture",
+                "privacy": "public",
+                "reblog_copy_info": list,
+                "rebloggable": True,
+                "reblogged_from_post_id": None,
+                "reblogged_from_user_id": None,
+                "reblogs_count": int,
+                "row": int,
+                "small_image_url": None,
+                "tags": list,
+                "time_elapsed": str,
+                "timestamp": str,
+                "title": "What is Pillowfort.io? ",
+                "updated_at": str,
+                "url": r"re:https://img3.pillowfort.social/posts/.*\.png",
+                "user_id": 5,
+                "username": "Staff"
+            },
+        }),
+        ("https://www.pillowfort.social/posts/1557500", {
+            "options": (("external", True), ("inline", False)),
+            "pattern": r"https://twitter\.com/Aliciawitdaart/status"
+                       r"/1282862493841457152",
+        }),
+        ("https://www.pillowfort.social/posts/1672518", {
+            "options": (("inline", True),),
+            "count": 3,
+        }),
+    )
 
     def posts(self):
         url = "{}/posts/{}/json/".format(self.root, self.item)
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 8bfae06..8076fff 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -29,14 +29,28 @@ class PixivExtractor(Extractor):
         Extractor.__init__(self, match)
         self.api = PixivAppAPI(self)
         self.load_ugoira = self.config("ugoira", True)
-        self.translated_tags = self.config("translated-tags", False)
+        self.max_posts = self.config("max-posts", 0)
 
     def items(self):
-        tkey = "translated_name" if self.translated_tags else "name"
+        tags = self.config("tags", "japanese")
+        if tags == "original":
+            transform_tags = None
+        elif tags == "translated":
+            def transform_tags(work):
+                work["tags"] = list(set(
+                    tag["translated_name"] or tag["name"]
+                    for tag in work["tags"]))
+        else:
+            def transform_tags(work):
+                work["tags"] = [tag["name"] for tag in work["tags"]]
+
         ratings = {0: "General", 1: "R-18", 2: "R-18G"}
         metadata = self.metadata()
 
-        for work in self.works():
+        works = self.works()
+        if self.max_posts:
+            works = itertools.islice(works, self.max_posts)
+        for work in works:
             if not work["user"]["id"]:
                 continue
 
@@ -45,12 +59,10 @@ class PixivExtractor(Extractor):
             del work["meta_single_page"]
             del work["image_urls"]
             del work["meta_pages"]
+
+            if transform_tags:
+                transform_tags(work)
             work["num"] = 0
-            if self.translated_tags:
-                work["untranslated_tags"] = [
-                    tag["name"] for tag in work["tags"]
-                ]
-            work["tags"] = [tag[tkey] or tag["name"] for tag in work["tags"]]
             work["date"] = text.parse_datetime(work["create_date"])
             work["rating"] = ratings.get(work["x_restrict"])
             work["suffix"] = ""
@@ -66,6 +78,7 @@ class PixivExtractor(Extractor):
                 url = ugoira["zip_urls"]["medium"].replace(
                     "_ugoira600x600", "_ugoira1920x1080")
                 work["frames"] = ugoira["frames"]
+                work["_http_adjust_extension"] = False
                 yield Message.Url, url, text.nameext_from_url(url, work)
 
             elif work["page_count"] == 1:
@@ -115,7 +128,8 @@ class PixivUserExtractor(PixivExtractor):
         }),
         # deleted account
         ("http://www.pixiv.net/member_illust.php?id=173531", {
-            "count": 0,
+            "options": (("metadata", True),),
+            "exception": exception.NotFoundError,
         }),
         ("https://www.pixiv.net/en/users/173530"),
         ("https://www.pixiv.net/en/users/173530/manga"),
@@ -138,6 +152,11 @@ class PixivUserExtractor(PixivExtractor):
         self.user_id = u1 or u2 or u3
         self.tag = t1 or t2
 
+    def metadata(self):
+        if self.config("metadata"):
+            return {"user": self.api.user_detail(self.user_id)}
+        return {}
+
     def works(self):
         works = self.api.user_illusts(self.user_id)
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 971347b..c62a942 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -78,6 +78,8 @@ class ReactorExtractor(Extractor):
 
     def _parse_post(self, post):
         post, _, script = post.partition('<script type="application/ld+json">')
+        if not script:
+            return
         images = text.extract_iter(post, '<div class="image">', '</div>')
         script = script[:script.index("</")].strip()
@@ -210,7 +212,7 @@ class JoyreactorTagExtractor(ReactorTagExtractor):
     pattern = JR_BASE_PATTERN + r"/tag/([^/?#]+)"
     test = (
         ("http://joyreactor.cc/tag/Advent+Cirno", {
-            "count": ">= 17",
+            "count": ">= 15",
         }),
         ("http://joyreactor.com/tag/Cirno", {
             "url": "de1e60c15bfb07a0e9603b00dc3d05f60edc7914",
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index 7ffe5dc..e4075a2 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -1,17 +1,19 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2021 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extractors for https://readcomiconline.to/"""
+"""Extractors for https://readcomiconline.li/"""
 
 from .common import Extractor, ChapterExtractor, MangaExtractor
 from .. import text, exception
 import re
 
+BASE_PATTERN = r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.(?:li|to)"
+
 
 class ReadcomiconlineBase():
     """Base class for readcomiconline extractors"""
@@ -19,7 +21,7 @@ class ReadcomiconlineBase():
     directory_fmt = ("{category}", "{comic}", "{issue:>03}")
     filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}"
     archive_fmt = "{issue_id}_{page}"
-    root = "https://readcomiconline.to"
+    root = "https://readcomiconline.li"
 
     def request(self, url, **kwargs):
         """Detect and handle redirects to CAPTCHA pages"""
@@ -42,11 +44,10 @@ class ReadcomiconlineBase():
 
 
 class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
-    """Extractor for comic-issues from readcomiconline.to"""
+    """Extractor for comic-issues from readcomiconline.li"""
     subcategory = "issue"
-    pattern = (r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
-               r"(/Comic/[^/?#]+/[^/?#]+\?id=(\d+))")
-    test = ("https://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
+    pattern = BASE_PATTERN + r"(/Comic/[^/?#]+/[^/?#]+\?id=(\d+))"
+    test = ("https://readcomiconline.li/Comic/W-i-t-c-h/Issue-130?id=22289", {
         "url": "30d29c5afc65043bfd384c010257ec2d0ecbafa6",
         "keyword": "2d9ec81ce1b11fac06ebf96ce33cdbfca0e85eb5",
     })
@@ -78,18 +79,17 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
 
 
 class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
-    """Extractor for comics from readcomiconline.to"""
+    """Extractor for comics from readcomiconline.li"""
     chapterclass = ReadcomiconlineIssueExtractor
     subcategory = "comic"
-    pattern = (r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
-               r"(/Comic/[^/?#]+/?)$")
+    pattern = BASE_PATTERN + r"(/Comic/[^/?#]+/?)$"
     test = (
-        ("https://readcomiconline.to/Comic/W-i-t-c-h", {
-            "url": "e231bc2a293edb465133c37a8e36a7e7d94cab14",
+        ("https://readcomiconline.li/Comic/W-i-t-c-h", {
+            "url": "74eb8b9504b4084fcc9367b341300b2c52260918",
             "keyword": "3986248e4458fa44a201ec073c3684917f48ee0c",
         }),
         ("https://readcomiconline.to/Comic/Bazooka-Jules", {
-            "url": "711674cb78ed10bd2557315f7a67552d01b33985",
+            "url": "2f66a467a772df4d4592e97a059ddbc3e8991799",
             "keyword": "f5ba5246cd787bb750924d9690cb1549199bd516",
         }),
     )
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 5579017..9808cb8 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -10,7 +10,7 @@
 
 from .booru import BooruExtractor
 from .common import Message
-from .. import text, exception
+from .. import text, util, exception
 from ..cache import cache
 import collections
@@ -206,7 +206,7 @@ class SankakuAPI():
         self.username, self.password = self.extractor._get_auth_info()
         if not self.username:
-            self.authenticate = lambda: None
+            self.authenticate = util.noop
 
     def pools(self, pool_id):
         params = {"lang": "en"}
@@ -250,7 +250,8 @@ class SankakuAPI():
                 success = True
             if not success:
                 code = data.get("code")
-                if code and code.endswith(("invalid-token", "invalid_token")):
+                if code and code.endswith(
+                        ("unauthorized", "invalid-token", "invalid_token")):
                     _authenticate_impl.invalidate(self.username)
                     continue
                 raise exception.StopExtraction(code)
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index c323fe0..afeebb0 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -32,6 +32,7 @@ class TwitterExtractor(Extractor):
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.user = match.group(1)
+        self.textonly = self.config("text-tweets", False)
         self.retweets = self.config("retweets", True)
         self.replies = self.config("replies", True)
         self.twitpic = self.config("twitpic", False)
@@ -64,7 +65,7 @@ class TwitterExtractor(Extractor):
                 self._extract_card(tweet, files)
             if self.twitpic:
                 self._extract_twitpic(tweet, files)
-            if not files:
+            if not files and not self.textonly:
                 continue
 
             tdata = self._transform_tweet(tweet)
@@ -168,7 +169,6 @@ class TwitterExtractor(Extractor):
                 tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
             "user" : self._transform_user(tweet["user"]),
            "lang" : tweet["lang"],
-            "content" : tweet["full_text"],
             "favorite_count": tweet["favorite_count"],
             "quote_count" : tweet["quote_count"],
             "reply_count" : tweet["reply_count"],
@@ -187,6 +187,14 @@ class TwitterExtractor(Extractor):
                 "nick": u["name"],
             } for u in mentions]
 
+        content = tweet["full_text"]
+        urls = entities.get("urls")
+        if urls:
+            for url in urls:
+                content = content.replace(url["url"], url["expanded_url"])
+        txt, _, tco = content.rpartition(" ")
+        tdata["content"] = txt if tco.startswith("https://t.co/") else content
+
         if "in_reply_to_screen_name" in tweet:
             tdata["reply_to"] = tweet["in_reply_to_screen_name"]
 
@@ -489,6 +497,10 @@ class TwitterTweetExtractor(TwitterExtractor):
             "options": (("conversations", True),),
             "count": ">= 50",
         }),
+        # retweet with missing media entities (#1555)
+        ("https://twitter.com/morino_ya/status/1392763691599237121", {
+            "count": 4,
+        }),
     )
 
     def __init__(self, match):
@@ -802,6 +814,10 @@ class TwitterAPI():
                     tweet = retweet
                 elif retweet:
                     tweet["author"] = users[retweet["user_id_str"]]
+                    if "extended_entities" in retweet and \
+                            "extended_entities" not in tweet:
+                        tweet["extended_entities"] = \
+                            retweet["extended_entities"]
                 tweet["user"] = users[tweet["user_id_str"]]
                 yield tweet
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index d13ce0f..e89a5b7 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -69,7 +69,8 @@ class UnsplashImageExtractor(UnsplashExtractor):
     subcategory = "image"
     pattern = BASE_PATTERN + r"/photos/([^/?#]+)"
     test = ("https://unsplash.com/photos/lsoogGC_5dg", {
-        "url": "b99a5829ca955b768a206aa9afc391bd3f3dd55e",
+        "pattern": r"https://images\.unsplash\.com/photo-1586348943529-"
+                   r"beaae6c28db9\?ixid=\w+&ixlib=rb-1.2.1",
         "keyword": {
             "alt_description": "re:silhouette of trees near body of water ",
             "blur_hash": "LZP4uQS4jboe%#o0WCa}2doJNaaz",
@@ -114,7 +115,7 @@ class UnsplashImageExtractor(UnsplashExtractor):
                 "id": "uMJXuywXLiU",
                 "instagram_username": "just_midwest_rock",
                 "last_name": "Hoefler",
-                "location": "Madison, WI",
+                "location": None,
                 "name": "Dave Hoefler",
                 "portfolio_url": str,
                 "total_collections": int,
diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py
index f8da191..711d3fa 100644
--- a/gallery_dl/extractor/weasyl.py
+++ b/gallery_dl/extractor/weasyl.py
@@ -77,7 +77,7 @@ class WeasylSubmissionExtractor(WeasylExtractor):
         "keyword": {
             "comments" : int,
             "date" : "dt:2012-04-20 00:38:04",
-            "description" : "<p>(flex)</p>",
+            "description" : "<p>(flex)</p>\n",
             "favorites" : int,
             "folder_name" : "Wesley Stuff",
             "folderid" : 2081,
@@ -160,8 +160,8 @@ class WeasylJournalExtractor(WeasylExtractor):
         "keyword": {
             "title" : "BBCode",
             "date" : "dt:2013-09-19 23:11:23",
-            "content": "<p><a>javascript:alert(42);</a></p>"
-                       "<p>No more of that!</p>",
+            "content": "<p><a>javascript:alert(42);</a></p>\n\n"
+                       "<p>No more of that!</p>\n",
         },
     })
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index a325f87..0b6a153 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -27,16 +27,21 @@ class WeiboExtractor(Extractor):
         self.videos = self.config("videos", True)
 
     def items(self):
-        yield Message.Version, 1
+        original_retweets = (self.retweets == "original")
 
         for status in self.statuses():
-            files = self._files_from_status(status)
 
             if self.retweets and "retweeted_status" in status:
-                files = itertools.chain(
-                    files,
-                    self._files_from_status(status["retweeted_status"]),
-                )
+                if original_retweets:
+                    status = status["retweeted_status"]
+                    files = self._files_from_status(status)
+                else:
+                    files = itertools.chain(
+                        self._files_from_status(status),
+                        self._files_from_status(status["retweeted_status"]),
+                    )
+            else:
+                files = self._files_from_status(status)
 
             for num, file in enumerate(files, 1):
                 if num == 1:
@@ -143,6 +148,11 @@ class WeiboStatusExtractor(WeiboExtractor):
         }),
         # non-numeric status ID (#664)
         ("https://weibo.com/3314883543/Iy7fj4qVg"),
+        # original retweets (#1542)
+        ("https://m.weibo.cn/detail/4600272267522211", {
+            "options": (("retweets", "original"),),
+            "keyword": {"status": {"id": "4600167083287033"}},
+        }),
         ("https://m.weibo.cn/status/4339748116375525"),
         ("https://m.weibo.cn/5746766133/4339748116375525"),
     )
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index 7fd60b1..511a609 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -71,8 +71,8 @@ class WikiartArtistExtractor(WikiartExtractor):
     directory_fmt = ("{category}", "{artist[artistName]}")
     pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)/?$"
     test = ("https://www.wikiart.org/en/thomas-cole", {
-        "url": "5ba2fbe6783fcce34e65014d16e5fbc581490c98",
-        "keyword": "eb5b141cf33e6d279afd1518aae24e61cc0adf81",
+        "url": "5140343730331786117fa5f4c013a6153393e28e",
+        "keyword": "4d9cbc50ebddfcb186f31ff70b08833578dd0070",
     })
 
     def __init__(self, match):
@@ -97,8 +97,8 @@ class WikiartImageExtractor(WikiartArtistExtractor):
     pattern = BASE_PATTERN + r"/(?!(?:paintings|artists)-by-)([\w-]+)/([\w-]+)"
     test = (
         ("https://www.wikiart.org/en/thomas-cole/the-departure-1838", {
-            "url": "4d9fd87680a2620eaeaf1f13e3273475dec93231",
-            "keyword": "a1b083d500ce2fd364128e35b026e4ca526000cc",
+            "url": "976cc2545f308a650b5dbb35c29d3cee0f4673b3",
+            "keyword": "8e80cdcb01c1fedb934633d1c4c3ab0419cfbedf",
         }),
        # no year or '-' in slug
        ("https://www.wikiart.org/en/huang-shen/summer", {
```
