From 30dee4697019389ef29458b2e3931adc976389b2 Mon Sep 17 00:00:00 2001
From: Unit 193 <unit193@unit193.net>
Date: Mon, 11 Dec 2023 01:12:30 -0500
Subject: New upstream version 1.26.4.

---
 gallery_dl/extractor/__init__.py     |  1 +
 gallery_dl/extractor/common.py       | 14 ++++++-
 gallery_dl/extractor/danbooru.py     |  2 +-
 gallery_dl/extractor/deviantart.py   |  2 +-
 gallery_dl/extractor/exhentai.py     | 73 +++++++++++++++++++++++++-----------
 gallery_dl/extractor/nijie.py        | 34 ++++++++---------
 gallery_dl/extractor/oauth.py        |  2 +-
 gallery_dl/extractor/patreon.py      |  7 ++++
 gallery_dl/extractor/pixeldrain.py   |  4 +-
 gallery_dl/extractor/reddit.py       |  3 +-
 gallery_dl/extractor/twitter.py      | 37 ++++++++++++++----
 gallery_dl/extractor/urlgalleries.py | 55 +++++++++++++++++++++++++++
 12 files changed, 179 insertions(+), 55 deletions(-)
 create mode 100644 gallery_dl/extractor/urlgalleries.py

(limited to 'gallery_dl/extractor')

diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 72239d5..d074de2 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -155,6 +155,7 @@ modules = [
     "tumblrgallery",
     "twibooru",
     "twitter",
     "unsplash",
     "uploadir",
+    "urlgalleries",
     "urlshortener",
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index f378427..9b010c5 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -136,6 +136,18 @@ class Extractor():
             kwargs["timeout"] = self._timeout
         if "verify" not in kwargs:
             kwargs["verify"] = self._verify
+
+        if "json" in kwargs:
+            json = kwargs["json"]
+            if json is not None:
+                kwargs["data"] = util.json_dumps(json).encode()
+                del kwargs["json"]
+                headers = kwargs.get("headers")
+                if headers:
+                    headers["Content-Type"] = "application/json"
+                else:
+                    kwargs["headers"] = {"Content-Type": "application/json"}
+
         response = None
         tries = 1
 
@@ -233,7 +245,7 @@ class Extractor():
         password = None
 
         if username:
-            password = self.config("password")
+            password = self.config("password") or util.LazyPrompt()
         elif self.config("netrc", False):
             try:
                 info = netrc.netrc().authenticators(self.category)
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 56d81e5..9e6516e 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -36,7 +36,7 @@ class DanbooruExtractor(BaseExtractor):
         username, api_key = self._get_auth_info()
         if username:
             self.log.debug("Using HTTP Basic Auth for user '%s'", username)
-            self.session.auth = (username, api_key)
+            self.session.auth = util.HTTPBasicAuth(username, api_key)
 
     def skip(self, num):
         pages = num // self.per_page
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 2c37ef1..1852dc1 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1239,7 +1239,7 @@ class DeviantartOAuthAPI():
             self.log.info("Requesting public access token")
             data = {"grant_type": "client_credentials"}
 
-        auth = (self.client_id, self.client_secret)
+        auth = util.HTTPBasicAuth(self.client_id, self.client_secret)
         response = self.extractor.request(
             url, method="POST", data=data, auth=auth, fatal=False)
         data = response.json()
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 5dc498f..a479d00 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -47,14 +47,6 @@ class ExhentaiExtractor(Extractor):
 
         if self.version != "ex":
             self.cookies.set("nw", "1", domain=self.cookies_domain)
-        self.original = self.config("original", True)
-
-        limits = self.config("limits", False)
-        if limits and limits.__class__ is int:
-            self.limits = limits
-            self._remaining = 0
-        else:
-            self.limits = False
 
     def request(self, url, **kwargs):
         response = Extractor.request(self, url, **kwargs)
@@ -85,6 +77,7 @@ class ExhentaiExtractor(Extractor):
     @cache(maxage=90*24*3600, keyarg=1)
     def _login_impl(self, username, password):
         self.log.info("Logging in as %s", username)
+
         url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01"
         headers = {
             "Referer": "https://e-hentai.org/bounce_login.php?b=d&bt=1-1",
@@ -98,10 +91,19 @@ class ExhentaiExtractor(Extractor):
             "ipb_login_submit": "Login!",
         }
 
+        self.cookies.clear()
+
         response = self.request(url, method="POST", headers=headers, data=data)
         if b"You are now logged in as:" not in response.content:
             raise exception.AuthenticationError()
-        return {c: response.cookies[c] for c in self.cookies_names}
+
+        # collect more cookies
+        url = self.root + "/favorites.php"
+        response = self.request(url)
+        if response.history:
+            self.request(url)
+
+        return self.cookies
 
 
 class ExhentaiGalleryExtractor(ExhentaiExtractor):
@@ -128,6 +130,19 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
         if source == "hitomi":
             self.items = self._items_hitomi
 
+        limits = self.config("limits", False)
+        if limits and limits.__class__ is int:
+            self.limits = limits
+            self._remaining = 0
+        else:
+            self.limits = False
+
+        self.fallback_retries = self.config("fallback-retries", 2)
+        if self.fallback_retries < 0:
+            self.fallback_retries = float("inf")
+
+        self.original = self.config("original", True)
+
     def favorite(self, slot="0"):
         url = self.root + "/gallerypopups.php"
         params = {
@@ -301,12 +316,11 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             if self.original and orig:
                 url = self.root + "/fullimg" + text.unescape(orig)
                 data = self._parse_original_info(extr('ownload original', '<'))
-                data["_fallback"] = ("{}?nl={}".format(url, nl),)
+                data["_fallback"] = self._fallback_original(nl, url)
             else:
                 url = iurl
                 data = self._parse_image_info(url)
-                data["_fallback"] = self._fallback(
-                    None, self.image_num, nl)
+                data["_fallback"] = self._fallback_1280(nl, self.image_num)
         except IndexError:
             self.log.debug("Page content:\n%s", page)
             raise exception.StopExtraction(
@@ -315,6 +329,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
         data["num"] = self.image_num
         data["image_token"] = self.key_start = extr('var startkey="', '";')
         data["_url_1280"] = iurl
+        data["_nl"] = nl
         self.key_show = extr('var showkey="', '";')
 
         self._check_509(iurl, data)
@@ -351,12 +366,12 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
                     url = text.unescape(origurl)
                     data = self._parse_original_info(text.extract(
                         i6, "ownload original", "<", pos)[0])
-                    data["_fallback"] = ("{}?nl={}".format(url, nl),)
+                    data["_fallback"] = self._fallback_original(nl, url)
                 else:
                     url = imgurl
                     data = self._parse_image_info(url)
-                    data["_fallback"] = self._fallback(
-                        imgkey, request["page"], nl)
+                    data["_fallback"] = self._fallback_1280(
+                        nl, request["page"], imgkey)
             except IndexError:
                 self.log.debug("Page content:\n%s", page)
                 raise exception.StopExtraction(
@@ -365,6 +380,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             data["num"] = request["page"]
             data["image_token"] = imgkey
             data["_url_1280"] = imgurl
+            data["_nl"] = nl
 
             self._check_509(imgurl, data)
             yield url, text.nameext_from_url(url, data)
@@ -431,13 +447,43 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             raise exception.NotFoundError("image page")
         return page
 
-    def _fallback(self, imgkey, num, nl):
-        url = "{}/s/{}/{}-{}?nl={}".format(
-            self.root, imgkey or self.key_start, self.gallery_id, num, nl)
-        page = self.request(url, fatal=False).text
-        if page.startswith(("Invalid page", "Keep trying")):
-            return
-        yield self.image_from_page(page)[0]
+    def _fallback_original(self, nl, fullimg):
+        """Yield the 'nl' retry URL of an original image once per retry"""
+        url = "{}?nl={}".format(fullimg, nl)
+        # 'fallback_retries' is float("inf") for negative 'fallback-retries'
+        # settings, which range() rejects with a TypeError; count manually
+        retries = self.fallback_retries
+        tries = 0
+        while tries < retries:
+            tries += 1
+            yield url
+
+    def _fallback_1280(self, nl, num, token=None):
+        """Yield fallback URLs for a non-original image by reloading its page
+
+        Every attempt requests the image page with the current 'nl' value
+        and carries on with the '_nl' value returned by image_from_page().
+        Stops early on an "Invalid page" or "Keep trying" response.
+        """
+        if not token:
+            token = self.key_start
+
+        # manual counter instead of range(): 'fallback_retries' may be
+        # float("inf"), which range() rejects with a TypeError
+        retries = self.fallback_retries
+        tries = 0
+        while tries < retries:
+            tries += 1
+            url = "{}/s/{}/{}-{}?nl={}".format(
+                self.root, token, self.gallery_id, num, nl)
+
+            page = self.request(url, fatal=False).text
+            if page.startswith(("Invalid page", "Keep trying")):
+                return
+            url, data = self.image_from_page(page)
+            yield url
+
+            nl = data["_nl"]
 
     @staticmethod
     def _parse_image_info(url):
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 76c5404..54f2942 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -57,7 +57,11 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
                 data["user_name"] = data["artist_name"]
             yield Message.Directory, data
 
-            for image in self._extract_images(page):
+            for num, url in enumerate(self._extract_images(image_id, page)):
+                image = text.nameext_from_url(url, {
+                    "num": num,
+                    "url": "https:" + url,
+                })
                 image.update(data)
                 if not image["extension"]:
                     image["extension"] = "jpg"
@@ -72,7 +76,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
         extr = text.extract_from(page)
         keywords = text.unescape(extr(
             'name="keywords" content="', '" />')).split(",")
-        data = {
+        return {
             "title"      : keywords[0].strip(),
             "description": text.unescape(extr(
                 '"description": "', '"').replace("&amp;", "&")),
@@ -82,7 +86,6 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
             "artist_name": keywords[1],
             "tags"       : keywords[2:-1],
         }
-        return data
 
     @staticmethod
     def _extract_data_horne(page):
@@ -90,7 +93,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
         extr = text.extract_from(page)
         keywords = text.unescape(extr(
             'name="keywords" content="', '" />')).split(",")
-        data = {
+        return {
             "title"      : keywords[0].strip(),
             "description": text.unescape(extr(
                 'property="og:description" content="', '"')),
@@ -101,21 +104,27 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
                 "itemprop='datePublished' content=", "<").rpartition(">")[2],
                 "%Y-%m-%d %H:%M:%S", 9),
         }
-        return data
 
-    @staticmethod
-    def _extract_images(page):
-        """Extract image URLs from 'page'"""
-        images = text.extract_iter(page, "/view_popup.php", "</a>")
-        for num, image in enumerate(images):
-            src = text.extr(image, 'src="', '"')
-            if not src:
-                continue
-            url = ("https:" + src).replace("/__rs_l120x120/", "/")
-            yield text.nameext_from_url(url, {
-                "num": num,
-                "url": url,
-            })
+    def _extract_images(self, image_id, page):
+        """Yield the image URLs of post 'image_id' found in 'page'
+
+        Posts with more than one image get their URLs from a separate
+        'view_popup.php' request; everything else uses the single
+        'itemprop="image"' source of 'page' itself.
+        """
+        if '&#diff_1" ' in page:
+            # multiple images
+            url = "{}/view_popup.php?id={}".format(self.root, image_id)
+            page = self.request(url).text
+            yield from text.extract_iter(
+                page, 'href="javascript:void(0);"><img src="', '"')
+        else:
+            url = text.extr(page, 'itemprop="image" src="', '"')
+            # an empty result would end up as the bogus URL "https:"
+            if url:
+                yield url
+            else:
+                self.log.warning("%s: no image URL found", image_id)
 
     @staticmethod
     def _extract_user_name(page):
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index d1f135d..65db94d 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -183,7 +183,7 @@ class OAuthBase(Extractor):
         }
 
         if auth:
-            auth = (client_id, client_secret)
+            auth = util.HTTPBasicAuth(client_id, client_secret)
         else:
             auth = None
             data["client_id"] = client_id
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 6aef9cb..fb560e9 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -254,6 +254,13 @@ class PatreonExtractor(Extractor):
         if bootstrap:
             return util.json_loads(bootstrap + "}")
 
+        bootstrap = text.extr(
+            page,
+            'window.patreon = wrapInProxy({"bootstrap":',
+            '},"apiServer"')
+        if bootstrap:
+            return util.json_loads(bootstrap + "}")
+
         bootstrap = text.extr(page, "window.patreon.bootstrap,", "});")
         if bootstrap:
             return util.json_loads(bootstrap + "}")
diff --git a/gallery_dl/extractor/pixeldrain.py b/gallery_dl/extractor/pixeldrain.py
index 34b4ebf..5cfdc43 100644
--- a/gallery_dl/extractor/pixeldrain.py
+++ b/gallery_dl/extractor/pixeldrain.py
@@ -9,7 +9,7 @@
 """Extractors for https://pixeldrain.com/"""
 
 from .common import Extractor, Message
-from .. import text
+from .. import text, util
 
 BASE_PATTERN = r"(?:https?://)?pixeldrain\.com"
 
@@ -23,7 +23,7 @@ class PixeldrainExtractor(Extractor):
     def _init(self):
         api_key = self.config("api-key")
         if api_key:
-            self.session.auth = ("", api_key)
+            self.session.auth = util.HTTPBasicAuth("", api_key)
 
     def parse_datetime(self, date_string):
         return text.parse_datetime(
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index c0bf5b3..feb6d1f 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -423,9 +423,10 @@ class RedditAPI():
                                    "grants/installed_client"),
                     "device_id": "DO_NOT_TRACK_THIS_DEVICE"}
 
+        auth = util.HTTPBasicAuth(self.client_id, "")
         response = self.extractor.request(
             url, method="POST", headers=self.headers,
-            data=data, auth=(self.client_id, ""), fatal=False)
+            data=data, auth=auth, fatal=False)
         data = response.json()
 
         if response.status_code != 200:
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ca1e906..f874f12 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1276,8 +1276,21 @@ class TwitterAPI():
                 self.headers["x-csrf-token"] = csrf_token
 
             if response.status_code < 400:
-                # success
-                return response.json()
+                data = response.json()
+                if not data.get("errors") or not any(
+                        (e.get("message") or "").lower().startswith("timeout")
+                        for e in data["errors"]):
+                    return data  # success or non-timeout errors
+
+                msg = data["errors"][0].get("message") or "Unspecified"
+                self.extractor.log.debug("Internal Twitter error: '%s'", msg)
+
+                if self.headers["x-twitter-auth-type"]:
+                    self.extractor.log.debug("Retrying API request")
+                    continue  # retry
+
+                # fall through to "Login Required"
+                response.status_code = 404
 
             if response.status_code == 429:
                 # rate limit exceeded
@@ -1289,11 +1302,9 @@ class TwitterAPI():
                 self.extractor.wait(until=until, seconds=seconds)
                 continue
 
-            if response.status_code == 403 and \
-                    not self.headers["x-twitter-auth-type"] and \
-                    endpoint == "/2/search/adaptive.json":
-                raise exception.AuthorizationError(
-                    "Login required to access search results")
+            if response.status_code in (403, 404) and \
+                    not self.headers["x-twitter-auth-type"]:
+                raise exception.AuthorizationError("Login required")
 
             # error
             try:
@@ -1431,7 +1442,12 @@ class TwitterAPI():
                 for instr in instructions:
                     instr_type = instr.get("type")
                     if instr_type == "TimelineAddEntries":
-                        entries = instr["entries"]
+                        if entries:
+                            entries.extend(instr["entries"])
+                        else:
+                            entries = instr["entries"]
+                    elif instr_type == "TimelineAddToModule":
+                        entries = instr["moduleItems"]
                     elif instr_type == "TimelineReplaceEntry":
                         entry = instr["entry"]
                         if entry["entryId"].startswith("cursor-bottom-"):
@@ -1479,6 +1495,11 @@ class TwitterAPI():
 
                 if esw("tweet-"):
                     tweets.append(entry)
+                elif esw("profile-grid-"):
+                    if "content" in entry:
+                        tweets.extend(entry["content"]["items"])
+                    else:
+                        tweets.append(entry)
                 elif esw(("homeConversation-",
                           "profile-conversation-",
                           "conversationthread-")):
diff --git a/gallery_dl/extractor/urlgalleries.py b/gallery_dl/extractor/urlgalleries.py
new file mode 100644
index 0000000..b21709a
--- /dev/null
+++ b/gallery_dl/extractor/urlgalleries.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://urlgalleries.net/"""
+
+from .common import GalleryExtractor, Message
+from .. import text
+
+
+class UrlgalleriesGalleryExtractor(GalleryExtractor):
+    """Extractor for galleries hosted on urlgalleries.net blogs"""
+    category = "urlgalleries"
+    # full URL including scheme, like the 'root' of other extractors
+    root = "https://urlgalleries.net"
+    request_interval = (0.5, 1.0)
+    pattern = r"(?:https?://)?(?:(\w+)\.)?urlgalleries\.net/(?:[\w-]+-)?(\d+)"
+    example = "https://blog.urlgalleries.net/gallery-12345/TITLE"
+
+    def __init__(self, match):
+        """Build the gallery URL from the blog name and gallery ID"""
+        self.blog, self.gallery_id = match.groups()
+        # the blog subdomain is optional in 'pattern'; formatting a
+        # missing one would produce the host "None.urlgalleries.net"
+        # NOTE(review): confirm the bare domain serves galleries
+        if self.blog:
+            self.blog_root = "https://{}.urlgalleries.net".format(self.blog)
+        else:
+            self.blog_root = self.root
+        url = "{}/porn-gallery-{}/?a=10000".format(
+            self.blog_root, self.gallery_id)
+        GalleryExtractor.__init__(self, match, url)
+
+    def items(self):
+        """Yield one Directory message and a Queue message per image link"""
+        page = self.request(self.gallery_url).text
+        imgs = self.images(page)
+        data = self.metadata(page)
+        data["count"] = len(imgs)
+        del page
+
+        yield Message.Directory, data
+        for data["num"], img in enumerate(imgs, 1):
+            # only the redirect target is of interest, so the redirect
+            # itself is not followed
+            response = self.request(
+                self.blog_root + img, method="HEAD", allow_redirects=False)
+            location = response.headers.get("Location")
+            if not location:
+                # not a redirect; skip instead of raising KeyError
+                self.log.warning("No redirect target for '%s'", img)
+                continue
+            yield Message.Queue, location, data
+
+    def metadata(self, page):
+        """Return gallery metadata parsed from 'page'"""
+        # entries are evaluated in order; each extr() call presumably
+        # continues after the previous match - TODO confirm
+        extr = text.extract_from(page)
+        return {
+            "gallery_id": self.gallery_id,
+            "_site": extr(' title="', '"'),  # site name
+            "blog" : text.unescape(extr(' title="', '"')),
+            "_rprt": extr(' title="', '"'),  # report button
+            "title": text.unescape(extr(' title="', '"').strip()),
+            "date" : text.parse_datetime(
+                extr(" images in gallery | ", "<"), "%B %d, %Y %H:%M"),
+        }
+
+    def images(self, page):
+        """Return all link targets found in the 'wtf' element of 'page'"""
+        imgs = text.extr(page, 'id="wtf"', "</div>")
+        return list(text.extract_iter(imgs, " href='", "'"))
-- 
cgit v1.2.3