From 30dee4697019389ef29458b2e3931adc976389b2 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Mon, 11 Dec 2023 01:12:30 -0500 Subject: New upstream version 1.26.4. --- gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/common.py | 14 ++++++- gallery_dl/extractor/danbooru.py | 2 +- gallery_dl/extractor/deviantart.py | 2 +- gallery_dl/extractor/exhentai.py | 73 +++++++++++++++++++++++++----------- gallery_dl/extractor/nijie.py | 34 ++++++++--------- gallery_dl/extractor/oauth.py | 2 +- gallery_dl/extractor/patreon.py | 7 ++++ gallery_dl/extractor/pixeldrain.py | 4 +- gallery_dl/extractor/reddit.py | 3 +- gallery_dl/extractor/twitter.py | 37 ++++++++++++++---- gallery_dl/extractor/urlgalleries.py | 55 +++++++++++++++++++++++++++ 12 files changed, 179 insertions(+), 55 deletions(-) create mode 100644 gallery_dl/extractor/urlgalleries.py (limited to 'gallery_dl/extractor') diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 72239d5..d074de2 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -155,6 +155,7 @@ modules = [ "tumblrgallery", "twibooru", "twitter", + "urlgalleries", "unsplash", "uploadir", "urlshortener", diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index f378427..9b010c5 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -136,6 +136,18 @@ class Extractor(): kwargs["timeout"] = self._timeout if "verify" not in kwargs: kwargs["verify"] = self._verify + + if "json" in kwargs: + json = kwargs["json"] + if json is not None: + kwargs["data"] = util.json_dumps(json).encode() + del kwargs["json"] + headers = kwargs.get("headers") + if headers: + headers["Content-Type"] = "application/json" + else: + kwargs["headers"] = {"Content-Type": "application/json"} + response = None tries = 1 @@ -233,7 +245,7 @@ class Extractor(): password = None if username: - password = self.config("password") + password = self.config("password") or util.LazyPrompt() elif self.config("netrc", False): try: info = netrc.netrc().authenticators(self.category) diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 56d81e5..9e6516e 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -36,7 +36,7 @@ class DanbooruExtractor(BaseExtractor): username, api_key = self._get_auth_info() if username: self.log.debug("Using HTTP Basic Auth for user '%s'", username) - self.session.auth = (username, api_key) + self.session.auth = util.HTTPBasicAuth(username, api_key) def skip(self, num): pages = num // self.per_page diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 2c37ef1..1852dc1 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -1239,7 +1239,7 @@ class DeviantartOAuthAPI(): self.log.info("Requesting public access token") data = {"grant_type": "client_credentials"} - auth = (self.client_id, self.client_secret) + auth = util.HTTPBasicAuth(self.client_id, self.client_secret) response = self.extractor.request( url, method="POST", data=data, auth=auth, fatal=False) data = response.json() diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 5dc498f..a479d00 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -47,14 +47,6 @@ class ExhentaiExtractor(Extractor): if self.version != "ex": self.cookies.set("nw", "1", domain=self.cookies_domain) - self.original = self.config("original", True) - - limits = self.config("limits", False) - if limits and limits.__class__ is int: - self.limits = limits - self._remaining = 0 - else: - self.limits = False def request(self, url, **kwargs): response = Extractor.request(self, url, **kwargs) @@ -85,6 +77,7 @@ class ExhentaiExtractor(Extractor): @cache(maxage=90*24*3600, keyarg=1) def _login_impl(self, username, password): self.log.info("Logging in as %s", username) + url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01" headers = { "Referer": "https://e-hentai.org/bounce_login.php?b=d&bt=1-1", @@ -98,10 +91,19 @@ class ExhentaiExtractor(Extractor): "ipb_login_submit": "Login!", } + self.cookies.clear() + response = self.request(url, method="POST", headers=headers, data=data) if b"You are now logged in as:" not in response.content: raise exception.AuthenticationError() - return {c: response.cookies[c] for c in self.cookies_names} + + # collect more cookies + url = self.root + "/favorites.php" + response = self.request(url) + if response.history: + self.request(url) + + return self.cookies class ExhentaiGalleryExtractor(ExhentaiExtractor): @@ -128,6 +130,19 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): if source == "hitomi": self.items = self._items_hitomi + limits = self.config("limits", False) + if limits and limits.__class__ is int: + self.limits = limits + self._remaining = 0 + else: + self.limits = False + + self.fallback_retries = self.config("fallback-retries", 2) + if self.fallback_retries < 0: + self.fallback_retries = float("inf") + + self.original = self.config("original", True) + def favorite(self, slot="0"): url = self.root + "/gallerypopups.php" params = { @@ -301,12 +316,11 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): if self.original and orig: url = self.root + "/fullimg" + text.unescape(orig) data = self._parse_original_info(extr('ownload original', '<')) - data["_fallback"] = ("{}?nl={}".format(url, nl),) + data["_fallback"] = self._fallback_original(nl, url) else: url = iurl data = self._parse_image_info(url) - data["_fallback"] = self._fallback( - None, self.image_num, nl) + data["_fallback"] = self._fallback_1280(nl, self.image_num) except IndexError: self.log.debug("Page content:\n%s", page) raise exception.StopExtraction( @@ -315,6 +329,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): data["num"] = self.image_num data["image_token"] = self.key_start = extr('var startkey="', '";') data["_url_1280"] = iurl + data["_nl"] = nl self.key_show = extr('var showkey="', '";') self._check_509(iurl, data) @@ -351,12 +366,12 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): url = text.unescape(origurl) data = self._parse_original_info(text.extract( i6, "ownload original", "<", pos)[0]) - data["_fallback"] = ("{}?nl={}".format(url, nl),) + data["_fallback"] = self._fallback_original(nl, url) else: url = imgurl data = self._parse_image_info(url) - data["_fallback"] = self._fallback( - imgkey, request["page"], nl) + data["_fallback"] = self._fallback_1280( + nl, request["page"], imgkey) except IndexError: self.log.debug("Page content:\n%s", page) raise exception.StopExtraction( @@ -365,6 +380,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): data["num"] = request["page"] data["image_token"] = imgkey data["_url_1280"] = imgurl + data["_nl"] = nl self._check_509(imgurl, data) yield url, text.nameext_from_url(url, data) @@ -431,13 +447,26 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): raise exception.NotFoundError("image page") return page - def _fallback(self, imgkey, num, nl): - url = "{}/s/{}/{}-{}?nl={}".format( - self.root, imgkey or self.key_start, self.gallery_id, num, nl) - page = self.request(url, fatal=False).text - if page.startswith(("Invalid page", "Keep trying")): - return - yield self.image_from_page(page)[0] + def _fallback_original(self, nl, fullimg): + url = "{}?nl={}".format(fullimg, nl) + for _ in range(self.fallback_retries): + yield url + + def _fallback_1280(self, nl, num, token=None): + if not token: + token = self.key_start + + for _ in range(self.fallback_retries): + url = "{}/s/{}/{}-{}?nl={}".format( + self.root, token, self.gallery_id, num, nl) + + page = self.request(url, fatal=False).text + if page.startswith(("Invalid page", "Keep trying")): + return + url, data = self.image_from_page(page) + yield url + + nl = data["_nl"] @staticmethod def _parse_image_info(url): diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index 76c5404..54f2942 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -57,7 +57,11 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor): data["user_name"] = data["artist_name"] yield Message.Directory, data - for image in self._extract_images(page): + for num, url in enumerate(self._extract_images(image_id, page)): + image = text.nameext_from_url(url, { + "num": num, + "url": "https:" + url, + }) image.update(data) if not image["extension"]: image["extension"] = "jpg" @@ -72,7 +76,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor): extr = text.extract_from(page) keywords = text.unescape(extr( 'name="keywords" content="', '" />')).split(",") - data = { + return { "title" : keywords[0].strip(), "description": text.unescape(extr( '"description": "', '"').replace("&", "&")), @@ -82,7 +86,6 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor): "artist_name": keywords[1], "tags" : keywords[2:-1], } - return data @staticmethod def _extract_data_horne(page): @@ -90,7 +93,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor): extr = text.extract_from(page) keywords = text.unescape(extr( 'name="keywords" content="', '" />')).split(",") - data = { + return { "title" : keywords[0].strip(), "description": text.unescape(extr( 'property="og:description" content="', '"')), @@ -101,21 +104,16 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor): "itemprop='datePublished' content=", "<").rpartition(">")[2], "%Y-%m-%d %H:%M:%S", 9), } - return data - @staticmethod - def _extract_images(page): - """Extract image URLs from 'page'""" - images = text.extract_iter(page, "/view_popup.php", "") - for num, image in enumerate(images): - src = text.extr(image, 'src="', '"') - if not src: - continue - url = ("https:" + src).replace("/__rs_l120x120/", "/") - yield text.nameext_from_url(url, { - "num": num, - "url": url, - }) + def _extract_images(self, image_id, page): + if '&#diff_1" ' in page: + # multiple images + url = "{}/view_popup.php?id={}".format(self.root, image_id) + page = self.request(url).text + yield from text.extract_iter( + page, 'href="javascript:void(0);">") + return list(text.extract_iter(imgs, " href='", "'")) -- cgit v1.2.3