diff options
| author | 2024-06-22 21:19:36 -0400 | |
|---|---|---|
| committer | 2024-06-22 21:19:36 -0400 | |
| commit | 80e39a8fc7de105510cbbdca8507f2a4b8c9e01d (patch) | |
| tree | bb5caa2f5fafc5116b8f89e659085ffbd8a918f2 /gallery_dl | |
| parent | 1c28712d865e30ed752988ba0b6944882250b665 (diff) | |
New upstream version 1.27.1.upstream/1.27.1
Diffstat (limited to 'gallery_dl')
34 files changed, 603 insertions, 452 deletions
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index bc44b35..1d4215e 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -220,8 +220,13 @@ def main(): cnt, "entry" if cnt == 1 else "entries", cache._path(), ) - elif args.config_init: - return config.initialize() + elif args.config: + if args.config == "init": + return config.initialize() + elif args.config == "status": + return config.status() + else: + return config.open_extern() else: if not args.urls and not args.input_files: diff --git a/gallery_dl/config.py b/gallery_dl/config.py index 4be6c53..0a187c1 100644 --- a/gallery_dl/config.py +++ b/gallery_dl/config.py @@ -90,13 +90,85 @@ def initialize(): return 0 +def open_extern(): + for path in _default_configs: + path = util.expand_path(path) + if os.access(path, os.R_OK | os.W_OK): + break + else: + log.warning("Unable to find any writable configuration file") + return 1 + + if util.WINDOWS: + openers = ("explorer", "notepad") + else: + openers = ("xdg-open", "open") + editor = os.environ.get("EDITOR") + if editor: + openers = (editor,) + openers + + import shutil + for opener in openers: + opener = shutil.which(opener) + if opener: + break + else: + log.warning("Unable to find a program to open '%s' with", path) + return 1 + + log.info("Running '%s %s'", opener, path) + retcode = util.Popen((opener, path)).wait() + + if not retcode: + try: + with open(path, encoding="utf-8") as fp: + util.json_loads(fp.read()) + except Exception as exc: + log.warning("%s when parsing '%s': %s", + exc.__class__.__name__, path, exc) + return 2 + + return retcode + + +def status(): + from .output import stdout_write + + paths = [] + for path in _default_configs: + path = util.expand_path(path) + + try: + with open(path, encoding="utf-8") as fp: + util.json_loads(fp.read()) + except FileNotFoundError: + status = "Not Present" + except OSError: + status = "Inaccessible" + except ValueError: + status = "Invalid JSON" + except Exception as exc: + log.debug(exc) + status = "Unknown" + else: + status = "OK" + + paths.append((path, status)) + + fmt = "{{:<{}}} : {{}}\n".format( + max(len(p[0]) for p in paths)).format + + for path, status in paths: + stdout_write(fmt(path, status)) + + def load(files=None, strict=False, loads=util.json_loads): """Load JSON configuration files""" for pathfmt in files or _default_configs: path = util.expand_path(pathfmt) try: - with open(path, encoding="utf-8") as file: - conf = loads(file.read()) + with open(path, encoding="utf-8") as fp: + conf = loads(fp.read()) except OSError as exc: if strict: log.error(exc) diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py index b4986c1..47f78a7 100644 --- a/gallery_dl/cookies.py +++ b/gallery_dl/cookies.py @@ -188,8 +188,8 @@ def _firefox_cookies_database(profile=None, container=None): os.path.dirname(path), "containers.json") try: - with open(containers_path) as file: - identities = util.json_loads(file.read())["identities"] + with open(containers_path) as fp: + identities = util.json_loads(fp.read())["identities"] except OSError: _log_error("Unable to read Firefox container database at '%s'", containers_path) @@ -745,8 +745,8 @@ def _get_windows_v10_key(browser_root): _log_error("Unable to find Local State file") return None _log_debug("Found Local State file at '%s'", path) - with open(path, encoding="utf-8") as file: - data = util.json_loads(file.read()) + with open(path, encoding="utf-8") as fp: + data = util.json_loads(fp.read()) try: base64_key = data["os_crypt"]["encrypted_key"] except KeyError: diff --git a/gallery_dl/downloader/text.py b/gallery_dl/downloader/text.py index c57fbd0..a668d62 100644 --- a/gallery_dl/downloader/text.py +++ b/gallery_dl/downloader/text.py @@ -18,8 +18,8 @@ class TextDownloader(DownloaderBase): if self.part: pathfmt.part_enable(self.partdir) self.out.start(pathfmt.path) - with pathfmt.open("wb") as file: - file.write(url.encode()[5:]) + with pathfmt.open("wb") as fp: + fp.write(url.encode()[5:]) return True diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index adada75..87e7756 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -89,6 +89,11 @@ class YoutubeDLDownloader(DownloaderBase): formats = info_dict.get("requested_formats") if formats and not compatible_formats(formats): info_dict["ext"] = "mkv" + elif "ext" not in info_dict: + try: + info_dict["ext"] = info_dict["formats"][0]["ext"] + except LookupError: + info_dict["ext"] = "mp4" if self.outtmpl: self._set_outtmpl(ytdl_instance, self.outtmpl) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 591e6a8..6aff1f3 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -62,6 +62,7 @@ modules = [ "hentaifox", "hentaihand", "hentaihere", + "hentainexus", "hiperdex", "hitomi", "hotleak", @@ -113,7 +114,6 @@ modules = [ "paheal", "patreon", "philomena", - "photobucket", "photovogue", "picarto", "piczel", diff --git a/gallery_dl/extractor/cien.py b/gallery_dl/extractor/cien.py deleted file mode 100644 index a9ccab5..0000000 --- a/gallery_dl/extractor/cien.py +++ /dev/null @@ -1,86 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2024 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://ci-en.net/""" - -from .common import Extractor, Message -from .. import text, util - -BASE_PATTERN = r"(?:https?://)?ci-en\.(?:net|dlsite\.com)" - - -class CienExtractor(Extractor): - category = "cien" - root = "https://ci-en.net" - - def __init__(self, match): - self.root = text.root_from_url(match.group(0)) - Extractor.__init__(self, match) - - def _pagination_articles(self, url, params): - data = {"extractor": CienArticleExtractor} - params["page"] = text.parse_int(params.get("page"), 1) - - while True: - page = self.request(url, params=params).text - - for card in text.extract_iter( - page, ' class="c-cardCase-item', '</div>'): - article_url = text.extr(card, ' href="', '"') - yield Message.Queue, article_url, data - - if ' rel="next"' not in page: - return - params["page"] += 1 - - -class CienArticleExtractor(CienExtractor): - subcategory = "article" - pattern = BASE_PATTERN + r"/creator/(\d+)/article/(\d+)" - example = "https://ci-en.net/creator/123/article/12345" - - def items(self): - url = "{}/creator/{}/article/{}".format( - self.root, self.groups[0], self.groups[1]) - page = self.request(url, notfound="article").text - return - yield 1 - - -class CienCreatorExtractor(CienExtractor): - subcategory = "creator" - pattern = BASE_PATTERN + r"/creator/(\d+)(?:/article(?:\?([^#]+))?)?/?$" - example = "https://ci-en.net/creator/123" - - def items(self): - url = "{}/creator/{}/article".format(self.root, self.groups[0]) - params = text.parse_query(self.groups[1]) - params["mode"] = "list" - return self._pagination_articles(url, params) - - -class CienRecentExtractor(CienExtractor): - subcategory = "recent" - pattern = BASE_PATTERN + r"/mypage/recent(?:\?([^#]+))?" - example = "https://ci-en.net/mypage/recent" - - def items(self): - url = self.root + "/mypage/recent" - params = text.parse_query(self.groups[0]) - return self._pagination_articles(url, params) - - -class CienFollowingExtractor(CienExtractor): - subcategory = "following" - pattern = BASE_PATTERN + r"/mypage/subscription(/following)?" - example = "https://ci-en.net/mypage/subscription" - - def items(self): - url = self.root + "/mypage/recent" - params = text.parse_query(self.groups[0]) - return self._pagination_articles(url, params) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 8771261..d7a41bc 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -11,6 +11,7 @@ import os import re import ssl +import sys import time import netrc import queue @@ -42,6 +43,7 @@ class Extractor(): browser = None request_interval = 0.0 request_interval_min = 0.0 + request_interval_429 = 60.0 request_timestamp = 0.0 def __init__(self, match): @@ -202,7 +204,9 @@ class Extractor(): self.log.warning("Cloudflare CAPTCHA") break - if code == 429 and self._interval_429: + if code == 429 and self._handle_429(response): + continue + elif code == 429 and self._interval_429: pass elif code not in retry_codes and code < 500: break @@ -230,6 +234,8 @@ class Extractor(): raise exception.HttpError(msg, response) + _handle_429 = util.false + def wait(self, seconds=None, until=None, adjust=1.0, reason="rate limit"): now = time.time() @@ -263,6 +269,8 @@ class Extractor(): time.sleep(seconds) def input(self, prompt, echo=True): + self._check_input_allowed(prompt) + if echo: try: return input(prompt) @@ -271,13 +279,30 @@ class Extractor(): else: return getpass.getpass(prompt) + def _check_input_allowed(self, prompt=""): + input = self.config("input") + + if input is None: + try: + input = sys.stdin.isatty() + except Exception: + input = False + + if not input: + raise exception.StopExtraction( + "User input required (%s)", prompt.strip(" :")) + def _get_auth_info(self): """Return authentication information as (username, password) tuple""" username = self.config("username") password = None if username: - password = self.config("password") or util.LazyPrompt() + password = self.config("password") + if not password: + self._check_input_allowed("password") + password = util.LazyPrompt() + elif self.config("netrc", False): try: info = netrc.netrc().authenticators(self.category) @@ -304,7 +329,7 @@ class Extractor(): self.request_interval_min, ) self._interval_429 = util.build_duration_func( - self.config("sleep-429", 60), + self.config("sleep-429", self.request_interval_429), ) if self._retries < 0: @@ -837,7 +862,7 @@ def _build_requests_adapter(ssl_options, ssl_ciphers, source_address): if ssl_options or ssl_ciphers: ssl_context = urllib3.connection.create_urllib3_context( options=ssl_options or None, ciphers=ssl_ciphers) - if requests.__version__ > "2.31": + if not requests.__version__ < "2.32": # https://github.com/psf/requests/pull/6731 ssl_context.load_default_certs() ssl_context.check_hostname = False diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 993885a..2199cc8 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -1730,15 +1730,16 @@ class DeviantartEclipseAPI(): url = "{}/{}/about".format(self.extractor.root, user) page = self.request(url).text - gruserid, pos = text.extract(page, ' data-userid="', '"') + gruser_id = text.extr(page, ' data-userid="', '"') - pos = page.find('\\"type\\":\\"watching\\"', pos) + pos = page.find('\\"name\\":\\"watching\\"') if pos < 0: - raise exception.NotFoundError("module") - moduleid = text.rextract(page, '\\"id\\":', ',', pos)[0].strip('" ') + raise exception.NotFoundError("'watching' module ID") + module_id = text.rextract( + page, '\\"id\\":', ',', pos)[0].strip('" ') self._fetch_csrf_token(page) - return gruserid, moduleid + return gruser_id, module_id def _fetch_csrf_token(self, page=None): if page is None: diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index 2223403..d81fd0b 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -113,7 +113,17 @@ class FanboxExtractor(Extractor): post["user"] = self._get_user_data(post["creatorId"]) if self._meta_plan: plans = self._get_plan_data(post["creatorId"]) - post["plan"] = plans[post["feeRequired"]] + fee = post["feeRequired"] + try: + post["plan"] = plans[fee] + except KeyError: + fees = [f for f in plans if f >= fee] + if fees: + plan = plans[min(fees)] + else: + plan = plans[0].copy() + plan["fee"] = fee + post["plan"] = plans[fee] = plan return content_body, post diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py new file mode 100644 index 0000000..97b7844 --- /dev/null +++ b/gallery_dl/extractor/hentainexus.py @@ -0,0 +1,176 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019-2024 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://hentainexus.com/""" + +from .common import GalleryExtractor, Extractor, Message +from .. import text, util +import binascii + + +class HentainexusGalleryExtractor(GalleryExtractor): + """Extractor for hentainexus galleries""" + category = "hentainexus" + root = "https://hentainexus.com" + pattern = (r"(?i)(?:https?://)?(?:www\.)?hentainexus\.com" + r"/(?:view|read)/(\d+)") + example = "https://hentainexus.com/view/12345" + + def __init__(self, match): + self.gallery_id = match.group(1) + url = "{}/view/{}".format(self.root, self.gallery_id) + GalleryExtractor.__init__(self, match, url) + + def metadata(self, page): + rmve = text.remove_html + extr = text.extract_from(page) + data = { + "gallery_id": text.parse_int(self.gallery_id), + "cover" : extr('"og:image" content="', '"'), + "title" : extr('<h1 class="title">', '</h1>'), + } + + for key in ("Artist", "Book", "Circle", "Event", "Language", + "Magazine", "Parody", "Publisher", "Description"): + value = rmve(extr('viewcolumn">' + key + '</td>', '</td>')) + value, sep, rest = value.rpartition(" (") + data[key.lower()] = value if sep else rest + + data["tags"] = tags = [] + for k in text.extract_iter(page, '<a href="/?q=tag:', '"'): + tags.append(text.unquote(k).strip('"').replace("+", " ")) + + if not data["language"]: + data["language"] = "English" + data["lang"] = util.language_to_code(data["language"]) + + if "doujin" in data["tags"]: + data["type"] = "Doujinshi" + elif "illustration" in data["tags"]: + data["type"] = "Illustration" + else: + data["type"] = "Manga" + data["title_conventional"] = self._join_title(data) + return data + + def images(self, _): + url = "{}/read/{}".format(self.root, self.gallery_id) + page = self.request(url).text + imgs = util.json_loads(self._decode(text.extr( + page, 'initReader("', '"'))) + + headers = None + if not self.config("original", True): + headers = {"Accept": "image/webp,*/*"} + for img in imgs: + img["_http_headers"] = headers + + return [ + (img["image"], img) + for img in imgs + ] + + @staticmethod + def _decode(data): + # https://hentainexus.com/static/js/reader.min.js?r=22 + hostname = "hentainexus.com" + primes = (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53) + blob = list(binascii.a2b_base64(data)) + for i in range(0, len(hostname)): + blob[i] = blob[i] ^ ord(hostname[i]) + + key = blob[0:64] + + C = 0 + for k in key: + C = C ^ k + for _ in range(8): + if C & 1: + C = C >> 1 ^ 0xc + else: + C = C >> 1 + k = primes[C & 0x7] + + x = 0 + S = list(range(256)) + for i in range(256): + x = (x + S[i] + key[i % len(key)]) % 256 + S[i], S[x] = S[x], S[i] + + result = "" + a = c = m = x = 0 + for n in range(64, len(blob)): + a = (a + k) % 256 + x = (c + S[(x + S[a]) % 256]) % 256 + c = (c + a + S[a]) % 256 + + S[a], S[x] = S[x], S[a] + m = S[(x + S[(a + S[(m + c) % 256]) % 256]) % 256] + result += chr(blob[n] ^ m) + + return result + + @staticmethod + def _join_title(data): + event = data['event'] + artist = data['artist'] + circle = data['circle'] + title = data['title'] + parody = data['parody'] + book = data['book'] + magazine = data['magazine'] + + # a few galleries have a large number of artists or parodies, + # which get replaced with "Various" in the title string + if artist.count(',') >= 3: + artist = 'Various' + if parody.count(',') >= 3: + parody = 'Various' + + jt = '' + if event: + jt += '({}) '.format(event) + if circle: + jt += '[{} ({})] '.format(circle, artist) + else: + jt += '[{}] '.format(artist) + jt += title + if parody.lower() != 'original work': + jt += ' ({})'.format(parody) + if book: + jt += ' ({})'.format(book) + if magazine: + jt += ' ({})'.format(magazine) + return jt + + +class HentainexusSearchExtractor(Extractor): + """Extractor for hentainexus search results""" + category = "hentainexus" + subcategory = "search" + root = "https://hentainexus.com" + pattern = (r"(?i)(?:https?://)?(?:www\.)?hentainexus\.com" + r"(?:/page/\d+)?/?(?:\?(q=[^/?#]+))?$") + example = "https://hentainexus.com/?q=QUERY" + + def items(self): + params = text.parse_query(self.groups[0]) + data = {"_extractor": HentainexusGalleryExtractor} + path = "/" + + while path: + page = self.request(self.root + path, params=params).text + extr = text.extract_from(page) + + while True: + gallery_id = extr('<a href="/view/', '"') + if not gallery_id: + break + yield Message.Queue, self.root + "/view/" + gallery_id, data + + path = extr('class="pagination-next" href="', '"') diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 88f5708..9b74700 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -58,6 +58,7 @@ class HitomiGalleryExtractor(GalleryExtractor): return { "gallery_id": text.parse_int(info["id"]), "title" : info["title"], + "title_jpn" : info.get("japanese_title") or "", "type" : info["type"].capitalize(), "language" : language, "lang" : util.language_to_code(language), diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 9c2b1de..f7a5cc7 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -168,6 +168,7 @@ class InstagramExtractor(Extractor): "likes": post.get("like_count", 0), "pinned": post.get("timeline_pinned_user_ids", ()), "date": text.parse_timestamp(post.get("taken_at")), + "liked": post.get("has_liked", False), } caption = post["caption"] @@ -270,6 +271,7 @@ class InstagramExtractor(Extractor): "typename" : typename, "date" : text.parse_timestamp(post["taken_at_timestamp"]), "likes" : post["edge_media_preview_like"]["count"], + "liked" : post.get("viewer_has_liked", False), "pinned" : pinned, "owner_id" : owner["id"], "username" : owner.get("username"), diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index b0c24de..6f2d5f3 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -518,7 +518,8 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor): if not sort: sort = "updated" - users.sort(key=lambda x: x[sort], reverse=(order == "desc")) + users.sort(key=lambda x: x[sort] or util.NONE, + reverse=(order == "desc")) for user in users: user["_extractor"] = KemonopartyUserExtractor @@ -532,7 +533,8 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor): if not sort: sort = "faved_seq" - posts.sort(key=lambda x: x[sort], reverse=(order == "desc")) + posts.sort(key=lambda x: x[sort] or util.NONE, + reverse=(order == "desc")) for post in posts: post["_extractor"] = KemonopartyPostExtractor diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 7ac3a3a..ecd6619 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -12,6 +12,7 @@ from .common import Extractor, Message from .. import text, util, exception from ..cache import cache import itertools +import re class NewgroundsExtractor(Extractor): @@ -33,10 +34,16 @@ class NewgroundsExtractor(Extractor): def _init(self): self.flash = self.config("flash", True) - fmt = self.config("format", "original") - self.format = (True if not fmt or fmt == "original" else - fmt if isinstance(fmt, int) else - text.parse_int(fmt.rstrip("p"))) + fmt = self.config("format") + if not fmt or fmt == "original": + self.format = ("mp4", "webm", "m4v", "mov", "mkv", + 1080, 720, 360) + elif isinstance(fmt, (list, tuple)): + self.format = fmt + else: + self._video_formats = self._video_formats_limit + self.format = (fmt if isinstance(fmt, int) else + text.parse_int(fmt.rstrip("p"))) def items(self): self.login() @@ -266,7 +273,7 @@ class NewgroundsExtractor(Extractor): if src: src = src.replace("\\/", "/") - fallback = () + formats = () date = text.parse_datetime(extr( 'itemprop="datePublished" content="', '"')) else: @@ -276,23 +283,8 @@ class NewgroundsExtractor(Extractor): "X-Requested-With": "XMLHttpRequest", } sources = self.request(url, headers=headers).json()["sources"] - - if self.format is True: - src = sources["360p"][0]["src"].replace(".360p.", ".") - formats = sources - else: - formats = [] - for fmt, src in sources.items(): - width = text.parse_int(fmt.rstrip("p")) - if width <= self.format: - formats.append((width, src)) - if formats: - formats.sort(reverse=True) - src, formats = formats[0][1][0]["src"], formats[1:] - else: - src = "" - - fallback = self._video_fallback(formats) + formats = self._video_formats(sources) + src = next(formats, "") date = text.parse_timestamp(src.rpartition("?")[2]) return { @@ -306,15 +298,33 @@ class NewgroundsExtractor(Extractor): "rating" : extr('class="rated-', '"'), "index" : text.parse_int(index), "_index" : index, - "_fallback" : fallback, + "_fallback" : formats, } - @staticmethod - def _video_fallback(formats): - if isinstance(formats, dict): - formats = list(formats.items()) - formats.sort(key=lambda fmt: text.parse_int(fmt[0].rstrip("p")), - reverse=True) + def _video_formats(self, sources): + src = sources["360p"][0]["src"] + sub = re.compile(r"\.360p\.\w+").sub + + for fmt in self.format: + try: + if isinstance(fmt, int): + yield sources[str(fmt) + "p"][0]["src"] + elif fmt in sources: + yield sources[fmt][0]["src"] + else: + yield sub("." + fmt, src, 1) + except Exception as exc: + self.log.debug("Video format '%s' not available (%s: %s)", + fmt, exc.__class__.__name__, exc) + + def _video_formats_limit(self, sources): + formats = [] + for fmt, src in sources.items(): + width = text.parse_int(fmt.rstrip("p")) + if width <= self.format: + formats.append((width, src)) + + formats.sort(reverse=True) for fmt in formats: yield fmt[1][0]["src"] diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index c50c013..60cca22 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -56,7 +56,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor): data["user_id"] = data["artist_id"] data["user_name"] = data["artist_name"] - urls = list(self._extract_images(image_id, page)) + urls = self._extract_images(image_id, page) data["count"] = len(urls) yield Message.Directory, data @@ -113,11 +113,14 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor): # multiple images url = "{}/view_popup.php?id={}".format(self.root, image_id) page = self.request(url).text - yield from text.extract_iter( - page, 'href="javascript:void(0);"><img src="', '"') + return [ + text.extr(media, ' src="', '"') + for media in text.extract_iter( + page, 'href="javascript:void(0);"><', '>') + ] else: pos = page.find('id="view-center"') + 1 - yield text.extract(page, 'itemprop="image" src="', '"', pos)[0] + return (text.extr(page, 'itemprop="image" src="', '"', pos),) @staticmethod def _extract_user_name(page): diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py index 2bce597..cfc8861 100644 --- a/gallery_dl/extractor/nitter.py +++ b/gallery_dl/extractor/nitter.py @@ -234,26 +234,6 @@ class NitterExtractor(BaseExtractor): BASE_PATTERN = NitterExtractor.update({ - "nitter.net": { - "root": "https://nitter.net", - "pattern": r"nitter\.net", - }, - "nitter.1d4.us": { - "root": "https://nitter.1d4.us", - "pattern": r"nitter\.1d4\.us", - }, - "nitter.kavin.rocks": { - "root": "https://nitter.kavin.rocks", - "pattern": r"nitter\.kavin\.rocks", - }, - "nitter.unixfox.eu": { - "root": "https://nitter.unixfox.eu", - "pattern": r"nitter\.unixfox\.eu", - }, - "nitter.it": { - "root": "https://nitter.it", - "pattern": r"nitter\.it", - }, }) USER_PATTERN = BASE_PATTERN + r"/(i(?:/user/|d:)(\d+)|[^/?#]+)" diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index 5571575..9d025d5 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -424,7 +424,7 @@ class OAuthPixiv(OAuthBase): "code_challenge_method": "S256", "client": "pixiv-android", } - code = self.open(url, params, self._input) + code = self.open(url, params, self._input_code) url = "https://oauth.secure.pixiv.net/auth/token" headers = { @@ -459,7 +459,7 @@ class OAuthPixiv(OAuthBase): stdout_write(self._generate_message(("refresh-token",), (token,))) - def _input(self): + def _input_code(self): stdout_write("""\ 1) Open your browser's Developer Tools (F12) and switch to the Network tab 2) Login @@ -471,5 +471,5 @@ class OAuthPixiv(OAuthBase): like the entire URL or several query parameters. """) - code = input("code: ") + code = self.input("code: ") return code.rpartition("=")[2].strip() diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py index 339646f..150efed 100644 --- a/gallery_dl/extractor/philomena.py +++ b/gallery_dl/extractor/philomena.py @@ -24,8 +24,13 @@ class PhilomenaExtractor(BooruExtractor): def _init(self): self.api = PhilomenaAPI(self) + if not self.config("svg", True): + self._file_url = operator.itemgetter("view_url") - _file_url = operator.itemgetter("view_url") + def _file_url(self, post): + if post["format"] == "svg": + return post["view_url"].rpartition(".")[0] + ".svg" + return post["view_url"] @staticmethod def _prepare(post): diff --git a/gallery_dl/extractor/photobucket.py b/gallery_dl/extractor/photobucket.py deleted file mode 100644 index a01c9fe..0000000 --- a/gallery_dl/extractor/photobucket.py +++ /dev/null @@ -1,145 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2019-2023 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://photobucket.com/""" - -from .common import Extractor, Message -from .. import text, exception -import binascii -import json - - -class PhotobucketAlbumExtractor(Extractor): - """Extractor for albums on photobucket.com""" - category = "photobucket" - subcategory = "album" - directory_fmt = ("{category}", "{username}", "{location}") - filename_fmt = "{offset:>03}{pictureId:?_//}_{titleOrFilename}.{extension}" - archive_fmt = "{id}" - pattern = (r"(?:https?://)?((?:[\w-]+\.)?photobucket\.com)" - r"/user/[^/?&#]+/library(?:/[^?&#]*)?") - example = "https://s123.photobucket.com/user/USER/library" - - def __init__(self, match): - self.root = "https://" + match.group(1) - Extractor.__init__(self, match) - - def _init(self): - self.session.headers["Referer"] = self.url - - def items(self): - for image in self.images(): - image["titleOrFilename"] = text.unescape(image["titleOrFilename"]) - image["title"] = text.unescape(image["title"]) - image["extension"] = image["ext"] - yield Message.Directory, image - yield Message.Url, image["fullsizeUrl"], image - - if self.config("subalbums", True): - for album in self.subalbums(): - album["_extractor"] = PhotobucketAlbumExtractor - yield Message.Queue, album["url"], album - - def images(self): - """Yield all images of the current album""" - url = self.url - params = {"sort": "3", "page": 1} - - while True: - page = self.request(url, params=params).text - json_data = text.extract(page, "collectionData:", ",\n")[0] - if not json_data: - msg = text.extr(page, 'libraryPrivacyBlock">', "</div>") - msg = ' ("{}")'.format(text.remove_html(msg)) if msg else "" - self.log.error("Unable to get JSON data%s", msg) - return - data = json.loads(json_data) - - yield from data["items"]["objects"] - - if data["total"] <= data["offset"] + data["pageSize"]: - self.album_path = data["currentAlbumPath"] - return - params["page"] += 1 - - def subalbums(self): - """Return all subalbum objects""" - url = self.root + "/component/Albums-SubalbumList" - params = { - "albumPath": self.album_path, - "fetchSubAlbumsOnly": "true", - "deferCollapsed": "true", - "json": "1", - } - - data = self.request(url, params=params).json() - return data["body"].get("subAlbums", ()) - - -class PhotobucketImageExtractor(Extractor): - """Extractor for individual images from photobucket.com""" - category = "photobucket" - subcategory = "image" - directory_fmt = ("{category}", "{username}") - filename_fmt = "{pictureId:?/_/}{titleOrFilename}.{extension}" - archive_fmt = "{username}_{id}" - pattern = (r"(?:https?://)?(?:[\w-]+\.)?photobucket\.com" - r"(?:/gallery/user/([^/?&#]+)/media/([^/?&#]+)" - r"|/user/([^/?&#]+)/media/[^?&#]+\.html)") - example = "https://s123.photobucket.com/user/USER/media/NAME.EXT.html" - - def __init__(self, match): - Extractor.__init__(self, match) - self.user = match.group(1) or match.group(3) - self.media_id = match.group(2) - - def _init(self): - self.session.headers["Referer"] = self.url - - def items(self): - url = "https://photobucket.com/galleryd/search.php" - params = {"userName": self.user, "searchTerm": "", "ref": ""} - - if self.media_id: - params["mediaId"] = self.media_id - else: - params["url"] = self.url - - # retry API call up to 5 times, since it can randomly fail - tries = 0 - while tries < 5: - data = self.request(url, method="POST", params=params).json() - image = data["mediaDocuments"] - if "message" not in image: - break # success - tries += 1 - self.log.debug(image["message"]) - else: - raise exception.StopExtraction(image["message"]) - - # adjust metadata entries to be at least somewhat similar - # to what the 'album' extractor provides - if "media" in image: - image = image["media"][image["mediaIndex"]] - image["albumView"] = data["mediaDocuments"]["albumView"] - image["username"] = image["ownerId"] - else: - image["fileUrl"] = image.pop("imageUrl") - - image.setdefault("title", "") - image.setdefault("description", "") - name, _, ext = image["fileUrl"].rpartition("/")[2].rpartition(".") - image["ext"] = image["extension"] = ext - image["titleOrFilename"] = image["title"] or name - image["tags"] = image.pop("clarifaiTagList", []) - - mtype, _, mid = binascii.a2b_base64(image["id"]).partition(b":") - image["pictureId"] = mid.decode() if mtype == b"mediaId" else "" - - yield Message.Directory, image - yield Message.Url, image["fileUrl"], image diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py index 67f38c4..a68f0db 100644 --- a/gallery_dl/extractor/shimmie2.py +++ b/gallery_dl/extractor/shimmie2.py @@ -92,6 +92,10 @@ BASE_PATTERN = Shimmie2Extractor.update({ "root": "https://rule34hentai.net", "pattern": r"rule34hentai\.net", }, + "vidyapics": { + "root": "https://vidya.pics", + "pattern": r"vidya\.pics", + }, }) + r"/(?:index\.php\?q=/?)?" diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py index 38a2d16..6ec44ba 100644 --- a/gallery_dl/extractor/skeb.py +++ b/gallery_dl/extractor/skeb.py @@ -7,7 +7,7 @@ """Extractors for https://skeb.jp/""" from .common import Extractor, Message -from .. import text, exception +from .. import text import itertools @@ -31,14 +31,15 @@ class SkebExtractor(Extractor): if "Authorization" not in self.session.headers: self.headers["Authorization"] = "Bearer null" - def request(self, url, **kwargs): - while True: - try: - return Extractor.request(self, url, **kwargs) - except exception.HttpError as exc: - if exc.status == 429 and "request_key" in exc.response.cookies: - continue - raise + def _handle_429(self, response): + if "request_key" in response.cookies: + return True + + request_key = text.extr( + response.text, "request_key=", ";") + if request_key: + self.cookies.set("request_key", request_key, domain="skeb.jp") + return True def items(self): metadata = self.metadata() diff --git a/gallery_dl/extractor/speakerdeck.py b/gallery_dl/extractor/speakerdeck.py index e44fdae..3210fd8 100644 --- a/gallery_dl/extractor/speakerdeck.py +++ b/gallery_dl/extractor/speakerdeck.py @@ -8,45 +8,35 @@ """Extractors for https://speakerdeck.com/""" -from .common import Extractor, Message +from .common import GalleryExtractor from .. import text +import re -class SpeakerdeckPresentationExtractor(Extractor): +class SpeakerdeckPresentationExtractor(GalleryExtractor): """Extractor for images from a presentation on speakerdeck.com""" category = "speakerdeck" subcategory = "presentation" directory_fmt = ("{category}", "{user}") filename_fmt = "{presentation}-{num:>02}.{extension}" archive_fmt = "{presentation}_{num}" + root = "https://speakerdeck.com" pattern = r"(?:https?://)?(?:www\.)?speakerdeck\.com/([^/?#]+)/([^/?#]+)" example = "https://speakerdeck.com/USER/PRESENTATION" def __init__(self, match): - Extractor.__init__(self, match) + GalleryExtractor.__init__(self, match, "") self.user, self.presentation = match.groups() - self.presentation_id = None - - def items(self): - data = self.get_job_metadata() - imgs = self.get_image_urls() - data["count"] = len(imgs) - yield Message.Directory, data - for data["num"], url in enumerate(imgs, 1): - yield Message.Url, url, text.nameext_from_url(url, data) - - def get_job_metadata(self): - """Collect metadata for extractor-job""" - url = "https://speakerdeck.com/oembed.json" + + def metadata(self, _): + url = self.root + "/oembed.json" params = { - "url": "https://speakerdeck.com/" + self.user + - "/" + self.presentation, + "url": "{}/{}/{}".format(self.root, self.user, self.presentation), } - data = self.request(url, params=params).json() - self.presentation_id, pos = \ - text.extract(data["html"], 'src="//speakerdeck.com/player/', '"') + self.presentation_id = text.extr( + data["html"], 'src="//speakerdeck.com/player/', '"') return { "user": self.user, @@ -56,8 +46,10 @@ class SpeakerdeckPresentationExtractor(Extractor): "author": data["author_name"], } - def get_image_urls(self): - """Extract and return a list of all image-urls""" - page = self.request("https://speakerdeck.com/player/" + - self.presentation_id).text - return list(text.extract_iter(page, 'js-sd-slide" data-url="', '"')) + def images(self, _): + url = "{}/player/{}".format(self.root, self.presentation_id) + page = re.sub(r"\s+", " ", self.request(url).text) + return [ + (url, None) + for url in text.extract_iter(page, 'js-sd-slide" data-url="', '"') + ] diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py index 08cccab..bba1ece 100644 --- a/gallery_dl/extractor/szurubooru.py +++ b/gallery_dl/extractor/szurubooru.py @@ -98,13 +98,13 @@ class SzurubooruTagExtractor(SzurubooruExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}_{version}" - pattern = BASE_PATTERN + r"/posts/query=([^/?#]+)" + pattern = BASE_PATTERN + r"/posts(?:/query=([^/?#]*))?" example = "https://booru.foalcon.com/posts/query=TAG" def __init__(self, match): SzurubooruExtractor.__init__(self, match) - query = match.group(match.lastindex) - self.query = text.unquote(query.replace("+", " ")) + query = self.groups[-1] + self.query = text.unquote(query.replace("+", " ")) if query else "" def metadata(self): return {"search_tags": self.query} @@ -119,9 +119,5 @@ class SzurubooruPostExtractor(SzurubooruExtractor): pattern = BASE_PATTERN + r"/post/(\d+)" example = "https://booru.foalcon.com/post/12345" - def __init__(self, match): - SzurubooruExtractor.__init__(self, match) - self.post_id = match.group(match.lastindex) - def posts(self): - return (self._api_request("/post/" + self.post_id),) + return (self._api_request("/post/" + self.groups[-1]),) diff --git a/gallery_dl/extractor/tcbscans.py b/gallery_dl/extractor/tcbscans.py index de6f3ee..71431ad 100644 --- a/gallery_dl/extractor/tcbscans.py +++ b/gallery_dl/extractor/tcbscans.py @@ -4,19 +4,23 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://tcbscans.com/""" +"""Extractors for https://tcbscans.me/""" from .common import ChapterExtractor, MangaExtractor from .. import text -BASE_PATTERN = r"(?:https?://)?(?:tcbscans|onepiecechapters)\.com" +BASE_PATTERN = (r"(?:https?://)?(?:tcb(?:-backup\.bihar-mirchi|scans)" + r"|onepiecechapters)\.(?:com|me)") class TcbscansChapterExtractor(ChapterExtractor): category = "tcbscans" - root = "https://tcbscans.com" pattern = BASE_PATTERN + r"(/chapters/\d+/[^/?#]+)" - example = "https://tcbscans.com/chapters/12345/MANGA-chapter-123" + example = "https://tcbscans.me/chapters/12345/MANGA-chapter-123" + + def __init__(self, match): + self.root = text.root_from_url(match.group(0)) + ChapterExtractor.__init__(self, match) def images(self, page): return [ @@ -39,10 +43,13 @@ class TcbscansChapterExtractor(ChapterExtractor): class TcbscansMangaExtractor(MangaExtractor): category = "tcbscans" - root = "https://tcbscans.com" chapterclass = TcbscansChapterExtractor pattern = BASE_PATTERN + r"(/mangas/\d+/[^/?#]+)" - example = "https://tcbscans.com/mangas/123/MANGA" + example = "https://tcbscans.me/mangas/123/MANGA" + + def __init__(self, match): + self.root = text.root_from_url(match.group(0)) + MangaExtractor.__init__(self, match) def chapters(self, page): data = { diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py index f57f479..a725a2c 100644 --- a/gallery_dl/extractor/twibooru.py +++ b/gallery_dl/extractor/twibooru.py @@ -28,8 +28,13 @@ class TwibooruExtractor(BooruExtractor): def _init(self): self.api = TwibooruAPI(self) + if not self.config("svg", True): + self._file_url = operator.itemgetter("view_url") - _file_url = operator.itemgetter("view_url") + def _file_url(self, post): + if post["format"] == "svg": + return post["view_url"].rpartition(".")[0] + ".svg" + return post["view_url"] @staticmethod def _prepare(post): diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index ff77828..ec098aa 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -36,6 +36,7 @@ class TwitterExtractor(Extractor): self.user = match.group(1) def _init(self): + self.unavailable = self.config("unavailable", False) self.textonly = self.config("text-tweets", False) self.retweets = self.config("retweets", False) self.replies = self.config("replies", True) @@ -143,6 +144,15 @@ class TwitterExtractor(Extractor): def _extract_media(self, tweet, entities, files): for media in entities: + + if "ext_media_availability" in media: + ext = media["ext_media_availability"] + if ext.get("status") == "Unavailable": + self.log.warning("Media unavailable (%s - '%s')", + tweet["id_str"], ext.get("reason")) + if not self.unavailable: + continue + descr = media.get("ext_alt_text") width = media["original_info"].get("width", 0) height = media["original_info"].get("height", 0) @@ -1709,11 +1719,16 @@ class TwitterAPI(): variables["cursor"] = cursor def _handle_ratelimit(self, response): - if self.extractor.config("ratelimit") == "abort": + rl = self.extractor.config("ratelimit") + if rl == "abort": raise exception.StopExtraction("Rate limit exceeded") - - until = response.headers.get("x-rate-limit-reset") - self.extractor.wait(until=until, seconds=None if until else 60) + elif rl and isinstance(rl, str) and rl.startswith("wait:"): + until = None + seconds = text.parse_float(rl.partition(":")[2]) or 60.0 + else: + until = response.headers.get("x-rate-limit-reset") + seconds = None if until else 60.0 + self.extractor.wait(until=until, seconds=seconds) def _process_tombstone(self, entry, tombstone): text = (tombstone.get("richText") or tombstone["text"])["text"] @@ -1849,7 +1864,7 @@ def _login_impl(extr, username, password): }, } elif subtask == "LoginEnterAlternateIdentifierSubtask": - alt = extr.input( + alt = extr.config("username_alt") or extr.input( "Alternate Identifier (username, email, phone number): ") data = { "enter_text": { @@ -1881,8 +1896,9 @@ def _login_impl(extr, username, password): raise exception.AuthenticationError("Login requires CAPTCHA") elif subtask == "DenyLoginSubtask": raise exception.AuthenticationError("Login rejected as suspicious") - elif subtask == "ArkoseLogin": - raise exception.AuthenticationError("No auth token cookie") + elif subtask == "LoginSuccessSubtask": + raise exception.AuthenticationError( + "No 'auth_token' cookie received") else: raise exception.StopExtraction("Unrecognized subtask %s", subtask) diff --git a/gallery_dl/extractor/vichan.py b/gallery_dl/extractor/vichan.py index 79d7916..654c451 100644 --- a/gallery_dl/extractor/vichan.py +++ b/gallery_dl/extractor/vichan.py @@ -22,10 +22,6 @@ BASE_PATTERN = VichanExtractor.update({ "root": "https://8kun.top", "pattern": r"8kun\.top", }, - "wikieat": { - "root": "https://wikieat.club", - "pattern": r"wikieat\.club", - }, "smugloli": { "root": None, "pattern": r"smuglo(?:\.li|li\.net)", diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index 0b212d5..ec1c926 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -325,6 +325,26 @@ def _parse_slice(format_spec, default): return apply_slice +def _parse_conversion(format_spec, default): + conversions, _, format_spec = format_spec.partition(_SEPARATOR) + convs = [_CONVERSIONS[c] for c in conversions[1:]] + fmt = _build_format_func(format_spec, default) + + if len(conversions) <= 2: + + def convert_one(obj): + return fmt(conv(obj)) + conv = _CONVERSIONS[conversions[1]] + return convert_one + + def convert_many(obj): + for conv in convs: + obj = conv(obj) + return fmt(obj) + convs = [_CONVERSIONS[c] for c in conversions[1:]] + return convert_many + + def _parse_maxlen(format_spec, default): maxlen, replacement, format_spec = format_spec.split(_SEPARATOR, 2) maxlen = text.parse_int(maxlen[1:]) @@ -403,6 +423,19 @@ def _parse_sort(format_spec, default): return sort_asc +def _parse_limit(format_spec, default): + limit, hint, format_spec = format_spec.split(_SEPARATOR, 2) + limit = int(limit[1:]) + limit_hint = limit - len(hint) + fmt = _build_format_func(format_spec, default) + + def apply_limit(obj): + if len(obj) > limit: + obj = obj[:limit_hint] + hint + return fmt(obj) + return apply_limit + + def _default_format(format_spec, default): def wrap(obj): return format(obj, format_spec) @@ -447,10 +480,12 @@ _CONVERSIONS = { _FORMAT_SPECIFIERS = { "?": _parse_optional, "[": _parse_slice, + "C": _parse_conversion, "D": _parse_datetime, - "L": _parse_maxlen, "J": _parse_join, + "L": _parse_maxlen, "O": _parse_offset, "R": _parse_replace, "S": _parse_sort, + "X": _parse_limit, } diff --git a/gallery_dl/option.py b/gallery_dl/option.py index 12622d0..f31d5ac 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -173,28 +173,6 @@ def build_parser(): action="version", version=version.__version__, help="Print program version and exit", ) - if util.EXECUTABLE: - general.add_argument( - "-U", "--update", - dest="update", action="store_const", const="latest", - help="Update to the latest version", - ) - general.add_argument( - "--update-to", - dest="update", metavar="[CHANNEL@]TAG", - help="Upgrade/downgrade to a specific version", - ) - general.add_argument( - "--update-check", - dest="update", action="store_const", const="check", - help="Check if a newer version is available", - ) - else: - general.add_argument( - "-U", "--update-check", - dest="update", action="store_const", const="check", - help="Check if a newer version is available", - ) general.add_argument( "-f", "--filename", dest="filename", metavar="FORMAT", @@ -217,16 +195,6 @@ def build_parser(): help="Load external extractors from PATH", ) general.add_argument( - "--proxy", - dest="proxy", metavar="URL", action=ConfigAction, - help="Use the specified proxy", - ) - general.add_argument( - "--source-address", - dest="source-address", metavar="IP", action=ConfigAction, - help="Client-side IP address to bind to", - ) - general.add_argument( "--user-agent", dest="user-agent", metavar="UA", action=ConfigAction, help="User-Agent request header", @@ -238,6 +206,31 @@ def build_parser(): "(ALL to delete everything)", ) + update = parser.add_argument_group("Update Options") + if util.EXECUTABLE or 1: + update.add_argument( + "-U", "--update", + dest="update", action="store_const", const="latest", + help="Update to the latest version", + ) + update.add_argument( + "--update-to", + dest="update", metavar="CHANNEL[@TAG]", + help=("Switch to a dfferent release channel (stable or dev) " + "or upgrade/downgrade to a specific version"), + ) + update.add_argument( + "--update-check", + dest="update", action="store_const", const="check", + help="Check if a newer version is available", + ) + else: + update.add_argument( + "-U", "--update-check", + dest="update", action="store_const", const="check", + help="Check if a newer version is available", + ) + input = parser.add_argument_group("Input Options") input.add_argument( "urls", @@ -263,6 +256,11 @@ def build_parser(): help=("Download URLs found in FILE. " "Delete them after they were downloaded successfully."), ) + input.add_argument( + "--no-input", + dest="input", nargs=0, action=ConfigConstAction, const=False, + help=("Do not prompt for passwords/tokens"), + ) output = parser.add_argument_group("Output Options") output.add_argument( @@ -353,23 +351,45 @@ def build_parser(): help=("Do not emit ANSI color codes in output"), ) - downloader = parser.add_argument_group("Downloader Options") - downloader.add_argument( - "-r", "--limit-rate", - dest="rate", metavar="RATE", action=ConfigAction, - help="Maximum download rate (e.g. 500k or 2.5M)", - ) - downloader.add_argument( + networking = parser.add_argument_group("Networking Options") + networking.add_argument( "-R", "--retries", dest="retries", metavar="N", type=int, action=ConfigAction, help=("Maximum number of retries for failed HTTP requests " "or -1 for infinite retries (default: 4)"), ) - downloader.add_argument( + networking.add_argument( "--http-timeout", dest="timeout", metavar="SECONDS", type=float, action=ConfigAction, help="Timeout for HTTP connections (default: 30.0)", ) + networking.add_argument( + "--proxy", + dest="proxy", metavar="URL", action=ConfigAction, + help="Use the specified proxy", + ) + networking.add_argument( + "--source-address", + dest="source-address", metavar="IP", action=ConfigAction, + help="Client-side IP address to bind to", + ) + networking.add_argument( + "--no-check-certificate", + dest="verify", nargs=0, action=ConfigConstAction, const=False, + help="Disable HTTPS certificate validation", + ) + + downloader = parser.add_argument_group("Downloader Options") + downloader.add_argument( + "-r", "--limit-rate", + dest="rate", metavar="RATE", action=ConfigAction, + help="Maximum download rate (e.g. 500k or 2.5M)", + ) + downloader.add_argument( + "--chunk-size", + dest="chunk-size", metavar="SIZE", action=ConfigAction, + help="Size of in-memory data chunks (default: 32k)", + ) downloader.add_argument( "--sleep", dest="sleep", metavar="SECONDS", action=ConfigAction, @@ -390,21 +410,6 @@ def build_parser(): "for an input URL"), ) downloader.add_argument( - "--filesize-min", - dest="filesize-min", metavar="SIZE", action=ConfigAction, - help="Do not download files smaller than SIZE (e.g. 500k or 2.5M)", - ) - downloader.add_argument( - "--filesize-max", - dest="filesize-max", metavar="SIZE", action=ConfigAction, - help="Do not download files larger than SIZE (e.g. 500k or 2.5M)", - ) - downloader.add_argument( - "--chunk-size", - dest="chunk-size", metavar="SIZE", action=ConfigAction, - help="Size of in-memory data chunks (default: 32k)", - ) - downloader.add_argument( "--no-part", dest="part", nargs=0, action=ConfigConstAction, const=False, help="Do not use .part files", @@ -425,16 +430,6 @@ def build_parser(): dest="download", nargs=0, action=ConfigConstAction, const=False, help=("Do not download any files") ) - downloader.add_argument( - "--no-postprocessors", - dest="postprocess", nargs=0, action=ConfigConstAction, const=False, - help=("Do not run any post processors") - ) - downloader.add_argument( - "--no-check-certificate", - dest="verify", nargs=0, action=ConfigConstAction, const=False, - help="Disable HTTPS certificate validation", - ) configuration = parser.add_argument_group("Configuration Options") configuration.add_argument( @@ -461,10 +456,20 @@ def build_parser(): ) configuration.add_argument( "--config-create", - dest="config_init", action="store_true", + dest="config", action="store_const", const="init", help="Create a basic configuration file", ) configuration.add_argument( + "--config-status", + dest="config", action="store_const", const="status", + help="Show configuration file status", + ) + configuration.add_argument( + "--config-open", + dest="config", action="store_const", const="open", + help="Open configuration file in external application", + ) + configuration.add_argument( "--config-ignore", dest="config_load", action="store_false", help="Do not read default configuration files", @@ -516,12 +521,6 @@ def build_parser(): selection = parser.add_argument_group("Selection Options") selection.add_argument( - "--download-archive", - dest="archive", metavar="FILE", action=ConfigAction, - help=("Record all downloaded or skipped files in FILE and " - "skip downloading any file already in it"), - ) - selection.add_argument( "-A", "--abort", dest="abort", metavar="N", type=int, help=("Stop current extractor run " @@ -534,6 +533,22 @@ def build_parser(): "after N consecutive file downloads were skipped"), ) selection.add_argument( + "--filesize-min", + dest="filesize-min", metavar="SIZE", action=ConfigAction, + help="Do not download files smaller than SIZE (e.g. 500k or 2.5M)", + ) + selection.add_argument( + "--filesize-max", + dest="filesize-max", metavar="SIZE", action=ConfigAction, + help="Do not download files larger than SIZE (e.g. 500k or 2.5M)", + ) + selection.add_argument( + "--download-archive", + dest="archive", metavar="FILE", action=ConfigAction, + help=("Record all downloaded or skipped files in FILE and " + "skip downloading any file already in it"), + ) + selection.add_argument( "--range", dest="image-range", metavar="RANGE", action=ConfigAction, help=("Index range(s) specifying which files to download. " @@ -574,6 +589,11 @@ def build_parser(): help="Activate the specified post processor", ) postprocessor.add_argument( + "--no-postprocessors", + dest="postprocess", nargs=0, action=ConfigConstAction, const=False, + help=("Do not run any post processors") + ) + postprocessor.add_argument( "-O", "--postprocessor-option", dest="options_pp", metavar="KEY=VALUE", action=PPParseAction, default={}, diff --git a/gallery_dl/output.py b/gallery_dl/output.py index 3518545..bd5d959 100644 --- a/gallery_dl/output.py +++ b/gallery_dl/output.py @@ -105,6 +105,7 @@ class LoggerAdapterActions(): self.error = functools.partial(self.log, logging.ERROR) def log(self, level, msg, *args, **kwargs): + msg = str(msg) if args: msg = msg % args diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py index c63a3d9..9e60ce2 100644 --- a/gallery_dl/postprocessor/ugoira.py +++ b/gallery_dl/postprocessor/ugoira.py @@ -158,6 +158,7 @@ class UgoiraPP(PostProcessor): except Exception as exc: print() self.log.error("%s: %s", exc.__class__.__name__, exc) + self.log.debug("", exc_info=True) pathfmt.realpath = pathfmt.temppath else: if self.mtime: @@ -266,8 +267,8 @@ class UgoiraPP(PostProcessor): append("") ffconcat = tempdir + "/ffconcat.txt" - with open(ffconcat, "w") as file: - file.write("\n".join(content)) + with open(ffconcat, "w") as fp: + fp.write("\n".join(content)) return ffconcat def _write_mkvmerge_timecodes(self, tempdir): @@ -282,8 +283,8 @@ class UgoiraPP(PostProcessor): append("") timecodes = tempdir + "/timecodes.tc" - with open(timecodes, "w") as file: - file.write("\n".join(content)) + with open(timecodes, "w") as fp: + fp.write("\n".join(content)) return timecodes def calculate_framerate(self, frames): diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 861ec7e..e76ddf3 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -106,12 +106,12 @@ def identity(x): return x -def true(_): +def true(_, __=None): """Always returns True""" return True -def false(_): +def false(_, __=None): """Always returns False""" return False @@ -540,11 +540,22 @@ class CustomNone(): def __bool__(): return False + __lt__ = true + __le__ = true + __eq__ = false + __ne__ = true + __gt__ = false + __ge__ = false + @staticmethod def __len__(): return 0 @staticmethod + def __hash__(): + return 0 + + @staticmethod def __format__(_): return "None" diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 6557763..a8ff38e 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.27.0" +__version__ = "1.27.1" __variant__ = None |
