From a5aecc343fd2886e7ae09bb3e2afeec38f175755 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Wed, 1 Dec 2021 14:44:00 -0500 Subject: New upstream version 1.19.3. --- gallery_dl/downloader/ytdl.py | 87 +++--- gallery_dl/extractor/__init__.py | 1 - gallery_dl/extractor/dynastyscans.py | 25 +- gallery_dl/extractor/exhentai.py | 8 +- gallery_dl/extractor/foolfuuka.py | 6 +- gallery_dl/extractor/gelbooru_v02.py | 15 +- gallery_dl/extractor/instagram.py | 20 +- gallery_dl/extractor/kemonoparty.py | 125 +++++++-- gallery_dl/extractor/mangadex.py | 42 ++- gallery_dl/extractor/mangoxo.py | 12 +- gallery_dl/extractor/philomena.py | 12 + gallery_dl/extractor/reactor.py | 228 +++++++-------- gallery_dl/extractor/seisoparty.py | 201 ------------- gallery_dl/extractor/shopify.py | 6 + gallery_dl/extractor/skeb.py | 3 +- gallery_dl/extractor/subscribestar.py | 14 +- gallery_dl/extractor/twitter.py | 37 ++- gallery_dl/extractor/webtoons.py | 5 +- gallery_dl/extractor/xvideos.py | 4 +- gallery_dl/extractor/ytdl.py | 79 +++--- gallery_dl/formatter.py | 12 + gallery_dl/job.py | 69 ++--- gallery_dl/util.py | 70 +++++ gallery_dl/version.py | 2 +- gallery_dl/ytdl.py | 513 ++++++++++++++++++++++++++++++++++ 25 files changed, 1024 insertions(+), 572 deletions(-) delete mode 100644 gallery_dl/extractor/seisoparty.py create mode 100644 gallery_dl/ytdl.py (limited to 'gallery_dl') diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index f4d3e05..8416ca0 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -9,7 +9,7 @@ """Downloader module for URLs requiring youtube-dl support""" from .common import DownloaderBase -from .. import text +from .. import ytdl, text import os @@ -17,70 +17,53 @@ class YoutubeDLDownloader(DownloaderBase): scheme = "ytdl" def __init__(self, job): - module = __import__(self.config("module") or "youtube_dl") - DownloaderBase.__init__(self, job) - extractor = job.extractor + extractor = job.extractor retries = self.config("retries", extractor._retries) - options = { - "format": self.config("format") or None, - "ratelimit": text.parse_bytes(self.config("rate"), None), + self.ytdl_opts = { "retries": retries+1 if retries >= 0 else float("inf"), "socket_timeout": self.config("timeout", extractor._timeout), "nocheckcertificate": not self.config("verify", extractor._verify), - "nopart": not self.part, - "updatetime": self.config("mtime", True), - "proxy": extractor.session.proxies.get("http"), - "min_filesize": text.parse_bytes( - self.config("filesize-min"), None), - "max_filesize": text.parse_bytes( - self.config("filesize-max"), None), } - raw_options = self.config("raw-options") - if raw_options: - options.update(raw_options) - - self.progress = self.config("progress", 3.0) - if self.progress is not None: - options["progress_hooks"] = (self._progress_hook,) - - if self.config("logging", True): - options["logger"] = self.log + self.ytdl_instance = None self.forward_cookies = self.config("forward-cookies", False) - + self.progress = self.config("progress", 3.0) self.outtmpl = self.config("outtmpl") - if self.outtmpl == "default": - self.outtmpl = module.DEFAULT_OUTTMPL - - self.ytdl = module.YoutubeDL(options) def download(self, url, pathfmt): kwdict = pathfmt.kwdict - ytdl = kwdict.pop("_ytdl_instance", None) - if ytdl: - if self.progress is not None and not ytdl._progress_hooks: - ytdl.add_progress_hook(self._progress_hook) - else: - ytdl = self.ytdl + ytdl_instance = kwdict.pop("_ytdl_instance", None) + if not ytdl_instance: + ytdl_instance = 
self.ytdl_instance + if not ytdl_instance: + module = __import__(self.config("module") or "youtube_dl") + self.ytdl_instance = ytdl_instance = ytdl.construct_YoutubeDL( + module, self, self.ytdl_opts) + if self.outtmpl == "default": + self.outtmpl = module.DEFAULT_OUTTMPL if self.forward_cookies: - set_cookie = ytdl.cookiejar.set_cookie + set_cookie = ytdl_instance.cookiejar.set_cookie for cookie in self.session.cookies: set_cookie(cookie) + if self.progress is not None and not ytdl_instance._progress_hooks: + ytdl_instance.add_progress_hook(self._progress_hook) + info_dict = kwdict.pop("_ytdl_info_dict", None) if not info_dict: try: - info_dict = ytdl.extract_info(url[5:], download=False) + info_dict = ytdl_instance.extract_info(url[5:], download=False) except Exception: return False if "entries" in info_dict: index = kwdict.get("_ytdl_index") if index is None: - return self._download_playlist(ytdl, pathfmt, info_dict) + return self._download_playlist( + ytdl_instance, pathfmt, info_dict) else: info_dict = info_dict["entries"][index] @@ -88,9 +71,9 @@ class YoutubeDLDownloader(DownloaderBase): if extra: info_dict.update(extra) - return self._download_video(ytdl, pathfmt, info_dict) + return self._download_video(ytdl_instance, pathfmt, info_dict) - def _download_video(self, ytdl, pathfmt, info_dict): + def _download_video(self, ytdl_instance, pathfmt, info_dict): if "url" in info_dict: text.nameext_from_url(info_dict["url"], pathfmt.kwdict) @@ -99,8 +82,9 @@ class YoutubeDLDownloader(DownloaderBase): info_dict["ext"] = "mkv" if self.outtmpl: - self._set_outtmpl(ytdl, self.outtmpl) - pathfmt.filename = filename = ytdl.prepare_filename(info_dict) + self._set_outtmpl(ytdl_instance, self.outtmpl) + pathfmt.filename = filename = \ + ytdl_instance.prepare_filename(info_dict) pathfmt.extension = info_dict["ext"] pathfmt.path = pathfmt.directory + filename pathfmt.realpath = pathfmt.temppath = ( @@ -115,40 +99,41 @@ class YoutubeDLDownloader(DownloaderBase): pathfmt.temppath = os.path.join( self.partdir, pathfmt.filename) - self._set_outtmpl(ytdl, pathfmt.temppath.replace("%", "%%")) + self._set_outtmpl(ytdl_instance, pathfmt.temppath.replace("%", "%%")) self.out.start(pathfmt.path) try: - ytdl.process_info(info_dict) + ytdl_instance.process_info(info_dict) except Exception: self.log.debug("Traceback", exc_info=True) return False return True - def _download_playlist(self, ytdl, pathfmt, info_dict): + def _download_playlist(self, ytdl_instance, pathfmt, info_dict): pathfmt.set_extension("%(playlist_index)s.%(ext)s") - self._set_outtmpl(ytdl, pathfmt.realpath) + self._set_outtmpl(ytdl_instance, pathfmt.realpath) for entry in info_dict["entries"]: - ytdl.process_info(entry) + ytdl_instance.process_info(entry) return True def _progress_hook(self, info): if info["status"] == "downloading" and \ info["elapsed"] >= self.progress: total = info.get("total_bytes") or info.get("total_bytes_estimate") + speed = info.get("speed") self.out.progress( None if total is None else int(total), info["downloaded_bytes"], - int(info["speed"]), + int(speed) if speed else 0, ) @staticmethod - def _set_outtmpl(ytdl, outtmpl): + def _set_outtmpl(ytdl_instance, outtmpl): try: - ytdl.outtmpl_dict["default"] = outtmpl + ytdl_instance.outtmpl_dict["default"] = outtmpl except AttributeError: - ytdl.params["outtmpl"] = outtmpl + ytdl_instance.params["outtmpl"] = outtmpl def compatible_formats(formats): diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 79fe971..dd9da01 100644 --- 
a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -111,7 +111,6 @@ modules = [ "sankaku", "sankakucomplex", "seiga", - "seisoparty", "senmanga", "sexcom", "simplyhentai", diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py index 4541d25..ab1044f 100644 --- a/gallery_dl/extractor/dynastyscans.py +++ b/gallery_dl/extractor/dynastyscans.py @@ -8,7 +8,7 @@ """Extractors for https://dynasty-scans.com/""" -from .common import ChapterExtractor, Extractor, Message +from .common import ChapterExtractor, MangaExtractor, Extractor, Message from .. import text import json import re @@ -48,12 +48,12 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor): (("http://dynasty-scans.com/chapters/" "hitoribocchi_no_oo_seikatsu_ch33"), { "url": "dce64e8c504118f1ab4135c00245ea12413896cb", - "keyword": "1564965671ac69bb7fbc340538397f6bd0aa269b", + "keyword": "b67599703c27316a2fe4f11c3232130a1904e032", }), (("http://dynasty-scans.com/chapters/" "new_game_the_spinoff_special_13"), { "url": "dbe5bbb74da2edcfb1832895a484e2a40bc8b538", - "keyword": "22b35029bc65d6d95db2e2c147b0a37f2d290f29", + "keyword": "6b674eb3a274999153f6be044973b195008ced2f", }), ) @@ -76,7 +76,8 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor): "author" : text.remove_html(author), "group" : (text.remove_html(group) or text.extract(group, ' alt="', '"')[0] or ""), - "date" : extr('"icon-calendar"> ', '<'), + "date" : text.parse_datetime(extr( + '"icon-calendar"> ', '<'), "%b %d, %Y"), "lang" : "en", "language": "English", } @@ -89,6 +90,22 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor): ] +class DynastyscansMangaExtractor(DynastyscansBase, MangaExtractor): + chapterclass = DynastyscansChapterExtractor + reverse = False + pattern = BASE_PATTERN + r"(/series/[^/?#]+)" + test = ("https://dynasty-scans.com/series/hitoribocchi_no_oo_seikatsu", { + "pattern": DynastyscansChapterExtractor.pattern, + "count": ">= 100", + }) + + def chapters(self, page): + return [ + (self.root + path, {}) + for path in text.extract_iter(page, '
<dd>\n<a href="', '"')
+        ]
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
             ', '')),
             "_"        : extr('<div id="gdc">', '<'),
-            "uploader" : text.unquote(extr('/uploader/', '"')),
+            "uploader" : extr('<div id="gdn">', '</div>
'), "date" : text.parse_datetime(extr( '>Posted:', ''), "%Y-%m-%d %H:%M"), "parent" : extr( @@ -255,6 +255,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): "torrentcount" : extr('>Torrent Download (', ')'), } + if data["uploader"].startswith("<"): + data["uploader"] = text.unescape(text.extract( + data["uploader"], ">", "<")[0]) + f = data["favorites"][0] if f == "N": data["favorites"] = "0" diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index d2c5e8f..6ddd689 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -122,7 +122,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): "url": "d309713d2f838797096b3e9cb44fe514a9c9d07a", }), ("https://desuarchive.org/a/thread/159542679/", { - "url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406", + "url": "2bddbe03b01b4630337f6916f6df36d1d443b7b8", }), ("https://boards.fireden.net/sci/thread/11264294/", { "url": "61cab625c95584a12a30049d054931d64f8d20aa", @@ -131,10 +131,10 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): "url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f", }), ("https://rbt.asia/g/thread/61487650/", { - "url": "61896d9d9a2edb556b619000a308a984307b6d30", + "url": "b4692707cddb4ad1c9ba1cde77c4703025cb86e5", }), ("https://archive.rebeccablacktech.com/g/thread/61487650/", { - "url": "61896d9d9a2edb556b619000a308a984307b6d30", + "url": "b4692707cddb4ad1c9ba1cde77c4703025cb86e5", }), ("https://thebarchive.com/b/thread/739772332/", { "url": "e8b18001307d130d67db31740ce57c8561b5d80c", diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py index e09e190..a42a202 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -27,8 +27,21 @@ class GelbooruV02Extractor(booru.BooruExtractor): params["pid"] = self.page_start params["limit"] = self.per_page + post = None while True: - root = self._api_request(params) + try: + root = self._api_request(params) + except ElementTree.ParseError: + if "tags" not in params or post is None: + raise + taglist = [tag for tag in params["tags"].split() + if not tag.startswith("id:<")] + taglist.append("id:<" + str(post.attrib["id"])) + params["tags"] = " ".join(taglist) + params["pid"] = 0 + continue + + post = None for post in root: yield post.attrib diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index bf479ab..a1dd465 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -439,15 +439,27 @@ class InstagramTaggedExtractor(InstagramExtractor): test = ("https://www.instagram.com/instagram/tagged/", { "range": "1-16", "count": ">= 16", + "keyword": { + "tagged_owner_id" : "25025320", + "tagged_username" : "instagram", + "tagged_full_name": "Instagram", + }, }) - def posts(self): + def metadata(self): url = "{}/{}/".format(self.root, self.item) - user = self._extract_profile_page(url) + self.user = user = self._extract_profile_page(url) + + return { + "tagged_owner_id" : user["id"], + "tagged_username" : user["username"], + "tagged_full_name": user["full_name"], + } + def posts(self): query_hash = "be13233562af2d229b008d2976b998b5" - variables = {"id": user["id"], "first": 50} - edge = self._get_edge_data(user, None) + variables = {"id": self.user["id"], "first": 50} + edge = self._get_edge_data(self.user, None) return self._pagination_graphql(query_hash, variables, edge) diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 2e1d0b2..6483278 100644 --- 
a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -14,7 +14,7 @@ from ..cache import cache import itertools import re -BASE_PATTERN = r"(?:https?://)?kemono\.party" +BASE_PATTERN = r"(?:https?://)?(?:www\.)?kemono\.party" USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)" @@ -30,19 +30,20 @@ class KemonopartyExtractor(Extractor): def items(self): self._prepare_ddosguard_cookies() - find_inline = re.compile( + self._find_inline = re.compile( r'src="(?:https?://kemono\.party)?(/inline/[^"]+' r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall - skip_service = \ - "patreon" if self.config("patreon-skip-file", True) else None + find_hash = re.compile("/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})").match + generators = self._build_file_generators(self.config("files")) comments = self.config("comments") + username = dms = None if self.config("metadata"): username = text.unescape(text.extract( self.request(self.user_url).text, '"): + dms.append({ + "body": text.unescape(text.extract( + dm, '
<pre>', '</pre>
', + )[0].strip()), + "date": text.extract(dm, 'datetime="', '"')[0], + }) + return dms + class KemonopartyUserExtractor(KemonopartyExtractor): """Extractor for all posts from a kemono.party user listing""" @@ -175,6 +226,8 @@ class KemonopartyPostExtractor(KemonopartyExtractor): "embed": dict, "extension": "jpeg", "filename": "P058kDFYus7DbqAkGlfWTlOr", + "hash": "210f35388e28bbcf756db18dd516e2d8" + "2ce758e0d32881eeee76d43e1716d382", "id": "506575", "num": 1, "published": "Sun, 11 Aug 2019 02:09:04 GMT", @@ -188,25 +241,39 @@ class KemonopartyPostExtractor(KemonopartyExtractor): }), # inline image (#1286) ("https://kemono.party/fanbox/user/7356311/post/802343", { - "pattern": r"https://kemono\.party/data/inline/fanbox" - r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg", + "pattern": r"https://kemono\.party/data/47/b5/47b5c014ecdcfabdf2c8" + r"5eec53f1133a76336997ae8596f332e97d956a460ad2\.jpg", + "keyword": {"hash": "47b5c014ecdcfabdf2c85eec53f1133a" + "76336997ae8596f332e97d956a460ad2"}, }), # kemono.party -> data.kemono.party ("https://kemono.party/gumroad/user/trylsc/post/IURjT", { - "pattern": r"https://kemono\.party/data/(file|attachment)s" - r"/gumroad/trylsc/IURjT/", + "pattern": r"https://kemono\.party/data/(" + r"files/gumroad/trylsc/IURjT/reward8\.jpg|" + r"c6/04/c6048f5067fd9dbfa7a8be565ac194efdfb6e4.+\.zip)", }), # username (#1548, #1652) ("https://kemono.party/gumroad/user/3252870377455/post/aJnAH", { "options": (("metadata", True),), "keyword": {"username": "Kudalyn's Creations"}, }), - # skip patreon main file (#1667, #1689) + # skip patreon duplicates ("https://kemono.party/patreon/user/4158582/post/32099982", { "count": 2, - "keyword": {"type": "attachment"}, + }), + # DMs (#2008) + ("https://kemono.party/patreon/user/34134344/post/38129255", { + "options": (("dms", True),), + "keyword": {"dms": [{ + "body": r"re:Hi! Thank you very much for supporting the work I" + r" did in May. Here's your reward pack! I hope you fin" + r"d something you enjoy in it. 
:\)\n\nhttps://www.medi" + r"afire.com/file/\w+/Set13_tier_2.zip/file", + "date": "2021-07-31 02:47:51.327865", + }]}, }), ("https://kemono.party/subscribestar/user/alcorart/post/184330"), + ("https://www.kemono.party/subscribestar/user/alcorart/post/184330"), ) def __init__(self, match): diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index ff1d7c3..393f4e2 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -46,10 +46,10 @@ class MangadexExtractor(Extractor): def _transform(self, chapter): relationships = defaultdict(list) for item in chapter["relationships"]: - relationships[item["type"]].append(item["id"]) - manga = self.api.manga(relationships["manga"][0]) + relationships[item["type"]].append(item) + manga = self.api.manga(relationships["manga"][0]["id"]) for item in manga["relationships"]: - relationships[item["type"]].append(item["id"]) + relationships[item["type"]].append(item) cattributes = chapter["attributes"] mattributes = manga["attributes"] @@ -75,16 +75,12 @@ class MangadexExtractor(Extractor): "count" : len(cattributes["data"]), } - if self.config("metadata"): - data["artist"] = [ - self.api.author(uuid)["attributes"]["name"] - for uuid in relationships["artist"]] - data["author"] = [ - self.api.author(uuid)["attributes"]["name"] - for uuid in relationships["author"]] - data["group"] = [ - self.api.group(uuid)["attributes"]["name"] - for uuid in relationships["scanlation_group"]] + data["artist"] = [artist["attributes"]["name"] + for artist in relationships["artist"]] + data["author"] = [author["attributes"]["name"] + for author in relationships["author"]] + data["group"] = [group["attributes"]["name"] + for group in relationships["scanlation_group"]] return data @@ -95,12 +91,11 @@ class MangadexChapterExtractor(MangadexExtractor): pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)" test = ( ("https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa", { - "keyword": "f6c2b908df06eb834d56193dfe1fa1f7c2c4dccd", + "keyword": "86fb262cf767dac6d965cd904ad499adba466404", # "content": "50383a4c15124682057b197d40261641a98db514", }), # oneshot ("https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831", { - "options": (("metadata", True),), "count": 64, "keyword": "6abcbe1e24eeb1049dc931958853cd767ee483fb", }), @@ -147,6 +142,8 @@ class MangadexMangaExtractor(MangadexExtractor): "date" : "type:datetime", "lang" : str, "language": str, + "artist" : ["Arakawa Hiromu"], + "author" : ["Arakawa Hiromu"], }, }), ("https://mangadex.cc/manga/d0c88e3b-ea64-4e07-9841-c1d2ac982f4a/", { @@ -193,20 +190,14 @@ class MangadexAPI(): def athome_server(self, uuid): return self._call("/at-home/server/" + uuid) - @memcache(keyarg=1) - def author(self, uuid): - return self._call("/author/" + uuid)["data"] - def chapter(self, uuid): - return self._call("/chapter/" + uuid)["data"] - - @memcache(keyarg=1) - def group(self, uuid): - return self._call("/group/" + uuid)["data"] + params = {"includes[]": ("scanlation_group",)} + return self._call("/chapter/" + uuid, params)["data"] @memcache(keyarg=1) def manga(self, uuid): - return self._call("/manga/" + uuid)["data"] + params = {"includes[]": ("artist", "author")} + return self._call("/manga/" + uuid, params)["data"] def manga_feed(self, uuid): order = "desc" if self.extractor.config("chapter-reverse") else "asc" @@ -275,6 +266,7 @@ class MangadexAPI(): ratings = ("safe", "suggestive", "erotica", "pornographic") params["contentRating[]"] = ratings + params["includes[]"] = 
("scanlation_group",) params["translatedLanguage[]"] = config("lang") params["offset"] = 0 diff --git a/gallery_dl/extractor/mangoxo.py b/gallery_dl/extractor/mangoxo.py index d45fbc9..1486057 100644 --- a/gallery_dl/extractor/mangoxo.py +++ b/gallery_dl/extractor/mangoxo.py @@ -122,18 +122,18 @@ class MangoxoAlbumExtractor(MangoxoExtractor): def metadata(self, page): """Return general metadata""" extr = text.extract_from(page) - title = extr('', '') - count = extr('id="pic-count">', '<') - cid = extr('', '', '<') - date = extr('', '<') + count = extr('id="pic-count">', '<') + date = extr('class="fa fa-calendar">', '<') descr = extr('
<pre>', '</pre>
') return { "channel": { "id": cid, - "name": text.unescape(cname.strip()), + "name": text.unescape(cname), "cover": cover, }, "album": { diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py index d3b3bb1..51a0d38 100644 --- a/gallery_dl/extractor/philomena.py +++ b/gallery_dl/extractor/philomena.py @@ -62,6 +62,8 @@ INSTANCES = { "filter_id": "56027"}, "ponybooru" : {"root": "https://ponybooru.org", "filter_id": "2"}, + "furbooru" : {"root": "https://furbooru.org", + "filter_id": "2"}, } BASE_PATTERN = PhilomenaExtractor.update(INSTANCES) @@ -124,6 +126,9 @@ class PhilomenaPostExtractor(PhilomenaExtractor): ("https://ponybooru.org/images/1", { "content": "bca26f58fafd791fe07adcd2a28efd7751824605", }), + ("https://furbooru.org/images/1", { + "content": "9eaa1e1b32fa0f16520912257dbefaff238d5fd2", + }), ) def __init__(self, match): @@ -157,6 +162,10 @@ class PhilomenaSearchExtractor(PhilomenaExtractor): "range": "40-60", "count": 21, }), + ("https://furbooru.org/search?q=cute", { + "range": "40-60", + "count": 21, + }), ) def __init__(self, match): @@ -210,6 +219,9 @@ class PhilomenaGalleryExtractor(PhilomenaExtractor): ("https://ponybooru.org/galleries/27", { "count": ">= 24", }), + ("https://furbooru.org/galleries/27", { + "count": ">= 13", + }), ) def __init__(self, match): diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py index 04fe581..b3a620a 100644 --- a/gallery_dl/extractor/reactor.py +++ b/gallery_dl/extractor/reactor.py @@ -8,29 +8,29 @@ """Generic extractors for *reactor sites""" -from .common import Extractor, Message +from .common import BaseExtractor, Message from .. import text import urllib.parse import json -BASE_PATTERN = r"(?:https?://)?((?:[^/.]+\.)?reactor\.cc)" - -class ReactorExtractor(Extractor): +class ReactorExtractor(BaseExtractor): """Base class for *reactor.cc extractors""" basecategory = "reactor" filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}" archive_fmt = "{post_id}_{num}" - instances = () request_interval = 5.0 def __init__(self, match): - Extractor.__init__(self, match) - self.root = "http://" + match.group(1) + BaseExtractor.__init__(self, match) + url = text.ensure_http_scheme(match.group(0), "http://") + pos = url.index("/", 10) + + self.root, self.path = url[:pos], url[pos:] self.session.headers["Referer"] = self.root self.gif = self.config("gif", False) - if not self.category: + if self.category == "reactor": # set category based on domain name netloc = urllib.parse.urlsplit(self.root).netloc self.category = netloc.rpartition(".")[0] @@ -50,7 +50,7 @@ class ReactorExtractor(Extractor): def posts(self): """Return all relevant post-objects""" - return self._pagination(self.url) + return self._pagination(self.root + self.path) def _pagination(self, url): while True: @@ -145,91 +145,63 @@ class ReactorExtractor(Extractor): } +BASE_PATTERN = ReactorExtractor.update({ + "reactor" : { + "root": "http://reactor.cc", + "pattern": r"(?:[^/.]+\.)?reactor\.cc", + }, + "joyreactor" : { + "root": "http://joyreactor.cc", + "pattern": r"(?:www\.)?joyreactor\.c(?:c|om)", + }, + "pornreactor": { + "root": "http://pornreactor.cc", + "pattern": r"(?:www\.)?(?:pornreactor\.cc|fapreactor.com)", + }, + "thatpervert": { + "root": "http://thatpervert.com", + }, +}) + + class ReactorTagExtractor(ReactorExtractor): """Extractor for tag searches on *reactor.cc sites""" subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "{search_tags}_{post_id}_{num}" pattern = BASE_PATTERN 
+ r"/tag/([^/?#]+)" - test = ("http://anime.reactor.cc/tag/Anime+Art",) + test = ( + ("http://reactor.cc/tag/gif"), + ("http://anime.reactor.cc/tag/Anime+Art"), + ("http://joyreactor.cc/tag/Advent+Cirno", { + "count": ">= 15", + }), + ("http://joyreactor.com/tag/Cirno", { + "url": "aa59090590b26f4654881301fe8fe748a51625a8", + }), + ("http://pornreactor.cc/tag/RiceGnat", { + "range": "1-25", + "count": ">= 25", + }), + ("http://fapreactor.com/tag/RiceGnat"), + ) def __init__(self, match): ReactorExtractor.__init__(self, match) - self.tag = match.group(2) + self.tag = match.group(match.lastindex) def metadata(self): return {"search_tags": text.unescape(self.tag).replace("+", " ")} -class ReactorSearchExtractor(ReactorTagExtractor): +class ReactorSearchExtractor(ReactorExtractor): """Extractor for search results on *reactor.cc sites""" subcategory = "search" directory_fmt = ("{category}", "search", "{search_tags}") archive_fmt = "s_{search_tags}_{post_id}_{num}" pattern = BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)" - test = ("http://anime.reactor.cc/search?q=Art",) - - -class ReactorUserExtractor(ReactorExtractor): - """Extractor for all posts of a user on *reactor.cc sites""" - subcategory = "user" - directory_fmt = ("{category}", "user", "{user}") - pattern = BASE_PATTERN + r"/user/([^/?#]+)" - test = ("http://anime.reactor.cc/user/Shuster",) - - def __init__(self, match): - ReactorExtractor.__init__(self, match) - self.user = match.group(2) - - def metadata(self): - return {"user": text.unescape(self.user).replace("+", " ")} - - -class ReactorPostExtractor(ReactorExtractor): - """Extractor for single posts on *reactor.cc sites""" - subcategory = "post" - pattern = BASE_PATTERN + r"/post/(\d+)" - test = ("http://anime.reactor.cc/post/3576250",) - - def __init__(self, match): - ReactorExtractor.__init__(self, match) - self.post_id = match.group(2) - - def items(self): - post = self.request(self.url).text - pos = post.find('class="uhead">') - for image in self._parse_post(post[pos:]): - if image["num"] == 1: - yield Message.Directory, image - url = image["url"] - yield Message.Url, url, text.nameext_from_url(url, image) - - -# -------------------------------------------------------------------- -# JoyReactor - -JR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(joyreactor\.c(?:c|om))" - - -class JoyreactorTagExtractor(ReactorTagExtractor): - """Extractor for tag searches on joyreactor.cc""" - category = "joyreactor" - pattern = JR_BASE_PATTERN + r"/tag/([^/?#]+)" - test = ( - ("http://joyreactor.cc/tag/Advent+Cirno", { - "count": ">= 15", - }), - ("http://joyreactor.com/tag/Cirno", { - "url": "aa59090590b26f4654881301fe8fe748a51625a8", - }), - ) - - -class JoyreactorSearchExtractor(ReactorSearchExtractor): - """Extractor for search results on joyreactor.cc""" - category = "joyreactor" - pattern = JR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)" test = ( + ("http://reactor.cc/search?q=Art"), ("http://joyreactor.cc/search/Nature", { "range": "1-25", "count": ">= 20", @@ -238,26 +210,54 @@ class JoyreactorSearchExtractor(ReactorSearchExtractor): "range": "1-25", "count": ">= 20", }), + ("http://pornreactor.cc/search?q=ecchi+hentai"), + ("http://fapreactor.com/search/ecchi+hentai"), ) + def __init__(self, match): + ReactorExtractor.__init__(self, match) + self.tag = match.group(match.lastindex) + + def metadata(self): + return {"search_tags": text.unescape(self.tag).replace("+", " ")} + -class JoyreactorUserExtractor(ReactorUserExtractor): - """Extractor for all posts of a user on joyreactor.cc""" - 
category = "joyreactor" - pattern = JR_BASE_PATTERN + r"/user/([^/?#]+)" +class ReactorUserExtractor(ReactorExtractor): + """Extractor for all posts of a user on *reactor.cc sites""" + subcategory = "user" + directory_fmt = ("{category}", "user", "{user}") + pattern = BASE_PATTERN + r"/user/([^/?#]+)" test = ( + ("http://reactor.cc/user/Dioklet"), + ("http://anime.reactor.cc/user/Shuster"), ("http://joyreactor.cc/user/hemantic"), ("http://joyreactor.com/user/Tacoman123", { "url": "60ce9a3e3db791a0899f7fb7643b5b87d09ae3b5", }), + ("http://pornreactor.cc/user/Disillusion", { + "range": "1-25", + "count": ">= 20", + }), + ("http://fapreactor.com/user/Disillusion"), ) + def __init__(self, match): + ReactorExtractor.__init__(self, match) + self.user = match.group(match.lastindex) + + def metadata(self): + return {"user": text.unescape(self.user).replace("+", " ")} + -class JoyreactorPostExtractor(ReactorPostExtractor): - """Extractor for single posts on joyreactor.cc""" - category = "joyreactor" - pattern = JR_BASE_PATTERN + r"/post/(\d+)" +class ReactorPostExtractor(ReactorExtractor): + """Extractor for single posts on *reactor.cc sites""" + subcategory = "post" + pattern = BASE_PATTERN + r"/post/(\d+)" test = ( + ("http://reactor.cc/post/4999736", { + "url": "dfc74d150d7267384d8c229c4b82aa210755daa0", + }), + ("http://anime.reactor.cc/post/3576250"), ("http://joyreactor.com/post/3721876", { # single image "pattern": r"http://img\d\.joyreactor\.com/pics/post/full" r"/cartoon-painting-monster-lake-4841316.jpeg", @@ -281,57 +281,6 @@ class JoyreactorPostExtractor(ReactorPostExtractor): ("http://joyreactor.cc/post/1299", { # "malformed" JSON "url": "ab02c6eb7b4035ad961b29ee0770ee41be2fcc39", }), - ) - - -# -------------------------------------------------------------------- -# PornReactor - -PR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(pornreactor\.cc|fapreactor.com)" - - -class PornreactorTagExtractor(ReactorTagExtractor): - """Extractor for tag searches on pornreactor.cc""" - category = "pornreactor" - pattern = PR_BASE_PATTERN + r"/tag/([^/?#]+)" - test = ( - ("http://pornreactor.cc/tag/RiceGnat", { - "range": "1-25", - "count": ">= 25", - }), - ("http://fapreactor.com/tag/RiceGnat"), - ) - - -class PornreactorSearchExtractor(ReactorSearchExtractor): - """Extractor for search results on pornreactor.cc""" - category = "pornreactor" - pattern = PR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)" - test = ( - ("http://pornreactor.cc/search?q=ecchi+hentai"), - ("http://fapreactor.com/search/ecchi+hentai"), - ) - - -class PornreactorUserExtractor(ReactorUserExtractor): - """Extractor for all posts of a user on pornreactor.cc""" - category = "pornreactor" - pattern = PR_BASE_PATTERN + r"/user/([^/?#]+)" - test = ( - ("http://pornreactor.cc/user/Disillusion", { - "range": "1-25", - "count": ">= 20", - }), - ("http://fapreactor.com/user/Disillusion"), - ) - - -class PornreactorPostExtractor(ReactorPostExtractor): - """Extractor for single posts on pornreactor.cc""" - category = "pornreactor" - subcategory = "post" - pattern = PR_BASE_PATTERN + r"/post/(\d+)" - test = ( ("http://pornreactor.cc/post/863166", { "url": "a09fb0577489e1f9564c25d0ad576f81b19c2ef3", "content": "ec6b0568bfb1803648744077da082d14de844340", @@ -340,3 +289,16 @@ class PornreactorPostExtractor(ReactorPostExtractor): "url": "2a956ce0c90e8bc47b4392db4fa25ad1342f3e54", }), ) + + def __init__(self, match): + ReactorExtractor.__init__(self, match) + self.post_id = match.group(match.lastindex) + + def items(self): + post = 
self.request(self.root + self.path).text + pos = post.find('class="uhead">') + for image in self._parse_post(post[pos:]): + if image["num"] == 1: + yield Message.Directory, image + url = image["url"] + yield Message.Url, url, text.nameext_from_url(url, image) diff --git a/gallery_dl/extractor/seisoparty.py b/gallery_dl/extractor/seisoparty.py deleted file mode 100644 index a2a24e0..0000000 --- a/gallery_dl/extractor/seisoparty.py +++ /dev/null @@ -1,201 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2021 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://seiso.party/""" - -from .common import Extractor, Message -from .. import text, exception -from ..cache import cache -import re - - -class SeisopartyExtractor(Extractor): - """Base class for seisoparty extractors""" - category = "seisoparty" - root = "https://seiso.party" - directory_fmt = ("{category}", "{service}", "{username}") - filename_fmt = "{id}_{title}_{num:>02}_{filename}.{extension}" - archive_fmt = "{service}_{user}_{id}_{num}" - cookiedomain = ".seiso.party" - - def __init__(self, match): - Extractor.__init__(self, match) - self.user_name = None - self._find_files = re.compile( - r'href="(https://cdn(?:-\d)?\.seiso\.party/files/[^"]+)').findall - - def items(self): - self._prepare_ddosguard_cookies() - - for post in self.posts(): - files = post.pop("files") - yield Message.Directory, post - for post["num"], url in enumerate(files, 1): - yield Message.Url, url, text.nameext_from_url(url, post) - - def _parse_post(self, page, post_id): - extr = text.extract_from(page) - return { - "service" : self.service, - "user" : self.user_id, - "username": self.user_name, - "id" : post_id, - "date" : text.parse_datetime(extr( - '
', '<'), - "%Y-%m-%d %H:%M:%S %Z"), - "title" : text.unescape(extr('class="post-title">', '<')), - "content" : text.unescape(extr("\n

\n", "\n

\n").strip()), - "files" : self._find_files(page), - } - - def login(self): - username, password = self._get_auth_info() - if username: - self._update_cookies(self._login_impl(username, password)) - - @cache(maxage=28*24*3600, keyarg=1) - def _login_impl(self, username, password): - self.log.info("Logging in as %s", username) - - url = self.root + "/account/login" - data = {"username": username, "password": password} - - response = self.request(url, method="POST", data=data) - if response.url.endswith("/account/login") and \ - "Username or password is incorrect" in response.text: - raise exception.AuthenticationError() - - return {c.name: c.value for c in response.history[0].cookies} - - -class SeisopartyUserExtractor(SeisopartyExtractor): - """Extractor for all posts from a seiso.party user listing""" - subcategory = "user" - pattern = r"(?:https?://)?seiso\.party/artists/([^/?#]+)/([^/?#]+)" - test = ( - ("https://seiso.party/artists/fanbox/21", { - "pattern": r"https://cdn\.seiso\.party/files/fanbox/\d+/", - "count": ">=15", - "keyword": { - "content": str, - "date": "type:datetime", - "id": r"re:\d+", - "num": int, - "service": "fanbox", - "title": str, - "user": "21", - "username": "雨", - }, - }), - ) - - def __init__(self, match): - SeisopartyExtractor.__init__(self, match) - self.service, self.user_id = match.groups() - - def posts(self): - url = "{}/artists/{}/{}".format(self.root, self.service, self.user_id) - page = self.request(url).text - self.user_name, pos = text.extract(page, '', '<') - - url = self.root + text.extract( - page, 'href="', '"', page.index('id="content"', pos))[0] - response = self.request(url) - headers = {"Referer": url} - - while True: - yield self._parse_post(response.text, url.rpartition("/")[2]) - response = self.request(url + "/next", headers=headers) - if url == response.url: - return - url = headers["Referer"] = response.url - - -class SeisopartyPostExtractor(SeisopartyExtractor): - """Extractor for a single seiso.party post""" - subcategory = "post" - pattern = r"(?:https?://)?seiso\.party/post/([^/?#]+)/([^/?#]+)/([^/?#]+)" - test = ( - ("https://seiso.party/post/fanbox/21/371", { - "url": "75f13b92de0ce399b6163c3de18f1f36011c2366", - "count": 2, - "keyword": { - "content": "この前描いためぐるちゃんのPSDファイルです。
" - "どうぞよろしくお願いします。", - "date": "dt:2021-05-06 12:38:31", - "extension": "re:psd|jpg", - "filename": "re:backcourt|ffb2ccb7a3586d05f9a4620329dd131e", - "id": "371", - "num": int, - "service": "fanbox", - "title": "MEGURU.PSD", - "user": "21", - "username": "雨", - }, - }), - ("https://seiso.party/post/patreon/429/95949", { - "pattern": r"https://cdn-2\.seiso\.party/files/patreon/95949/", - "count": 2, - }), - ) - - def __init__(self, match): - SeisopartyExtractor.__init__(self, match) - self.service, self.user_id, self.post_id = match.groups() - - def posts(self): - url = "{}/artists/{}/{}".format(self.root, self.service, self.user_id) - page = self.request(url).text - self.user_name, pos = text.extract(page, '', '<') - - url = "{}/post/{}/{}/{}".format( - self.root, self.service, self.user_id, self.post_id) - return (self._parse_post(self.request(url).text, self.post_id),) - - -class SeisopartyFavoriteExtractor(SeisopartyExtractor): - """Extractor for seiso.party favorites""" - subcategory = "favorite" - pattern = r"(?:https?://)?seiso\.party/favorites/artists/?(?:\?([^#]+))?" - test = ( - ("https://seiso.party/favorites/artists", { - "pattern": SeisopartyUserExtractor.pattern, - "url": "0c862434bc3bbbe84cbf41c3a6152473a8cde683", - "count": 3, - }), - ("https://seiso.party/favorites/artists?sort=id&sort_direction=asc", { - "url": "629a8b9c6d3a8a64f521908bdb3d7426ac03f8d3", - }), - ) - - def __init__(self, match): - SeisopartyExtractor.__init__(self, match) - self.query = match.group(1) - - def items(self): - self._prepare_ddosguard_cookies() - self.login() - - url = self.root + "/favorites/artists" - data = {"_extractor": SeisopartyUserExtractor} - params = text.parse_query(self.query) - params["page"] = text.parse_int(params.get("page"), 1) - - while True: - page = self.request(url, params=params).text - - cnt = 0 - for card in text.extract_iter( - page, '
= 20", - "pattern": r"https://(star-uploads|ss-uploads-prod)\.s\d+-us-west-" - r"\d+\.amazonaws\.com/uploads(_v2)?/users/11/", + "pattern": r"https://\w+\.cloudfront\.net/uploads(_v2)?/users/11/", "keyword": { "author_id": 11, "author_name": "subscribestar", @@ -149,6 +147,7 @@ class SubscribestarUserExtractor(SubscribestarExtractor): "content": str, "date" : "type:datetime", "id" : int, + "num" : int, "post_id": int, "type" : "re:image|video|attachment", "url" : str, @@ -190,7 +189,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor): pattern = BASE_PATTERN + r"/posts/(\d+)" test = ( ("https://www.subscribestar.com/posts/102468", { - "url": "612da5a98af056dd78dc846fbcfa705e721f6675", + "count": 1, "keyword": { "author_id": 11, "author_name": "subscribestar", @@ -202,6 +201,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor): "group": "imgs_and_videos", "height": 291, "id": 203885, + "num": 1, "pinned": False, "post_id": 102468, "type": "image", @@ -209,7 +209,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor): }, }), ("https://subscribestar.adult/posts/22950", { - "url": "440d745a368e6b3e218415f593a5045f384afa0d", + "count": 1, "keyword": {"date": "dt:2019-04-28 07:32:00"}, }), ) diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 00f3b04..f1c392d 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -41,7 +41,9 @@ class TwitterExtractor(Extractor): self.videos = self.config("videos", True) self.cards = self.config("cards", False) self._user_cache = {} + self._init_sizes() + def _init_sizes(self): size = self.config("size") if size is None: self._size_image = "orig" @@ -580,13 +582,17 @@ class TwitterImageExtractor(Extractor): subcategory = "image" pattern = r"https?://pbs\.twimg\.com/media/([\w-]+)(?:\?format=|\.)(\w+)" test = ( - ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG?format=jpg%name=orig"), + ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG?format=jpg&name=orig", { + "options": (("size", "4096x4096,orig"),), + "url": "cb3042a6f6826923da98f0d2b66c427e9385114c", + }), ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG.jpg:orig"), ) def __init__(self, match): Extractor.__init__(self, match) self.id, self.fmt = match.groups() + TwitterExtractor._init_sizes(self) def items(self): base = "https://pbs.twimg.com/media/{}?format={}&name=".format( @@ -595,11 +601,11 @@ class TwitterImageExtractor(Extractor): data = { "filename": self.id, "extension": self.fmt, - "_fallback": TwitterExtractor._image_fallback(base), + "_fallback": TwitterExtractor._image_fallback(self, base), } yield Message.Directory, data - yield Message.Url, base + "orig", data + yield Message.Url, base + self._size_image, data class TwitterAPI(): @@ -793,16 +799,21 @@ class TwitterAPI(): data = response.json() if "errors" in data: try: - msg = ", ".join( - '"' + error["message"] + '"' - for error in data["errors"] - ) + errors, warnings = [], [] + for error in data["errors"]: + if error.get("kind") == "NonFatal": + warnings.append(error["message"]) + else: + errors.append(error["message"]) + errors = ", ".join(errors) except Exception: - msg = data["errors"] - if msg and response.status_code < 400: - raise exception.StopExtraction(msg) + errors = data["errors"] + if warnings: + self.extractor.log.warning(", ".join(warnings)) + if errors and response.status_code < 400: + raise exception.StopExtraction(errors) else: - msg = "" + errors = "" if response.status_code < 400: # success @@ -816,7 +827,7 @@ class TwitterAPI(): continue if 
response.status_code == 401 and \ - "have been blocked from viewing" in msg: + "have been blocked from viewing" in errors: # account blocked extr = self.extractor if self.headers["x-twitter-auth-type"] and \ @@ -833,7 +844,7 @@ class TwitterAPI(): # error raise exception.StopExtraction( - "%s %s (%s)", response.status_code, response.reason, msg) + "%s %s (%s)", response.status_code, response.reason, errors) def _pagination(self, endpoint, params=None): if params is None: diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py index e2474c9..cf5b192 100644 --- a/gallery_dl/extractor/webtoons.py +++ b/gallery_dl/extractor/webtoons.py @@ -48,7 +48,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): test = ( (("https://www.webtoons.com/en/comedy/safely-endangered" "/ep-572-earth/viewer?title_no=352&episode_no=572"), { - "url": "11041d71a3f92728305c11a228e77cf0f7aa02ef", + "url": "55bec5d7c42aba19e3d0d56db25fdf0b0b13be38", "content": ("1748c7e82b6db910fa179f6dc7c4281b0f680fa7", "42055e44659f6ffc410b3fb6557346dfbb993df3", "49e1f2def04c6f7a6a3dacf245a1cd9abe77a6a9"), @@ -62,7 +62,6 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): url = "{}/{}/viewer?{}".format(self.root, self.path, query) GalleryExtractor.__init__(self, match, url) self.setup_agegate_cookies() - self.session.headers["Referer"] = url query = text.parse_query(query) self.title_no = query.get("title_no") @@ -88,7 +87,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): @staticmethod def images(page): return [ - (url, None) + (url.replace("://webtoon-phinf.", "://swebtoon-phinf."), None) for url in text.extract_iter( page, 'class="_images" data-url="', '"') ] diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py index 0922c7c..0a55532 100644 --- a/gallery_dl/extractor/xvideos.py +++ b/gallery_dl/extractor/xvideos.py @@ -32,8 +32,8 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor): test = ( ("https://www.xvideos.com/profiles/pervertedcouple/photos/751031", { "count": 8, - "pattern": r"https://profile-pics-l3\.xvideos-cdn\.com" - r"/[0-9a-f]{40}-\d+/videos/profiles/galleries/84/ca/37" + "pattern": r"https://profile-pics-cdn\d+\.xvideos-cdn\.com" + r"/[^/]+\,\d+/videos/profiles/galleries/84/ca/37" r"/pervertedcouple/gal751031/pic_\d+_big\.jpg", "keyword": { "gallery": { diff --git a/gallery_dl/extractor/ytdl.py b/gallery_dl/extractor/ytdl.py index d380dab..8eb0c83 100644 --- a/gallery_dl/extractor/ytdl.py +++ b/gallery_dl/extractor/ytdl.py @@ -9,7 +9,7 @@ """Extractors for sites supported by youtube-dl""" from .common import Extractor, Message -from .. import text, config, exception +from .. 
import ytdl, config, exception class YoutubeDLExtractor(Extractor): @@ -54,52 +54,45 @@ class YoutubeDLExtractor(Extractor): self.log.debug("Using %s", ytdl_module) # construct YoutubeDL object - options = { - "format" : self.config("format"), + extr_opts = { + "extract_flat" : "in_playlist", + "force_generic_extractor": self.force_generic_extractor, + } + user_opts = { "retries" : self._retries, "socket_timeout" : self._timeout, "nocheckcertificate" : not self._verify, - "proxy" : self.session.proxies.get("http"), - "force_generic_extractor": self.force_generic_extractor, - "nopart" : not self.config("part", True), - "updatetime" : self.config("mtime", True), - "ratelimit" : text.parse_bytes( - self.config("rate"), None), - "min_filesize" : text.parse_bytes( - self.config("filesize-min"), None), - "max_filesize" : text.parse_bytes( - self.config("filesize-max"), None), } - raw_options = self.config("raw-options") - if raw_options: - options.update(raw_options) - if self.config("logging", True): - options["logger"] = self.log - options["extract_flat"] = "in_playlist" - username, password = self._get_auth_info() if username: - options["username"], options["password"] = username, password + user_opts["username"], user_opts["password"] = username, password del username, password - ytdl = ytdl_module.YoutubeDL(options) + ytdl_instance = ytdl.construct_YoutubeDL( + ytdl_module, self, user_opts, extr_opts) # transfer cookies to ytdl cookies = self.session.cookies if cookies: - set_cookie = self.ytdl.cookiejar.set_cookie - for cookie in self.session.cookies: + set_cookie = ytdl_instance.cookiejar.set_cookie + for cookie in cookies: set_cookie(cookie) # extract youtube_dl info_dict - info_dict = ytdl._YoutubeDL__extract_info( - self.ytdl_url, - ytdl.get_info_extractor(self.ytdl_ie_key), - False, {}, True) - - if "entries" in info_dict: - results = self._process_entries(ytdl, info_dict["entries"]) + try: + info_dict = ytdl_instance._YoutubeDL__extract_info( + self.ytdl_url, + ytdl_instance.get_info_extractor(self.ytdl_ie_key), + False, {}, True) + except ytdl_module.utils.YoutubeDLError: + raise exception.StopExtraction("Failed to extract video data") + + if not info_dict: + return + elif "entries" in info_dict: + results = self._process_entries( + ytdl_module, ytdl_instance, info_dict["entries"]) else: results = (info_dict,) @@ -107,7 +100,7 @@ class YoutubeDLExtractor(Extractor): for info_dict in results: info_dict["extension"] = None info_dict["_ytdl_info_dict"] = info_dict - info_dict["_ytdl_instance"] = ytdl + info_dict["_ytdl_instance"] = ytdl_instance url = "ytdl:" + (info_dict.get("url") or info_dict.get("webpage_url") or @@ -116,15 +109,23 @@ class YoutubeDLExtractor(Extractor): yield Message.Directory, info_dict yield Message.Url, url, info_dict - def _process_entries(self, ytdl, entries): + def _process_entries(self, ytdl_module, ytdl_instance, entries): for entry in entries: - if entry.get("_type") in ("url", "url_transparent"): - info_dict = ytdl.extract_info( - entry["url"], False, - ie_key=entry.get("ie_key")) - if "entries" in info_dict: + if not entry: + continue + elif entry.get("_type") in ("url", "url_transparent"): + try: + info_dict = ytdl_instance.extract_info( + entry["url"], False, + ie_key=entry.get("ie_key")) + except ytdl_module.utils.YoutubeDLError: + continue + + if not info_dict: + continue + elif "entries" in info_dict: yield from self._process_entries( - ytdl, info_dict["entries"]) + ytdl_module, ytdl_instance, info_dict["entries"]) else: yield info_dict else: diff 
--git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index f5d961a..c2b4d99 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -274,6 +274,8 @@ def build_format_func(format_spec): return _parse_join(format_spec) if fmt == "R": return _parse_replace(format_spec) + if fmt == "D": + return _parse_datetime(format_spec) return _default_format(format_spec) return format @@ -319,6 +321,16 @@ def _parse_replace(format_spec): return replace +def _parse_datetime(format_spec): + dt_format, _, format_spec = format_spec.partition("/") + dt_format = dt_format[1:] + fmt = build_format_func(format_spec) + + def dt(obj): + return fmt(text.parse_datetime(obj, dt_format)) + return dt + + def _default_format(format_spec): def wrap(obj): return format(obj, format_spec) diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 4e185d0..97a8d3f 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -11,7 +11,6 @@ import json import time import errno import logging -import operator import functools import collections from . import extractor, downloader, postprocessor @@ -201,7 +200,6 @@ class DownloadJob(Job): def __init__(self, url, parent=None): Job.__init__(self, url, parent) self.log = self.get_logger("download") - self.blacklist = None self.fallback = None self.archive = None self.sleep = None @@ -209,6 +207,7 @@ class DownloadJob(Job): self.downloaders = {} self.out = output.select() self.visited = parent.visited if parent else set() + self._extractor_filter = None self._skipcnt = 0 def handle_url(self, url, kwdict): @@ -297,9 +296,9 @@ class DownloadJob(Job): else: extr = extractor.find(url) if extr: - if self.blacklist is None: - self.blacklist = self._build_blacklist() - if extr.category in self.blacklist: + if self._extractor_filter is None: + self._extractor_filter = self._build_extractor_filter() + if not self._extractor_filter(extr): extr = None if extr: @@ -444,22 +443,20 @@ class DownloadJob(Job): self.hooks = collections.defaultdict(list) pp_log = self.get_logger("postprocessor") pp_list = [] - category = self.extractor.category - basecategory = self.extractor.basecategory pp_conf = config.get((), "postprocessor") or {} for pp_dict in postprocessors: if isinstance(pp_dict, str): pp_dict = pp_conf.get(pp_dict) or {"name": pp_dict} - whitelist = pp_dict.get("whitelist") - if whitelist and category not in whitelist and \ - basecategory not in whitelist: - continue - - blacklist = pp_dict.get("blacklist") - if blacklist and ( - category in blacklist or basecategory in blacklist): + clist = pp_dict.get("whitelist") + if clist is not None: + negate = False + else: + clist = pp_dict.get("blacklist") + negate = True + if clist and not util.build_extractor_filter( + clist, negate)(self.extractor): continue name = pp_dict.get("name") @@ -500,38 +497,18 @@ class DownloadJob(Job): if condition(pathfmt.kwdict): callback(pathfmt) - def _build_blacklist(self): - wlist = self.extractor.config("whitelist") - if wlist is not None: - if isinstance(wlist, str): - wlist = wlist.split(",") - - # build a set of all categories - blist = set() - add = blist.add - update = blist.update - get = operator.itemgetter(0) - - for extr in extractor._list_classes(): - category = extr.category - if category: - add(category) - else: - update(map(get, extr.instances)) - - # remove whitelisted categories - blist.difference_update(wlist) - return blist - - blist = self.extractor.config("blacklist") - if blist is not None: - if isinstance(blist, str): - blist = blist.split(",") - blist = set(blist) + def 
_build_extractor_filter(self): + clist = self.extractor.config("whitelist") + if clist is not None: + negate = False else: - blist = {self.extractor.category} - blist |= util.SPECIAL_EXTRACTORS - return blist + clist = self.extractor.config("blacklist") + negate = True + if clist is None: + clist = (self.extractor.category,) + + return util.build_extractor_filter( + clist, negate, util.SPECIAL_EXTRACTORS) class SimulationJob(DownloadJob): diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 4a7fdbf..d25194e 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -81,6 +81,16 @@ def identity(x): return x +def true(_): + """Always returns True""" + return True + + +def false(_): + """Always returns False""" + return False + + def noop(): """Does nothing""" @@ -432,6 +442,66 @@ def build_duration_func(duration, min=0.0): return functools.partial(identity, duration if duration > min else min) +def build_extractor_filter(categories, negate=True, special=None): + """Build a function that takes an Extractor class as argument + and returns True if that class is allowed by 'categories' + """ + if isinstance(categories, str): + categories = categories.split(",") + + catset = set() # set of categories / basecategories + subset = set() # set of subcategories + catsub = [] # list of category-subcategory pairs + + for item in categories: + category, _, subcategory = item.partition(":") + if category and category != "*": + if subcategory and subcategory != "*": + catsub.append((category, subcategory)) + else: + catset.add(category) + elif subcategory and subcategory != "*": + subset.add(subcategory) + + if special: + catset |= special + elif not catset and not subset and not catsub: + return true if negate else false + + tests = [] + + if negate: + if catset: + tests.append(lambda extr: + extr.category not in catset and + extr.basecategory not in catset) + if subset: + tests.append(lambda extr: extr.subcategory not in subset) + else: + if catset: + tests.append(lambda extr: + extr.category in catset or + extr.basecategory in catset) + if subset: + tests.append(lambda extr: extr.subcategory in subset) + + if catsub: + def test(extr): + for category, subcategory in catsub: + if category in (extr.category, extr.basecategory) and \ + subcategory == extr.subcategory: + return not negate + return negate + tests.append(test) + + if len(tests) == 1: + return tests[0] + if negate: + return lambda extr: all(t(extr) for t in tests) + else: + return lambda extr: any(t(extr) for t in tests) + + def build_predicate(predicates): if not predicates: return lambda url, kwdict: True diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 48817be..a363a97 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.19.2" +__version__ = "1.19.3" diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py new file mode 100644 index 0000000..4266f48 --- /dev/null +++ b/gallery_dl/ytdl.py @@ -0,0 +1,513 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Helpers for interacting with youtube-dl""" + +import re +import shlex +import itertools +from . 
import text, util, exception + + +def construct_YoutubeDL(module, obj, user_opts, system_opts=None): + opts = argv = None + config = obj.config + + cfg = config("config-file") + if cfg: + with open(util.expand_path(cfg)) as fp: + contents = fp.read() + argv = shlex.split(contents, comments=True) + + cmd = config("cmdline-args") + if cmd: + if isinstance(cmd, str): + cmd = shlex.split(cmd) + argv = (argv + cmd) if argv else cmd + + try: + opts = parse_command_line(module, argv) if argv else user_opts + except SystemExit: + raise exception.StopExtraction("Invalid command-line option") + + if opts.get("format") is None: + opts["format"] = config("format") + if opts.get("proxy") is None: + opts["proxy"] = obj.session.proxies.get("http") + if opts.get("nopart") is None: + opts["nopart"] = not config("part", True) + if opts.get("updatetime") is None: + opts["updatetime"] = config("mtime", True) + if opts.get("ratelimit") is None: + opts["ratelimit"] = text.parse_bytes(config("rate"), None) + if opts.get("min_filesize") is None: + opts["min_filesize"] = text.parse_bytes(config("filesize-min"), None) + if opts.get("max_filesize") is None: + opts["max_filesize"] = text.parse_bytes(config("filesize-max"), None) + + raw_opts = config("raw-options") + if raw_opts: + opts.update(raw_opts) + if config("logging", True): + opts["logger"] = obj.log + if system_opts: + opts.update(system_opts) + + return module.YoutubeDL(opts) + + +def parse_command_line(module, argv): + parser, opts, args = module.parseOpts(argv) + + ytdlp = (module.__name__ == "yt_dlp") + std_headers = module.std_headers + parse_bytes = module.FileDownloader.parse_bytes + + # HTTP headers + if opts.user_agent is not None: + std_headers["User-Agent"] = opts.user_agent + if opts.referer is not None: + std_headers["Referer"] = opts.referer + if opts.headers: + if isinstance(opts.headers, dict): + std_headers.update(opts.headers) + else: + for h in opts.headers: + key, _, value = h.partition(":") + std_headers[key] = value + + if opts.ratelimit is not None: + opts.ratelimit = parse_bytes(opts.ratelimit) + if getattr(opts, "throttledratelimit", None) is not None: + opts.throttledratelimit = parse_bytes(opts.throttledratelimit) + if opts.min_filesize is not None: + opts.min_filesize = parse_bytes(opts.min_filesize) + if opts.max_filesize is not None: + opts.max_filesize = parse_bytes(opts.max_filesize) + if opts.max_sleep_interval is None: + opts.max_sleep_interval = opts.sleep_interval + if getattr(opts, "overwrites", None): + opts.continue_dl = False + if opts.retries is not None: + opts.retries = parse_retries(opts.retries) + if opts.fragment_retries is not None: + opts.fragment_retries = parse_retries(opts.fragment_retries) + if getattr(opts, "extractor_retries", None) is not None: + opts.extractor_retries = parse_retries(opts.extractor_retries) + if opts.buffersize is not None: + opts.buffersize = parse_bytes(opts.buffersize) + if opts.http_chunk_size is not None: + opts.http_chunk_size = parse_bytes(opts.http_chunk_size) + if opts.extractaudio: + opts.audioformat = opts.audioformat.lower() + if opts.audioquality: + opts.audioquality = opts.audioquality.strip("kK") + if opts.recodevideo is not None: + opts.recodevideo = opts.recodevideo.replace(" ", "") + if getattr(opts, "remuxvideo", None) is not None: + opts.remuxvideo = opts.remuxvideo.replace(" ", "") + + if opts.date is not None: + date = module.DateRange.day(opts.date) + else: + date = module.DateRange(opts.dateafter, opts.datebefore) + + compat_opts = getattr(opts, "compat_opts", 
()) + + def _unused_compat_opt(name): + if name not in compat_opts: + return False + compat_opts.discard(name) + compat_opts.update(["*%s" % name]) + return True + + def set_default_compat( + compat_name, opt_name, default=True, remove_compat=True): + attr = getattr(opts, opt_name, None) + if compat_name in compat_opts: + if attr is None: + setattr(opts, opt_name, not default) + return True + else: + if remove_compat: + _unused_compat_opt(compat_name) + return False + elif attr is None: + setattr(opts, opt_name, default) + return None + + set_default_compat("abort-on-error", "ignoreerrors", "only_download") + set_default_compat("no-playlist-metafiles", "allow_playlist_files") + set_default_compat("no-clean-infojson", "clean_infojson") + if "format-sort" in compat_opts: + opts.format_sort.extend(module.InfoExtractor.FormatSort.ytdl_default) + _video_multistreams_set = set_default_compat( + "multistreams", "allow_multiple_video_streams", + False, remove_compat=False) + _audio_multistreams_set = set_default_compat( + "multistreams", "allow_multiple_audio_streams", + False, remove_compat=False) + if _video_multistreams_set is False and _audio_multistreams_set is False: + _unused_compat_opt("multistreams") + + if isinstance(opts.outtmpl, dict): + outtmpl = opts.outtmpl + outtmpl_default = outtmpl.get("default") + else: + opts.outtmpl = outtmpl = outtmpl_default = "" + + if "filename" in compat_opts: + if outtmpl_default is None: + outtmpl_default = outtmpl["default"] = "%(title)s-%(id)s.%(ext)s" + else: + _unused_compat_opt("filename") + + if opts.extractaudio and not opts.keepvideo and opts.format is None: + opts.format = "bestaudio/best" + + if ytdlp: + def metadataparser_actions(f): + if isinstance(f, str): + yield module.MetadataFromFieldPP.to_action(f) + else: + REPLACE = module.MetadataParserPP.Actions.REPLACE + args = f[1:] + for x in f[0].split(","): + action = [REPLACE, x] + action += args + yield action + + if getattr(opts, "parse_metadata", None) is None: + opts.parse_metadata = [] + if opts.metafromtitle is not None: + opts.parse_metadata.append("title:%s" % opts.metafromtitle) + opts.metafromtitle = None + opts.parse_metadata = list(itertools.chain.from_iterable(map( + metadataparser_actions, opts.parse_metadata))) + else: + opts.parse_metadata = () + + download_archive_fn = module.expand_path(opts.download_archive) \ + if opts.download_archive is not None else opts.download_archive + + if getattr(opts, "getcomments", None): + opts.writeinfojson = True + + if getattr(opts, "no_sponsorblock", None): + opts.sponsorblock_mark = set() + opts.sponsorblock_remove = set() + else: + opts.sponsorblock_mark = \ + getattr(opts, "sponsorblock_mark", None) or set() + opts.sponsorblock_remove = \ + getattr(opts, "sponsorblock_remove", None) or set() + sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove + + addchapters = getattr(opts, "addchapters", None) + if (opts.addmetadata or opts.sponsorblock_mark) and addchapters is None: + addchapters = True + opts.remove_chapters = getattr(opts, "remove_chapters", None) or () + + # PostProcessors + postprocessors = [] + if opts.metafromtitle: + postprocessors.append({ + "key": "MetadataFromTitle", + "titleformat": opts.metafromtitle, + }) + if getattr(opts, "add_postprocessors", None): + postprocessors += list(opts.add_postprocessors) + if sponsorblock_query: + postprocessors.append({ + "key": "SponsorBlock", + "categories": sponsorblock_query, + "api": opts.sponsorblock_api, + "when": "pre_process", + }) + if opts.parse_metadata: + 
postprocessors.append({ + "key": "MetadataParser", + "actions": opts.parse_metadata, + "when": "pre_process", + }) + if opts.convertsubtitles: + pp = {"key": "FFmpegSubtitlesConvertor", + "format": opts.convertsubtitles} + if ytdlp: + pp["when"] = "before_dl" + postprocessors.append(pp) + if getattr(opts, "convertthumbnails", None): + postprocessors.append({ + "key": "FFmpegThumbnailsConvertor", + "format": opts.convertthumbnails, + "when": "before_dl", + }) + if getattr(opts, "exec_before_dl_cmd", None): + postprocessors.append({ + "key": "Exec", + "exec_cmd": opts.exec_before_dl_cmd, + "when": "before_dl", + }) + if opts.extractaudio: + postprocessors.append({ + "key": "FFmpegExtractAudio", + "preferredcodec": opts.audioformat, + "preferredquality": opts.audioquality, + "nopostoverwrites": opts.nopostoverwrites, + }) + if getattr(opts, "remuxvideo", None): + postprocessors.append({ + "key": "FFmpegVideoRemuxer", + "preferedformat": opts.remuxvideo, + }) + if opts.recodevideo: + postprocessors.append({ + "key": "FFmpegVideoConvertor", + "preferedformat": opts.recodevideo, + }) + if opts.embedsubtitles: + pp = {"key": "FFmpegEmbedSubtitle"} + if ytdlp: + pp["already_have_subtitle"] = ( + opts.writesubtitles and "no-keep-subs" not in compat_opts) + postprocessors.append(pp) + if not opts.writeautomaticsub and "no-keep-subs" not in compat_opts: + opts.writesubtitles = True + if opts.allsubtitles and not opts.writeautomaticsub: + opts.writesubtitles = True + remove_chapters_patterns, remove_ranges = [], [] + for regex in opts.remove_chapters: + if regex.startswith("*"): + dur = list(map(module.parse_duration, regex[1:].split("-"))) + if len(dur) == 2 and all(t is not None for t in dur): + remove_ranges.append(tuple(dur)) + continue + remove_chapters_patterns.append(re.compile(regex)) + if opts.remove_chapters or sponsorblock_query: + postprocessors.append({ + "key": "ModifyChapters", + "remove_chapters_patterns": remove_chapters_patterns, + "remove_sponsor_segments": opts.sponsorblock_remove, + "remove_ranges": remove_ranges, + "sponsorblock_chapter_title": opts.sponsorblock_chapter_title, + "force_keyframes": opts.force_keyframes_at_cuts, + }) + if opts.addmetadata or addchapters: + pp = {"key": "FFmpegMetadata"} + if ytdlp: + pp["add_chapters"] = addchapters + pp["add_metadata"] = opts.addmetadata + postprocessors.append(pp) + if getattr(opts, "sponskrub", False) is not False: + postprocessors.append({ + "key": "SponSkrub", + "path": opts.sponskrub_path, + "args": opts.sponskrub_args, + "cut": opts.sponskrub_cut, + "force": opts.sponskrub_force, + "ignoreerror": opts.sponskrub is None, + }) + if opts.embedthumbnail: + already_have_thumbnail = (opts.writethumbnail or + opts.write_all_thumbnails) + postprocessors.append({ + "key": "EmbedThumbnail", + "already_have_thumbnail": already_have_thumbnail, + }) + if not already_have_thumbnail: + opts.writethumbnail = True + if isinstance(opts.outtmpl, dict): + opts.outtmpl["pl_thumbnail"] = "" + if getattr(opts, "split_chapters", None): + postprocessors.append({ + "key": "FFmpegSplitChapters", + "force_keyframes": opts.force_keyframes_at_cuts, + }) + if opts.xattrs: + postprocessors.append({"key": "XAttrMetadata"}) + if opts.exec_cmd: + postprocessors.append({ + "key": "Exec", + "exec_cmd": opts.exec_cmd, + "when": "after_move", + }) + + match_filter = ( + None if opts.match_filter is None + else module.match_filter_func(opts.match_filter)) + + return { + "usenetrc": opts.usenetrc, + "netrc_location": getattr(opts, "netrc_location", None), + 
"username": opts.username, + "password": opts.password, + "twofactor": opts.twofactor, + "videopassword": opts.videopassword, + "ap_mso": opts.ap_mso, + "ap_username": opts.ap_username, + "ap_password": opts.ap_password, + "quiet": opts.quiet, + "no_warnings": opts.no_warnings, + "forceurl": opts.geturl, + "forcetitle": opts.gettitle, + "forceid": opts.getid, + "forcethumbnail": opts.getthumbnail, + "forcedescription": opts.getdescription, + "forceduration": opts.getduration, + "forcefilename": opts.getfilename, + "forceformat": opts.getformat, + "forceprint": getattr(opts, "forceprint", None) or (), + "force_write_download_archive": getattr( + opts, "force_write_download_archive", None), + "simulate": opts.simulate, + "skip_download": opts.skip_download, + "format": opts.format, + "allow_unplayable_formats": getattr( + opts, "allow_unplayable_formats", None), + "ignore_no_formats_error": getattr( + opts, "ignore_no_formats_error", None), + "format_sort": getattr( + opts, "format_sort", None), + "format_sort_force": getattr( + opts, "format_sort_force", None), + "allow_multiple_video_streams": opts.allow_multiple_video_streams, + "allow_multiple_audio_streams": opts.allow_multiple_audio_streams, + "check_formats": getattr( + opts, "check_formats", None), + "listformats": opts.listformats, + "listformats_table": getattr( + opts, "listformats_table", None), + "outtmpl": opts.outtmpl, + "outtmpl_na_placeholder": opts.outtmpl_na_placeholder, + "paths": getattr(opts, "paths", None), + "autonumber_size": opts.autonumber_size, + "autonumber_start": opts.autonumber_start, + "restrictfilenames": opts.restrictfilenames, + "windowsfilenames": getattr(opts, "windowsfilenames", None), + "ignoreerrors": opts.ignoreerrors, + "force_generic_extractor": opts.force_generic_extractor, + "ratelimit": opts.ratelimit, + "throttledratelimit": getattr(opts, "throttledratelimit", None), + "overwrites": getattr(opts, "overwrites", None), + "retries": opts.retries, + "fragment_retries": opts.fragment_retries, + "extractor_retries": getattr(opts, "extractor_retries", None), + "skip_unavailable_fragments": opts.skip_unavailable_fragments, + "keep_fragments": opts.keep_fragments, + "concurrent_fragment_downloads": getattr( + opts, "concurrent_fragment_downloads", None), + "buffersize": opts.buffersize, + "noresizebuffer": opts.noresizebuffer, + "http_chunk_size": opts.http_chunk_size, + "continuedl": opts.continue_dl, + "noprogress": True if opts.noprogress is None else opts.noprogress, + "playliststart": opts.playliststart, + "playlistend": opts.playlistend, + "playlistreverse": opts.playlist_reverse, + "playlistrandom": opts.playlist_random, + "noplaylist": opts.noplaylist, + "logtostderr": outtmpl_default == "-", + "consoletitle": opts.consoletitle, + "nopart": opts.nopart, + "updatetime": opts.updatetime, + "writedescription": opts.writedescription, + "writeannotations": opts.writeannotations, + "writeinfojson": opts.writeinfojson, + "allow_playlist_files": opts.allow_playlist_files, + "clean_infojson": opts.clean_infojson, + "getcomments": getattr(opts, "getcomments", None), + "writethumbnail": opts.writethumbnail, + "write_all_thumbnails": opts.write_all_thumbnails, + "writelink": getattr(opts, "writelink", None), + "writeurllink": getattr(opts, "writeurllink", None), + "writewebloclink": getattr(opts, "writewebloclink", None), + "writedesktoplink": getattr(opts, "writedesktoplink", None), + "writesubtitles": opts.writesubtitles, + "writeautomaticsub": opts.writeautomaticsub, + "allsubtitles": opts.allsubtitles, 
+ "listsubtitles": opts.listsubtitles, + "subtitlesformat": opts.subtitlesformat, + "subtitleslangs": opts.subtitleslangs, + "matchtitle": module.decodeOption(opts.matchtitle), + "rejecttitle": module.decodeOption(opts.rejecttitle), + "max_downloads": opts.max_downloads, + "prefer_free_formats": opts.prefer_free_formats, + "trim_file_name": getattr(opts, "trim_file_name", None), + "verbose": opts.verbose, + "dump_intermediate_pages": opts.dump_intermediate_pages, + "write_pages": opts.write_pages, + "test": opts.test, + "keepvideo": opts.keepvideo, + "min_filesize": opts.min_filesize, + "max_filesize": opts.max_filesize, + "min_views": opts.min_views, + "max_views": opts.max_views, + "daterange": date, + "cachedir": opts.cachedir, + "youtube_print_sig_code": opts.youtube_print_sig_code, + "age_limit": opts.age_limit, + "download_archive": download_archive_fn, + "break_on_existing": getattr(opts, "break_on_existing", None), + "break_on_reject": getattr(opts, "break_on_reject", None), + "skip_playlist_after_errors": getattr( + opts, "skip_playlist_after_errors", None), + "cookiefile": opts.cookiefile, + "cookiesfrombrowser": getattr(opts, "cookiesfrombrowser", None), + "nocheckcertificate": opts.no_check_certificate, + "prefer_insecure": opts.prefer_insecure, + "proxy": opts.proxy, + "socket_timeout": opts.socket_timeout, + "bidi_workaround": opts.bidi_workaround, + "debug_printtraffic": opts.debug_printtraffic, + "prefer_ffmpeg": opts.prefer_ffmpeg, + "include_ads": opts.include_ads, + "default_search": opts.default_search, + "dynamic_mpd": getattr(opts, "dynamic_mpd", None), + "extractor_args": getattr(opts, "extractor_args", None), + "youtube_include_dash_manifest": getattr( + opts, "youtube_include_dash_manifest", None), + "youtube_include_hls_manifest": getattr( + opts, "youtube_include_hls_manifest", None), + "encoding": opts.encoding, + "extract_flat": opts.extract_flat, + "mark_watched": opts.mark_watched, + "merge_output_format": opts.merge_output_format, + "postprocessors": postprocessors, + "fixup": opts.fixup, + "source_address": opts.source_address, + "sleep_interval_requests": getattr( + opts, "sleep_interval_requests", None), + "sleep_interval": opts.sleep_interval, + "max_sleep_interval": opts.max_sleep_interval, + "sleep_interval_subtitles": getattr( + opts, "sleep_interval_subtitles", None), + "external_downloader": opts.external_downloader, + "playlist_items": opts.playlist_items, + "xattr_set_filesize": opts.xattr_set_filesize, + "match_filter": match_filter, + "no_color": opts.no_color, + "ffmpeg_location": opts.ffmpeg_location, + "hls_prefer_native": opts.hls_prefer_native, + "hls_use_mpegts": opts.hls_use_mpegts, + "hls_split_discontinuity": getattr( + opts, "hls_split_discontinuity", None), + "external_downloader_args": opts.external_downloader_args, + "postprocessor_args": opts.postprocessor_args, + "cn_verification_proxy": opts.cn_verification_proxy, + "geo_verification_proxy": opts.geo_verification_proxy, + "geo_bypass": opts.geo_bypass, + "geo_bypass_country": opts.geo_bypass_country, + "geo_bypass_ip_block": opts.geo_bypass_ip_block, + "compat_opts": compat_opts, + } + + +def parse_retries(retries, name=""): + if retries in ("inf", "infinite"): + return float("inf") + return int(retries) -- cgit v1.2.3
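For context on the job.py change above: the old inline blacklist set is replaced by
util.build_extractor_filter(), which compiles a whitelist/blacklist specification such
as "twitter,danbooru:tag" into a predicate over extractor classes, supporting
"category:subcategory" pairs and "*" wildcards. A minimal, self-contained sketch of
the matching semantics (the Extr class and all category names below are hypothetical
stand-ins, not gallery-dl's API):

    class Extr:
        """Stand-in for an extractor class with its three category attributes"""
        def __init__(self, category, subcategory="", basecategory=""):
            self.category = category
            self.subcategory = subcategory
            self.basecategory = basecategory

    def build_filter(categories, negate=True):
        # same three-way split as in util.build_extractor_filter()
        catset, subset, catsub = set(), set(), []
        for item in categories:
            category, _, subcategory = item.partition(":")
            if category and category != "*":
                if subcategory and subcategory != "*":
                    catsub.append((category, subcategory))
                else:
                    catset.add(category)
            elif subcategory and subcategory != "*":
                subset.add(subcategory)

        def test(extr):
            hit = (
                extr.category in catset or
                extr.basecategory in catset or
                extr.subcategory in subset or
                any(c in (extr.category, extr.basecategory) and
                    s == extr.subcategory for c, s in catsub)
            )
            return not hit if negate else hit

        return test

    # blacklist "twitter,danbooru:tag": reject those two, allow the rest
    deny = build_filter(["twitter", "danbooru:tag"], negate=True)
    assert deny(Extr("mastodon"))             # unrelated category passes
    assert not deny(Extr("twitter"))          # blacklisted category
    assert not deny(Extr("danbooru", "tag"))  # blacklisted pair
    assert deny(Extr("danbooru", "pool"))     # same category, different pair

The real helper additionally merges util.SPECIAL_EXTRACTORS into the category set for
blacklists, and short-circuits to the new util.true/util.false constants when the
specification is empty.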
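The new ytdl.construct_YoutubeDL() lets a "config-file" and/or "cmdline-args" option
take precedence over gallery-dl's own keys ("format", "rate", "part", ...), with
"raw-options" merged last so it always wins. The tokenizing step uses shlex with
comment support; a small sketch of that behavior (the file contents are made up):

    import shlex

    contents = """
    # comments are stripped, as in a youtube-dl configuration file
    --no-part
    --retries 5
    --user-agent "Mozilla/5.0 (X11; Linux x86_64)"
    """
    argv = shlex.split(contents, comments=True)
    print(argv)
    # ['--no-part', '--retries', '5',
    #  '--user-agent', 'Mozilla/5.0 (X11; Linux x86_64)']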
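parse_command_line() serves both youtube_dl and yt_dlp from a single code path:
options that exist in only one of the two parsers are probed with
getattr(opts, name, None), so they fall through as None on the other. Reduced to a
sketch, with SimpleNamespace standing in for the parsed-options object:

    from types import SimpleNamespace

    # youtube-dl's parser has no --throttled-rate; yt-dlp's does
    ytdl_opts = SimpleNamespace(ratelimit="1M")
    ytdlp_opts = SimpleNamespace(ratelimit="1M", throttledratelimit="100K")

    for opts in (ytdl_opts, ytdlp_opts):
        print(getattr(opts, "throttledratelimit", None))
    # None
    # 100K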
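The dict built at the end of parse_command_line() is shaped so it can be handed
straight to module.YoutubeDL(), which is what construct_YoutubeDL() ultimately does.
A hedged usage sketch, assuming yt_dlp (or youtube_dl) is installed and using example
option values:

    import importlib

    module = importlib.import_module("yt_dlp")  # or "youtube_dl"
    ydl = module.YoutubeDL({
        "retries": 5,
        "socket_timeout": 30.0,
        "nocheckcertificate": False,
        "ratelimit": 1048576,  # bytes per second
    })
    # info = ydl.extract_info(url, download=False)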