diff options
| author | 2021-08-04 02:14:44 -0400 | |
|---|---|---|
| committer | 2021-08-04 02:14:44 -0400 | |
| commit | 873d9a628e9412a79bdc64cd962470749de3425b (patch) | |
| tree | 8cd421ef79a9fa784147fa888543216f0872357b /gallery_dl/extractor/ytdl.py | |
| parent | 32de2b06db501c7de81678bce8e3e0c3e63d340c (diff) | |
New upstream version 1.18.2.upstream/1.18.2
Diffstat (limited to 'gallery_dl/extractor/ytdl.py')
| -rw-r--r-- | gallery_dl/extractor/ytdl.py | 136 |
1 files changed, 136 insertions, 0 deletions
diff --git a/gallery_dl/extractor/ytdl.py b/gallery_dl/extractor/ytdl.py new file mode 100644 index 0000000..d380dab --- /dev/null +++ b/gallery_dl/extractor/ytdl.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for sites supported by youtube-dl""" + +from .common import Extractor, Message +from .. import text, config, exception + + +class YoutubeDLExtractor(Extractor): + """Generic extractor for youtube-dl supported URLs""" + category = "ytdl" + directory_fmt = ("{category}", "{subcategory}") + filename_fmt = "{title}-{id}.{extension}" + archive_fmt = "{extractor_key} {id}" + pattern = r"ytdl:(.*)" + test = ("ytdl:https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9",) + + def __init__(self, match): + # import main youtube_dl module + module_name = self.ytdl_module_name = config.get( + ("extractor", "ytdl"), "module") or "youtube_dl" + module = __import__(module_name) + + # find suitable youtube_dl extractor + self.ytdl_url = url = match.group(1) + generic = config.interpolate(("extractor", "ytdl"), "generic", True) + if generic == "force": + self.ytdl_ie_key = "Generic" + self.force_generic_extractor = True + else: + for ie in module.extractor.gen_extractor_classes(): + if ie.suitable(url): + self.ytdl_ie_key = ie.ie_key() + break + if not generic and self.ytdl_ie_key == "Generic": + raise exception.NoExtractorError() + self.force_generic_extractor = False + + # set subcategory to youtube_dl extractor's key + self.subcategory = self.ytdl_ie_key + Extractor.__init__(self, match) + + def items(self): + # import subcategory module + ytdl_module = __import__( + config.get(("extractor", "ytdl", self.subcategory), "module") or + self.ytdl_module_name) + self.log.debug("Using %s", ytdl_module) + + # construct YoutubeDL object + options = { + "format" : self.config("format"), + "retries" : self._retries, + "socket_timeout" : self._timeout, + "nocheckcertificate" : not self._verify, + "proxy" : self.session.proxies.get("http"), + "force_generic_extractor": self.force_generic_extractor, + "nopart" : not self.config("part", True), + "updatetime" : self.config("mtime", True), + "ratelimit" : text.parse_bytes( + self.config("rate"), None), + "min_filesize" : text.parse_bytes( + self.config("filesize-min"), None), + "max_filesize" : text.parse_bytes( + self.config("filesize-max"), None), + } + + raw_options = self.config("raw-options") + if raw_options: + options.update(raw_options) + if self.config("logging", True): + options["logger"] = self.log + options["extract_flat"] = "in_playlist" + + username, password = self._get_auth_info() + if username: + options["username"], options["password"] = username, password + del username, password + + ytdl = ytdl_module.YoutubeDL(options) + + # transfer cookies to ytdl + cookies = self.session.cookies + if cookies: + set_cookie = self.ytdl.cookiejar.set_cookie + for cookie in self.session.cookies: + set_cookie(cookie) + + # extract youtube_dl info_dict + info_dict = ytdl._YoutubeDL__extract_info( + self.ytdl_url, + ytdl.get_info_extractor(self.ytdl_ie_key), + False, {}, True) + + if "entries" in info_dict: + results = self._process_entries(ytdl, info_dict["entries"]) + else: + results = (info_dict,) + + # yield results + for info_dict in results: + info_dict["extension"] = None + info_dict["_ytdl_info_dict"] = info_dict + info_dict["_ytdl_instance"] = ytdl + + url = "ytdl:" + (info_dict.get("url") or + info_dict.get("webpage_url") or + self.ytdl_url) + + yield Message.Directory, info_dict + yield Message.Url, url, info_dict + + def _process_entries(self, ytdl, entries): + for entry in entries: + if entry.get("_type") in ("url", "url_transparent"): + info_dict = ytdl.extract_info( + entry["url"], False, + ie_key=entry.get("ie_key")) + if "entries" in info_dict: + yield from self._process_entries( + ytdl, info_dict["entries"]) + else: + yield info_dict + else: + yield entry + + +if config.get(("extractor", "ytdl"), "enabled"): + # make 'ytdl:' prefix optional + YoutubeDLExtractor.pattern = r"(?:ytdl:)?(.*)" |
