diff options
Diffstat (limited to 'gallery_dl/ytdl.py')
| -rw-r--r-- | gallery_dl/ytdl.py | 513 |
1 files changed, 513 insertions, 0 deletions
# -*- coding: utf-8 -*-

# Copyright 2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Helpers for interacting with youtube-dl"""

import re
import shlex
import itertools
from . import text, util, exception


def construct_YoutubeDL(module, obj, user_opts, system_opts=None):
    """Create a 'module.YoutubeDL' instance from gallery-dl settings

    Arguments:
        module: youtube-dl compatible module object; must provide
            'YoutubeDL' and, when command-line arguments are configured,
            everything parse_command_line() accesses
        obj: object providing 'config(key[, default])',
            'session.proxies' and 'log'
        user_opts: options dict used as base when neither 'config-file'
            nor 'cmdline-args' yields any arguments; it is modified
            in place. It is NOT consulted when arguments are present.
        system_opts: optional dict applied last, overriding everything else

    Raises:
        exception.StopExtraction: the configured command-line
            arguments were rejected
    """
    opts = argv = None
    config = obj.config

    # arguments from a youtube-dl style config file ('#' starts a comment)
    cfg = config("config-file")
    if cfg:
        with open(util.expand_path(cfg)) as fp:
            contents = fp.read()
        argv = shlex.split(contents, comments=True)

    # arguments from gallery-dl's own config, as string or list;
    # they are appended after the ones from 'config-file'
    cmd = config("cmdline-args")
    if cmd:
        if isinstance(cmd, str):
            cmd = shlex.split(cmd)
        argv = (argv + cmd) if argv else cmd

    try:
        opts = parse_command_line(module, argv) if argv else user_opts
    except SystemExit:
        # raised when the module's option parser aborts
        raise exception.StopExtraction("Invalid command-line option")
    except ValueError as exc:
        # e.g. a malformed retry count rejected by parse_retries();
        # report it like any other bad option instead of letting it
        # escape as an unexpected error
        raise exception.StopExtraction(
            "Invalid command-line option (%s)" % exc)

    if opts is None:
        # no command-line arguments and no user options given
        opts = {}

    # fill in everything still unset from gallery-dl's configuration
    if opts.get("format") is None:
        opts["format"] = config("format")
    if opts.get("proxy") is None:
        opts["proxy"] = obj.session.proxies.get("http")
    if opts.get("nopart") is None:
        opts["nopart"] = not config("part", True)
    if opts.get("updatetime") is None:
        opts["updatetime"] = config("mtime", True)
    if opts.get("ratelimit") is None:
        opts["ratelimit"] = text.parse_bytes(config("rate"), None)
    if opts.get("min_filesize") is None:
        opts["min_filesize"] = text.parse_bytes(config("filesize-min"), None)
    if opts.get("max_filesize") is None:
        opts["max_filesize"] = text.parse_bytes(config("filesize-max"), None)

    # precedence, lowest to highest:
    # defaults above < 'raw-options' < logger < system_opts
    raw_opts = config("raw-options")
    if raw_opts:
        opts.update(raw_opts)
    if config("logging", True):
        opts["logger"] = obj.log
    if system_opts:
        opts.update(system_opts)

    return module.YoutubeDL(opts)


def parse_command_line(module, argv):
    """Convert command-line arguments into a YoutubeDL options dict

    The arguments in 'argv' are parsed with 'module.parseOpts()' and the
    resulting values are normalized and assembled into a dict, including
    the list of postprocessors they imply.

    Option attributes read with getattr() and a default are allowed to
    be missing from the parsed options (presumably because only one of
    youtube-dl / yt-dlp defines them).

    Side effect: 'module.std_headers' is updated in place when a
    user agent, referer, or custom headers are given.

    Raises:
        ValueError: a retry count is neither an integer nor "inf"/"infinite"
        SystemExit is expected from 'module.parseOpts()' on invalid
        arguments (construct_YoutubeDL() handles it).
    """
    _parser, opts, _args = module.parseOpts(argv)

    ytdlp = (module.__name__ == "yt_dlp")
    std_headers = module.std_headers
    parse_bytes = module.FileDownloader.parse_bytes

    # HTTP headers
    if opts.user_agent is not None:
        std_headers["User-Agent"] = opts.user_agent
    if opts.referer is not None:
        std_headers["Referer"] = opts.referer
    if opts.headers:
        if isinstance(opts.headers, dict):
            std_headers.update(opts.headers)
        else:
            # list of "Key:Value" strings; split at the first ':' and
            # keep the value exactly as written
            for h in opts.headers:
                key, _, value = h.partition(":")
                std_headers[key] = value

    # normalize size, retry, and format values
    if opts.ratelimit is not None:
        opts.ratelimit = parse_bytes(opts.ratelimit)
    if getattr(opts, "throttledratelimit", None) is not None:
        opts.throttledratelimit = parse_bytes(opts.throttledratelimit)
    if opts.min_filesize is not None:
        opts.min_filesize = parse_bytes(opts.min_filesize)
    if opts.max_filesize is not None:
        opts.max_filesize = parse_bytes(opts.max_filesize)
    if opts.max_sleep_interval is None:
        opts.max_sleep_interval = opts.sleep_interval
    if getattr(opts, "overwrites", None):
        opts.continue_dl = False
    if opts.retries is not None:
        opts.retries = parse_retries(opts.retries)
    if opts.fragment_retries is not None:
        opts.fragment_retries = parse_retries(
            opts.fragment_retries, "fragment ")
    if getattr(opts, "extractor_retries", None) is not None:
        opts.extractor_retries = parse_retries(
            opts.extractor_retries, "extractor ")
    if opts.buffersize is not None:
        opts.buffersize = parse_bytes(opts.buffersize)
    if opts.http_chunk_size is not None:
        opts.http_chunk_size = parse_bytes(opts.http_chunk_size)
    if opts.extractaudio:
        opts.audioformat = opts.audioformat.lower()
    if opts.audioquality:
        opts.audioquality = opts.audioquality.strip("kK")
    if opts.recodevideo is not None:
        opts.recodevideo = opts.recodevideo.replace(" ", "")
    if getattr(opts, "remuxvideo", None) is not None:
        opts.remuxvideo = opts.remuxvideo.replace(" ", "")

    # a single day takes precedence over an after/before range
    if opts.date is not None:
        date = module.DateRange.day(opts.date)
    else:
        date = module.DateRange(opts.dateafter, opts.datebefore)

    # falls back to an empty tuple when the option does not exist;
    # the helpers below only modify it after a successful membership
    # test, so the tuple itself is never mutated
    compat_opts = getattr(opts, "compat_opts", ())

    def _unused_compat_opt(name):
        """Replace 'name' in compat_opts with '*name'; True if present"""
        if name not in compat_opts:
            return False
        compat_opts.discard(name)
        compat_opts.update(["*%s" % name])
        return True

    def set_default_compat(
            compat_name, opt_name, default=True, remove_compat=True):
        """Give option 'opt_name' a value if it has none

        Returns True if the compat option decided the value ('not default'),
        False if the compat option is set but the option already
        had a value, and None if the compat option is not set.
        """
        attr = getattr(opts, opt_name, None)
        if compat_name in compat_opts:
            if attr is None:
                setattr(opts, opt_name, not default)
                return True
            else:
                if remove_compat:
                    _unused_compat_opt(compat_name)
                return False
        elif attr is None:
            setattr(opts, opt_name, default)
        return None

    set_default_compat("abort-on-error", "ignoreerrors", "only_download")
    set_default_compat("no-playlist-metafiles", "allow_playlist_files")
    set_default_compat("no-clean-infojson", "clean_infojson")
    if "format-sort" in compat_opts:
        opts.format_sort.extend(module.InfoExtractor.FormatSort.ytdl_default)
    # 'multistreams' controls two options; only rewrite the compat
    # option when it affected neither of them
    _video_multistreams_set = set_default_compat(
        "multistreams", "allow_multiple_video_streams",
        False, remove_compat=False)
    _audio_multistreams_set = set_default_compat(
        "multistreams", "allow_multiple_audio_streams",
        False, remove_compat=False)
    if _video_multistreams_set is False and _audio_multistreams_set is False:
        _unused_compat_opt("multistreams")

    # output template: either a dict of templates or replaced by ""
    if isinstance(opts.outtmpl, dict):
        outtmpl = opts.outtmpl
        outtmpl_default = outtmpl.get("default")
    else:
        opts.outtmpl = outtmpl = outtmpl_default = ""

    if "filename" in compat_opts:
        if outtmpl_default is None:
            outtmpl_default = outtmpl["default"] = "%(title)s-%(id)s.%(ext)s"
        else:
            _unused_compat_opt("filename")

    if opts.extractaudio and not opts.keepvideo and opts.format is None:
        opts.format = "bestaudio/best"

    if ytdlp:
        def metadataparser_actions(f):
            """Yield MetadataParser actions for one 'parse_metadata' entry"""
            if isinstance(f, str):
                yield module.MetadataFromFieldPP.to_action(f)
            else:
                # sequence: comma-separated field names followed by
                # the remaining arguments of a REPLACE action
                REPLACE = module.MetadataParserPP.Actions.REPLACE
                args = f[1:]
                for x in f[0].split(","):
                    action = [REPLACE, x]
                    action += args
                    yield action

        if getattr(opts, "parse_metadata", None) is None:
            opts.parse_metadata = []
        if opts.metafromtitle is not None:
            # expressed as a 'parse_metadata' entry; resetting the
            # option keeps "MetadataFromTitle" from being added below
            opts.parse_metadata.append("title:%s" % opts.metafromtitle)
            opts.metafromtitle = None
        opts.parse_metadata = list(itertools.chain.from_iterable(map(
            metadataparser_actions, opts.parse_metadata)))
    else:
        opts.parse_metadata = ()

    download_archive_fn = module.expand_path(opts.download_archive) \
        if opts.download_archive is not None else opts.download_archive

    if getattr(opts, "getcomments", None):
        opts.writeinfojson = True

    if getattr(opts, "no_sponsorblock", None):
        opts.sponsorblock_mark = set()
        opts.sponsorblock_remove = set()
    else:
        opts.sponsorblock_mark = \
            getattr(opts, "sponsorblock_mark", None) or set()
        opts.sponsorblock_remove = \
            getattr(opts, "sponsorblock_remove", None) or set()
    sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove

    addchapters = getattr(opts, "addchapters", None)
    if (opts.addmetadata or opts.sponsorblock_mark) and addchapters is None:
        addchapters = True
    opts.remove_chapters = getattr(opts, "remove_chapters", None) or ()

    # PostProcessors
    # (appended in the order in which they are listed here)
    postprocessors = []
    if opts.metafromtitle:
        postprocessors.append({
            "key": "MetadataFromTitle",
            "titleformat": opts.metafromtitle,
        })
    if getattr(opts, "add_postprocessors", None):
        postprocessors += list(opts.add_postprocessors)
    if sponsorblock_query:
        postprocessors.append({
            "key": "SponsorBlock",
            "categories": sponsorblock_query,
            "api": opts.sponsorblock_api,
            "when": "pre_process",
        })
    if opts.parse_metadata:
        postprocessors.append({
            "key": "MetadataParser",
            "actions": opts.parse_metadata,
            "when": "pre_process",
        })
    if opts.convertsubtitles:
        pp = {"key": "FFmpegSubtitlesConvertor",
              "format": opts.convertsubtitles}
        if ytdlp:
            pp["when"] = "before_dl"
        postprocessors.append(pp)
    if getattr(opts, "convertthumbnails", None):
        postprocessors.append({
            "key": "FFmpegThumbnailsConvertor",
            "format": opts.convertthumbnails,
            "when": "before_dl",
        })
    if getattr(opts, "exec_before_dl_cmd", None):
        postprocessors.append({
            "key": "Exec",
            "exec_cmd": opts.exec_before_dl_cmd,
            "when": "before_dl",
        })
    if opts.extractaudio:
        postprocessors.append({
            "key": "FFmpegExtractAudio",
            "preferredcodec": opts.audioformat,
            "preferredquality": opts.audioquality,
            "nopostoverwrites": opts.nopostoverwrites,
        })
    if getattr(opts, "remuxvideo", None):
        postprocessors.append({
            "key": "FFmpegVideoRemuxer",
            "preferedformat": opts.remuxvideo,
        })
    if opts.recodevideo:
        postprocessors.append({
            "key": "FFmpegVideoConvertor",
            "preferedformat": opts.recodevideo,
        })
    if opts.embedsubtitles:
        pp = {"key": "FFmpegEmbedSubtitle"}
        if ytdlp:
            pp["already_have_subtitle"] = (
                opts.writesubtitles and "no-keep-subs" not in compat_opts)
        postprocessors.append(pp)
        # embedding requires the subtitles to be written first
        if not opts.writeautomaticsub and "no-keep-subs" not in compat_opts:
            opts.writesubtitles = True
    if opts.allsubtitles and not opts.writeautomaticsub:
        opts.writesubtitles = True
    # 'remove_chapters' entries are regular expressions, except for
    # "*START-END" entries whose two parts both parse as durations
    remove_chapters_patterns, remove_ranges = [], []
    for regex in opts.remove_chapters:
        if regex.startswith("*"):
            dur = list(map(module.parse_duration, regex[1:].split("-")))
            if len(dur) == 2 and all(t is not None for t in dur):
                remove_ranges.append(tuple(dur))
                continue
        remove_chapters_patterns.append(re.compile(regex))
    if opts.remove_chapters or sponsorblock_query:
        postprocessors.append({
            "key": "ModifyChapters",
            "remove_chapters_patterns": remove_chapters_patterns,
            "remove_sponsor_segments": opts.sponsorblock_remove,
            "remove_ranges": remove_ranges,
            "sponsorblock_chapter_title": opts.sponsorblock_chapter_title,
            "force_keyframes": opts.force_keyframes_at_cuts,
        })
    if opts.addmetadata or addchapters:
        pp = {"key": "FFmpegMetadata"}
        if ytdlp:
            pp["add_chapters"] = addchapters
            pp["add_metadata"] = opts.addmetadata
        postprocessors.append(pp)
    # only an explicit False (or a missing option) disables SponSkrub;
    # a value of None enables it with errors being ignored
    if getattr(opts, "sponskrub", False) is not False:
        postprocessors.append({
            "key": "SponSkrub",
            "path": opts.sponskrub_path,
            "args": opts.sponskrub_args,
            "cut": opts.sponskrub_cut,
            "force": opts.sponskrub_force,
            "ignoreerror": opts.sponskrub is None,
        })
    if opts.embedthumbnail:
        already_have_thumbnail = (opts.writethumbnail or
                                  opts.write_all_thumbnails)
        postprocessors.append({
            "key": "EmbedThumbnail",
            "already_have_thumbnail": already_have_thumbnail,
        })
        if not already_have_thumbnail:
            # embedding requires the thumbnail to be written first
            opts.writethumbnail = True
            if isinstance(opts.outtmpl, dict):
                opts.outtmpl["pl_thumbnail"] = ""
    if getattr(opts, "split_chapters", None):
        postprocessors.append({
            "key": "FFmpegSplitChapters",
            "force_keyframes": opts.force_keyframes_at_cuts,
        })
    if opts.xattrs:
        postprocessors.append({"key": "XAttrMetadata"})
    if opts.exec_cmd:
        postprocessors.append({
            "key": "Exec",
            "exec_cmd": opts.exec_cmd,
            "when": "after_move",
        })

    match_filter = (
        None if opts.match_filter is None
        else module.match_filter_func(opts.match_filter))

    return {
        "usenetrc": opts.usenetrc,
        "netrc_location": getattr(opts, "netrc_location", None),
        "username": opts.username,
        "password": opts.password,
        "twofactor": opts.twofactor,
        "videopassword": opts.videopassword,
        "ap_mso": opts.ap_mso,
        "ap_username": opts.ap_username,
        "ap_password": opts.ap_password,
        "quiet": opts.quiet,
        "no_warnings": opts.no_warnings,
        "forceurl": opts.geturl,
        "forcetitle": opts.gettitle,
        "forceid": opts.getid,
        "forcethumbnail": opts.getthumbnail,
        "forcedescription": opts.getdescription,
        "forceduration": opts.getduration,
        "forcefilename": opts.getfilename,
        "forceformat": opts.getformat,
        "forceprint": getattr(opts, "forceprint", None) or (),
        "force_write_download_archive": getattr(
            opts, "force_write_download_archive", None),
        "simulate": opts.simulate,
        "skip_download": opts.skip_download,
        "format": opts.format,
        "allow_unplayable_formats": getattr(
            opts, "allow_unplayable_formats", None),
        "ignore_no_formats_error": getattr(
            opts, "ignore_no_formats_error", None),
        "format_sort": getattr(
            opts, "format_sort", None),
        "format_sort_force": getattr(
            opts, "format_sort_force", None),
        "allow_multiple_video_streams": opts.allow_multiple_video_streams,
        "allow_multiple_audio_streams": opts.allow_multiple_audio_streams,
        "check_formats": getattr(
            opts, "check_formats", None),
        "listformats": opts.listformats,
        "listformats_table": getattr(
            opts, "listformats_table", None),
        "outtmpl": opts.outtmpl,
        "outtmpl_na_placeholder": opts.outtmpl_na_placeholder,
        "paths": getattr(opts, "paths", None),
        "autonumber_size": opts.autonumber_size,
        "autonumber_start": opts.autonumber_start,
        "restrictfilenames": opts.restrictfilenames,
        "windowsfilenames": getattr(opts, "windowsfilenames", None),
        "ignoreerrors": opts.ignoreerrors,
        "force_generic_extractor": opts.force_generic_extractor,
        "ratelimit": opts.ratelimit,
        "throttledratelimit": getattr(opts, "throttledratelimit", None),
        "overwrites": getattr(opts, "overwrites", None),
        "retries": opts.retries,
        "fragment_retries": opts.fragment_retries,
        "extractor_retries": getattr(opts, "extractor_retries", None),
        "skip_unavailable_fragments": opts.skip_unavailable_fragments,
        "keep_fragments": opts.keep_fragments,
        "concurrent_fragment_downloads": getattr(
            opts, "concurrent_fragment_downloads", None),
        "buffersize": opts.buffersize,
        "noresizebuffer": opts.noresizebuffer,
        "http_chunk_size": opts.http_chunk_size,
        "continuedl": opts.continue_dl,
        "noprogress": True if opts.noprogress is None else opts.noprogress,
        "playliststart": opts.playliststart,
        "playlistend": opts.playlistend,
        "playlistreverse": opts.playlist_reverse,
        "playlistrandom": opts.playlist_random,
        "noplaylist": opts.noplaylist,
        "logtostderr": outtmpl_default == "-",
        "consoletitle": opts.consoletitle,
        "nopart": opts.nopart,
        "updatetime": opts.updatetime,
        "writedescription": opts.writedescription,
        "writeannotations": opts.writeannotations,
        "writeinfojson": opts.writeinfojson,
        "allow_playlist_files": opts.allow_playlist_files,
        "clean_infojson": opts.clean_infojson,
        "getcomments": getattr(opts, "getcomments", None),
        "writethumbnail": opts.writethumbnail,
        "write_all_thumbnails": opts.write_all_thumbnails,
        "writelink": getattr(opts, "writelink", None),
        "writeurllink": getattr(opts, "writeurllink", None),
        "writewebloclink": getattr(opts, "writewebloclink", None),
        "writedesktoplink": getattr(opts, "writedesktoplink", None),
        "writesubtitles": opts.writesubtitles,
        "writeautomaticsub": opts.writeautomaticsub,
        "allsubtitles": opts.allsubtitles,
        "listsubtitles": opts.listsubtitles,
        "subtitlesformat": opts.subtitlesformat,
        "subtitleslangs": opts.subtitleslangs,
        "matchtitle": module.decodeOption(opts.matchtitle),
        "rejecttitle": module.decodeOption(opts.rejecttitle),
        "max_downloads": opts.max_downloads,
        "prefer_free_formats": opts.prefer_free_formats,
        "trim_file_name": getattr(opts, "trim_file_name", None),
        "verbose": opts.verbose,
        "dump_intermediate_pages": opts.dump_intermediate_pages,
        "write_pages": opts.write_pages,
        "test": opts.test,
        "keepvideo": opts.keepvideo,
        "min_filesize": opts.min_filesize,
        "max_filesize": opts.max_filesize,
        "min_views": opts.min_views,
        "max_views": opts.max_views,
        "daterange": date,
        "cachedir": opts.cachedir,
        "youtube_print_sig_code": opts.youtube_print_sig_code,
        "age_limit": opts.age_limit,
        "download_archive": download_archive_fn,
        "break_on_existing": getattr(opts, "break_on_existing", None),
        "break_on_reject": getattr(opts, "break_on_reject", None),
        "skip_playlist_after_errors": getattr(
            opts, "skip_playlist_after_errors", None),
        "cookiefile": opts.cookiefile,
        "cookiesfrombrowser": getattr(opts, "cookiesfrombrowser", None),
        "nocheckcertificate": opts.no_check_certificate,
        "prefer_insecure": opts.prefer_insecure,
        "proxy": opts.proxy,
        "socket_timeout": opts.socket_timeout,
        "bidi_workaround": opts.bidi_workaround,
        "debug_printtraffic": opts.debug_printtraffic,
        "prefer_ffmpeg": opts.prefer_ffmpeg,
        "include_ads": opts.include_ads,
        "default_search": opts.default_search,
        "dynamic_mpd": getattr(opts, "dynamic_mpd", None),
        "extractor_args": getattr(opts, "extractor_args", None),
        "youtube_include_dash_manifest": getattr(
            opts, "youtube_include_dash_manifest", None),
        "youtube_include_hls_manifest": getattr(
            opts, "youtube_include_hls_manifest", None),
        "encoding": opts.encoding,
        "extract_flat": opts.extract_flat,
        "mark_watched": opts.mark_watched,
        "merge_output_format": opts.merge_output_format,
        "postprocessors": postprocessors,
        "fixup": opts.fixup,
        "source_address": opts.source_address,
        "sleep_interval_requests": getattr(
            opts, "sleep_interval_requests", None),
        "sleep_interval": opts.sleep_interval,
        "max_sleep_interval": opts.max_sleep_interval,
        "sleep_interval_subtitles": getattr(
            opts, "sleep_interval_subtitles", None),
        "external_downloader": opts.external_downloader,
        "playlist_items": opts.playlist_items,
        "xattr_set_filesize": opts.xattr_set_filesize,
        "match_filter": match_filter,
        "no_color": opts.no_color,
        "ffmpeg_location": opts.ffmpeg_location,
        "hls_prefer_native": opts.hls_prefer_native,
        "hls_use_mpegts": opts.hls_use_mpegts,
        "hls_split_discontinuity": getattr(
            opts, "hls_split_discontinuity", None),
        "external_downloader_args": opts.external_downloader_args,
        "postprocessor_args": opts.postprocessor_args,
        "cn_verification_proxy": opts.cn_verification_proxy,
        "geo_verification_proxy": opts.geo_verification_proxy,
        "geo_bypass": opts.geo_bypass,
        "geo_bypass_country": opts.geo_bypass_country,
        "geo_bypass_ip_block": opts.geo_bypass_ip_block,
        "compat_opts": compat_opts,
    }


def parse_retries(retries, name=""):
    """Convert a retry count into a number

    Arguments:
        retries: "inf" or "infinite" for unlimited retries,
            otherwise anything int() accepts
        name: prefix naming the kind of retry (e.g. "fragment "),
            only used in the error message

    Returns:
        float("inf") for unlimited retries, otherwise an int

    Raises:
        ValueError: 'retries' cannot be interpreted as a retry count
    """
    if retries in ("inf", "infinite"):
        return float("inf")
    try:
        return int(retries)
    except (TypeError, ValueError):
        raise ValueError("invalid %sretry count specified: %r" % (
            name, retries)) from None
