aboutsummaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/ytdl.py
blob: c1358967dcfb5a22a5a224b49e1b14fe5bd01b4f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# -*- coding: utf-8 -*-

# Copyright 2021-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for sites supported by youtube-dl"""

from .common import Extractor, Message
from .. import ytdl, config, exception


class YoutubeDLExtractor(Extractor):
    """Generic extractor for youtube-dl supported URLs

    Matches URLs carrying a 'ytdl:' prefix, picks the first youtube-dl
    extractor class that reports the remaining URL as suitable, and
    yields one Directory/Url message pair per extracted info_dict.
    """
    category = "ytdl"
    directory_fmt = ("{category}", "{subcategory}")
    filename_fmt = "{title}-{id}.{extension}"
    archive_fmt = "{extractor_key} {id}"
    pattern = r"ytdl:(.*)"
    example = "ytdl:https://www.youtube.com/watch?v=abcdefghijk"

    def __init__(self, match):
        """Choose the youtube-dl extractor and (sub)category for a URL

        'match' is the regex match of 'pattern'; its first group is the
        URL handed to youtube-dl.

        Raises exception.NoExtractorError when only the "Generic"
        extractor applies while the 'generic' option is disabled, or
        when no extractor class accepts the URL at all.
        """
        # import main youtube_dl module
        ytdl_module = ytdl.import_module(config.get(
            ("extractor", "ytdl"), "module"))
        self.ytdl_module_name = ytdl_module.__name__

        # find suitable youtube_dl extractor
        self.ytdl_url = url = match[1]
        generic = config.interpolate(("extractor", "ytdl"), "generic", True)
        if generic == "force":
            self.ytdl_ie_key = "Generic"
            self.force_generic_extractor = True
        else:
            for ie in ytdl_module.extractor.gen_extractor_classes():
                if ie.suitable(url):
                    self.ytdl_ie_key = ie.ie_key()
                    break
            else:
                # Nothing matched, so 'ytdl_ie_key' was never assigned;
                # fail with the regular "no extractor" error instead of
                # an AttributeError further down.
                raise exception.NoExtractorError()
            if not generic and self.ytdl_ie_key == "Generic":
                raise exception.NoExtractorError()
            self.force_generic_extractor = False

        if self.ytdl_ie_key == "Generic" and config.interpolate(
                ("extractor", "ytdl"), "generic-category", True):
            # set subcategory to URL domain
            self.category = "ytdl-generic"
            # the domain starts after the last '/' within the first
            # 8 characters (i.e. after a 'http://' or 'https://' scheme)
            start = url.rfind("/", None, 8) + 1
            end = url.find("/", 8)
            # str.find() returns -1 for URLs without a path component;
            # slicing up to -1 would cut off the domain's last character
            self.subcategory = url[start:] if end < 0 else url[start:end]
        else:
            # set subcategory to youtube_dl extractor's key
            self.subcategory = self.ytdl_ie_key
        Extractor.__init__(self, match)

    def items(self):
        """Yield Directory and Url messages for every extracted video

        Builds a YoutubeDL instance from this extractor's network and
        authentication settings, runs the selected youtube-dl extractor
        on the URL, and flattens playlist results.

        Raises exception.AbortExtraction when youtube-dl fails to
        extract data for the top-level URL.
        """
        # import subcategory module
        ytdl_module = ytdl.import_module(
            config.get(("extractor", "ytdl", self.subcategory), "module") or
            self.ytdl_module_name)
        self.log.debug("Using %s", ytdl_module)

        # construct YoutubeDL object
        extr_opts = {
            "extract_flat"           : "in_playlist",
            "force_generic_extractor": self.force_generic_extractor,
        }
        user_opts = {
            "retries"                : self._retries,
            "socket_timeout"         : self._timeout,
            "nocheckcertificate"     : not self._verify,
        }

        if self._proxies:
            user_opts["proxy"] = self._proxies.get("http")

        username, password = self._get_auth_info()
        if username:
            user_opts["username"], user_opts["password"] = username, password
        # do not keep credentials around in local variables
        del username, password

        ytdl_instance = ytdl.construct_YoutubeDL(
            ytdl_module, self, user_opts, extr_opts)

        # transfer cookies to ytdl
        if cookies := self.cookies:
            set_cookie = ytdl_instance.cookiejar.set_cookie
            for cookie in cookies:
                set_cookie(cookie)

        # extract youtube_dl info_dict
        try:
            # Calls YoutubeDL's name-mangled private '__extract_info' so
            # the already selected extractor object can be passed in.
            # NOTE(review): the trailing positional arguments presumably
            # are download=False, extra_info={}, process=True - confirm
            # against the ytdl module in use.
            info_dict = ytdl_instance._YoutubeDL__extract_info(
                self.ytdl_url,
                ytdl_instance.get_info_extractor(self.ytdl_ie_key),
                False, {}, True)
        except Exception as exc:
            # chain the original error so its traceback stays available
            raise exception.AbortExtraction(
                f"Failed to extract video data "
                f"({exc.__class__.__name__}: {exc})") from exc

        if not info_dict:
            return
        elif "entries" in info_dict:
            results = self._process_entries(
                ytdl_module, ytdl_instance, info_dict["entries"])
        else:
            results = (info_dict,)

        # yield results
        for info_dict in results:
            info_dict["extension"] = None
            # the info_dict references itself and the YoutubeDL instance
            # NOTE(review): presumably consumed by the ytdl downloader
            # that handles the 'ytdl:' URL yielded below - confirm
            info_dict["_ytdl_info_dict"] = info_dict
            info_dict["_ytdl_instance"] = ytdl_instance

            url = "ytdl:" + (info_dict.get("url") or
                             info_dict.get("webpage_url") or
                             self.ytdl_url)

            yield Message.Directory, "", info_dict
            yield Message.Url, url, info_dict

    def _process_entries(self, ytdl_module, ytdl_instance, entries):
        """Recursively flatten playlist 'entries' into single info_dicts

        Entries of type "url" or "url_transparent" are resolved with
        'ytdl_instance.extract_info()'. Empty entries and entries whose
        resolution raises YoutubeDLError are skipped.
        """
        for entry in entries:
            if not entry:
                continue

            if entry.get("_type") in ("url", "url_transparent"):
                try:
                    entry = ytdl_instance.extract_info(
                        entry["url"], False,
                        ie_key=entry.get("ie_key"))
                except ytdl_module.utils.YoutubeDLError as exc:
                    # best-effort: keep going, but leave a trace of
                    # which entry was dropped and why
                    self.log.debug(
                        "Skipping entry '%s' (%s: %s)",
                        entry.get("url"), exc.__class__.__name__, exc)
                    continue
                if not entry:
                    continue

            if "entries" in entry:
                yield from self._process_entries(
                    ytdl_module, ytdl_instance, entry["entries"])
            else:
                yield entry


# Module-level switch: when the "enabled" option under ("extractor", "ytdl")
# is truthy, replace the class pattern so YoutubeDLExtractor also matches
# URLs without an explicit 'ytdl:' prefix. The '(.*)' group then accepts
# any input string.
if config.get(("extractor", "ytdl"), "enabled"):
    # make 'ytdl:' prefix optional
    YoutubeDLExtractor.pattern = r"(?:ytdl:)?(.*)"