aboutsummaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/recursive.py
blob: 1a793a0d77b18d77dc6e9e41e07549953d7d1708 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# -*- coding: utf-8 -*-

# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Recursive extractor"""

from .common import Extractor, Message
from .. import extractor, util
import requests
import re


class RecursiveExtractor(Extractor):
    """Extractor that queues every http(s) URL found in a text source

    Handles URLs prefixed with "r:" or "recursive:"; the remainder of
    the URL names the remote (or "file://") resource to scan.
    """
    category = "recursive"
    pattern = r"r(?:ecursive)?:"
    test = ("recursive:https://pastebin.com/raw/FLwrCYsT", {
        "url": "eee86d65c346361b818e8f4b2b307d9429f136a2",
    })

    def items(self):
        """Fetch the source and yield one Queue message per URL found"""
        default_blacklist = {"directlink"} | util.SPECIAL_EXTRACTORS
        blacklisted = self.config("blacklist", default_blacklist)

        # let this session resolve "file://" URLs through FileAdapter
        self.session.mount("file://", FileAdapter())

        # everything after the first ':' is the location of the source
        _, _, source = self.url.partition(":")
        text = self.request(source).text

        yield Message.Version, 1

        url_pattern = re.compile(r"https?://[^\s\"']+")
        # URLs are queued while the extractor blacklist is in effect
        with extractor.blacklist(blacklisted):
            for found in url_pattern.finditer(text):
                yield Message.Queue, found.group(0), {}


class FileAdapter(requests.adapters.BaseAdapter):
    """Requests adapter for local files

    Answers requests for "file://" URLs with the content of the
    referenced file on the local filesystem.
    """

    def send(self, request, **kwargs):
        """Build a Response for the local file named by 'request.url'

        The filesystem path is everything after the 7-character
        "file://" prefix of the URL. Extra keyword arguments are
        accepted and ignored.

        Returns a requests.Response whose status code is 'ok' and whose
        body is the file's content, or whose status code is
        'bad_request' with an empty body if the file could not be
        opened or read (OSError).
        """
        import io

        response = requests.Response()
        # record the originating request and its URL on the response
        response.url = request.url
        response.request = request

        try:
            # Read eagerly: reading a Response body does not close its
            # raw stream, so handing out the open file object would keep
            # the handle alive until garbage collection.
            with open(request.url[7:], "rb") as fp:
                content = fp.read()
        except OSError:
            response.raw = io.BytesIO()
            response.status_code = requests.codes.bad_request
        else:
            response.raw = io.BytesIO(content)
            response.status_code = requests.codes.ok
        return response

    def close(self):
        """No-op; this adapter keeps no resources between requests"""
        pass