diff options
| author | 2019-07-02 04:33:45 -0400 | |
|---|---|---|
| committer | 2019-07-02 04:33:45 -0400 | |
| commit | 195c45911e79c33cf0bb986721365fb06df5a153 (patch) | |
| tree | ac0c9b6ef40bea7aa7ab0c5c3cb500eb510668fa /gallery_dl/extractor/recursive.py | |
Import Upstream version 1.8.7 (upstream/1.8.7)
Diffstat (limited to 'gallery_dl/extractor/recursive.py')
| -rw-r--r-- | gallery_dl/extractor/recursive.py | 55 |
1 files changed, 55 insertions, 0 deletions
# -*- coding: utf-8 -*-
# Provenance: diff --git a/gallery_dl/extractor/recursive.py
#             b/gallery_dl/extractor/recursive.py
#             (new file, mode 100644, index 0000000..1a793a0)

# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Recursive extractor"""

from .common import Extractor, Message
from .. import extractor, util
import requests
import io
import re


class RecursiveExtractor(Extractor):
    """Extractor that fetches URLs from a remote or local source"""
    category = "recursive"
    pattern = r"r(?:ecursive)?:"
    test = ("recursive:https://pastebin.com/raw/FLwrCYsT", {
        "url": "eee86d65c346361b818e8f4b2b307d9429f136a2",
    })

    def items(self):
        """Yield a queue message for every http(s) URL found in the source.

        The source document is addressed by everything after the first
        ":" of ``self.url`` (i.e. the part following the ``r:`` /
        ``recursive:`` prefix matched by ``pattern``).  ``file://``
        locations are served by ``FileAdapter``; anything else goes
        through the session's regular adapters.
        """
        # Extractors that must not be used for the queued URLs; can be
        # overridden through the "blacklist" config option.
        blist = self.config(
            "blacklist", {"directlink"} | util.SPECIAL_EXTRACTORS)

        # Teach the session how to "download" local files.
        self.session.mount("file://", FileAdapter())
        page = self.request(self.url.partition(":")[2]).text

        yield Message.Version, 1
        with extractor.blacklist(blist):
            # A URL ends at the first whitespace or quote character.
            for match in re.finditer(r"https?://[^\s\"']+", page):
                yield Message.Queue, match.group(0), {}


class FileAdapter(requests.adapters.BaseAdapter):
    """Requests adapter for local files"""

    # Length of this prefix is stripped from a URL to get the file path.
    _SCHEME = "file://"

    def send(self, request, **kwargs):
        """Open the file named by ``request.url`` and wrap it in a Response.

        Returns a response with status 200 whose ``raw`` is the opened
        file, or a response with status 400 and an empty body if the file
        cannot be opened (``OSError``).  Extra keyword arguments passed
        by the session (timeout, verify, ...) are accepted and ignored.
        """
        response = requests.Response()
        # Record where the response came from, as requests' own adapters
        # do, so callers inspecting response.url / response.request do
        # not see None.
        response.request = request
        response.url = request.url

        path = request.url[len(self._SCHEME):]
        try:
            response.raw = open(path, "rb")
        except OSError:
            # Report an unreadable file as a client error with no content.
            response.raw = io.BytesIO()
            response.status_code = requests.codes.bad_request
        else:
            # Response.close() invokes raw.release_conn() when present;
            # pointing it at close() makes sure the file gets closed.
            response.raw.release_conn = response.raw.close
            response.status_code = requests.codes.ok
        return response

    def close(self):
        """Do nothing; this adapter keeps no connections or other state."""
        pass
