summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/recursive.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/recursive.py')
-rw-r--r-- gallery_dl/extractor/recursive.py 37
1 files changed, 9 insertions, 28 deletions
diff --git a/gallery_dl/extractor/recursive.py b/gallery_dl/extractor/recursive.py
index 4dd9d5c..4156484 100644
--- a/gallery_dl/extractor/recursive.py
+++ b/gallery_dl/extractor/recursive.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,7 +9,6 @@
"""Recursive extractor"""
from .common import Extractor, Message
-import requests
import re
@@ -17,34 +16,16 @@ class RecursiveExtractor(Extractor):
"""Extractor that fetches URLs from a remote or local source"""
category = "recursive"
pattern = r"r(?:ecursive)?:"
- test = ("recursive:https://pastebin.com/raw/FLwrCYsT", {
- "url": "eee86d65c346361b818e8f4b2b307d9429f136a2",
- })
+ example = "recursive:https://pastebin.com/raw/FLwrCYsT"
def items(self):
- self.session.mount("file://", FileAdapter())
- page = self.request(self.url.partition(":")[2]).text
- del self.session.adapters["file://"]
+ url = self.url.partition(":")[2]
- for match in re.finditer(r"https?://[^\s\"']+", page):
- yield Message.Queue, match.group(0), {}
-
-
-class FileAdapter(requests.adapters.BaseAdapter):
- """Requests adapter for local files"""
-
- def send(self, request, **kwargs):
- response = requests.Response()
- try:
- response.raw = open(request.url[7:], "rb")
- except OSError:
- import io
- response.raw = io.BytesIO()
- response.status_code = requests.codes.bad_request
+ if url.startswith("file://"):
+ with open(url[7:]) as fp:
+ page = fp.read()
else:
- response.raw.release_conn = response.raw.close
- response.status_code = requests.codes.ok
- return response
+ page = self.request(url).text
- def close(self):
- pass
+ for match in re.finditer(r"https?://[^\s\"']+", page):
+ yield Message.Queue, match.group(0), {}