diff options
Diffstat (limited to 'gallery_dl/downloader/http.py')
| -rw-r--r-- | gallery_dl/downloader/http.py | 128 |
1 files changed, 128 insertions, 0 deletions
# -*- coding: utf-8 -*-

# Copyright 2014-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Downloader module for http:// and https:// URLs"""

import time
import mimetypes
from requests.exceptions import ConnectionError, HTTPError, Timeout
from .common import DownloaderBase
from .. import text, exception


class HttpDownloader(DownloaderBase):
    """Downloader for http:// and https:// URLs.

    The object keeps the currently open response in ``self.response``;
    ``connect()`` opens it, ``receive()`` streams its body into a file,
    ``get_extension()`` inspects its headers and ``reset()`` closes it.

    NOTE(review): ``self.config``, ``self.session`` and ``self.log`` are
    not defined in this module; presumably they are provided by
    ``DownloaderBase`` -- confirm against ``common.py``.
    """
    scheme = "http"

    def __init__(self, extractor, output):
        """Read connection settings and the optional rate limit.

        'retries', 'timeout' and 'verify' are looked up through
        ``self.config`` and fall back to the extractor's ``_retries``,
        ``_timeout`` and ``_verify`` attributes.
        """
        DownloaderBase.__init__(self, extractor, output)
        self.response = None
        self.retries = self.config("retries", extractor._retries)
        self.timeout = self.config("timeout", extractor._timeout)
        self.verify = self.config("verify", extractor._verify)
        self.rate = self.config("rate")
        self.chunk_size = 16384

        if self.rate:
            self.rate = text.parse_bytes(self.rate)
            if not self.rate:
                # a falsy parse result disables rate limiting in receive()
                self.log.warning("Invalid rate limit specified")
            elif self.rate < self.chunk_size:
                # never read more per chunk than the limit allows per second
                self.chunk_size = self.rate

    def connect(self, url, offset):
        """Open a streaming GET request for 'url'.

        A truthy 'offset' is sent as an open-ended ``Range`` header
        (``bytes=<offset>-``).

        Returns a tuple ``(offset, size)``: 'offset' is reset to 0 when
        the server answers 200 (it sent the whole resource), and 'size'
        is the result of ``text.parse_int()`` on the ``Content-Length``
        header (200) or on the part after "/" in ``Content-Range`` (206).

        Raises:
            exception.DownloadRetry: connection error, timeout, or a
                429 / 5xx response.
            exception.DownloadComplete: 416 response.
            requests.exceptions.HTTPError: any other status code.
        """
        headers = {}
        if offset:
            headers["Range"] = "bytes={}-".format(offset)

        try:
            self.response = self.session.request(
                "GET", url, stream=True, headers=headers,
                allow_redirects=True,
                timeout=self.timeout, verify=self.verify)
        except (ConnectionError, Timeout) as exc:
            raise exception.DownloadRetry(exc)

        code = self.response.status_code
        if code == 200:  # OK
            # the Range header (if any) was ignored; start from scratch
            offset = 0
            size = self.response.headers.get("Content-Length")
        elif code == 206:  # Partial Content
            # "bytes <start>-<end>/<total>" -> "<total>"
            size = self.response.headers["Content-Range"].rpartition("/")[2]
        elif code == 416:  # Requested Range Not Satisfiable
            raise exception.DownloadComplete()
        elif code == 429 or 500 <= code < 600:  # Server Error
            raise exception.DownloadRetry(
                "{} Server Error: {} for url: {}".format(
                    code, self.response.reason, url))
        else:
            self.response.raise_for_status()
            # raise_for_status() only raises for 4xx/5xx responses.
            # Any other status (1xx, 201-205, an unfollowed 3xx, ...)
            # carries no usable payload information, so fail explicitly
            # instead of running into an unbound 'size' below.
            raise HTTPError(
                "{} Unexpected Response: {} for url: {}".format(
                    code, self.response.reason, url),
                response=self.response)

        return offset, text.parse_int(size)

    def receive(self, file):
        """Write the body of the current response to 'file'.

        The body is read in pieces of ``self.chunk_size`` bytes. If a
        rate limit is set, the loop sleeps whenever the data received so
        far arrived faster than ``self.rate`` bytes per second.
        """
        if self.rate:
            total = 0    # total amount of bytes received
            start = time.time()  # start time

        for data in self.response.iter_content(self.chunk_size):
            file.write(data)

            if self.rate:
                total += len(data)
                expected = total / self.rate  # expected elapsed time
                delta = time.time() - start   # actual elapsed time
                if delta < expected:
                    # sleep if less time passed than expected
                    time.sleep(expected - delta)

    def reset(self):
        """Close and discard the current response, if there is one."""
        if self.response:
            self.response.close()
            self.response = None

    def get_extension(self):
        """Return a filename extension for the response's Content-Type.

        A missing header is treated as "image/jpeg". MIMETYPE_MAP is
        consulted first, then the 'mimetypes' module; "txt" is returned
        (with a warning) if neither knows the type.
        """
        mtype = self.response.headers.get("Content-Type", "image/jpeg")
        # drop parameters such as "; charset=utf-8"; media types are
        # case-insensitive, so normalize before the lookups
        mtype = mtype.partition(";")[0].strip().lower()

        if mtype in MIMETYPE_MAP:
            return MIMETYPE_MAP[mtype]

        exts = mimetypes.guess_all_extensions(mtype, strict=False)
        if exts:
            # pick the alphabetically last candidate and strip its dot
            return max(exts)[1:]

        self.log.warning(
            "No filename extension found for MIME type '%s'", mtype)
        return "txt"


# preferred filename extensions for common MIME types
MIMETYPE_MAP = {
    "image/jpeg": "jpg",
    "image/jpg": "jpg",
    "image/png": "png",
    "image/gif": "gif",
    "image/bmp": "bmp",
    "image/webp": "webp",
    "image/svg+xml": "svg",

    "video/webm": "webm",
    "video/ogg": "ogg",
    "video/mp4": "mp4",

    "audio/wav": "wav",
    "audio/x-wav": "wav",
    "audio/webm": "webm",
    "audio/ogg": "ogg",
    "audio/mpeg": "mp3",

    "application/ogg": "ogg",
    "application/octet-stream": "bin",
}


__downloader__ = HttpDownloader
