summary refs log tree commit diff stats
path: root/gallery_dl/downloader/http.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/downloader/http.py')
-rw-r--r-- gallery_dl/downloader/http.py 128
1 files changed, 128 insertions, 0 deletions
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
new file mode 100644
index 0000000..961c1a2
--- /dev/null
+++ b/gallery_dl/downloader/http.py
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2014-2018 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Downloader module for http:// and https:// URLs"""
+
+import time
+import mimetypes
+from requests.exceptions import ConnectionError, Timeout
+from .common import DownloaderBase
+from .. import text, exception
+
+
+class HttpDownloader(DownloaderBase):
+    """Downloader for resources served over http:// and https://
+
+    Partial downloads are resumed with an HTTP "Range" request and the
+    transfer speed can be capped through the "rate" config value.
+    """
+    scheme = "http"
+
+    def __init__(self, extractor, output):
+        """Read the downloader options
+
+        'retries', 'timeout' and 'verify' fall back to the values stored
+        on 'extractor' when they are not configured for the downloader.
+        """
+        DownloaderBase.__init__(self, extractor, output)
+        self.response = None
+        self.retries = self.config("retries", extractor._retries)
+        self.timeout = self.config("timeout", extractor._timeout)
+        self.verify = self.config("verify", extractor._verify)
+        self.rate = self.config("rate")
+        self.chunk_size = 16384
+
+        if self.rate:
+            self.rate = text.parse_bytes(self.rate)
+            if not self.rate:
+                self.log.warning("Invalid rate limit specified")
+            elif self.rate < self.chunk_size:
+                # never fetch more than one second's worth of data at once
+                self.chunk_size = self.rate
+
+    def connect(self, url, offset):
+        """Send a streaming GET request for 'url'
+
+        A non-zero 'offset' is sent as "Range" header to continue a
+        partial download from that byte position.
+
+        Returns an (offset, size) tuple. 'offset' is reset to 0 whenever
+        the server answers with the complete resource instead of the
+        requested range; 'size' is the parsed "Content-Length" value or,
+        for partial content, the total given in "Content-Range".
+
+        Raises exception.DownloadRetry on connection errors, timeouts,
+        429 and 5xx responses, exception.DownloadComplete on 416, and
+        whatever raise_for_status() raises for any other error status.
+        """
+        headers = {}
+        if offset:
+            headers["Range"] = "bytes={}-".format(offset)
+
+        try:
+            self.response = self.session.request(
+                "GET", url, stream=True, headers=headers,
+                allow_redirects=True,
+                timeout=self.timeout, verify=self.verify)
+        except (ConnectionError, Timeout) as exc:
+            raise exception.DownloadRetry(exc)
+
+        code = self.response.status_code
+        if code == 200:  # OK
+            offset = 0
+            size = self.response.headers.get("Content-Length")
+        elif code == 206:  # Partial Content
+            # "bytes <first>-<last>/<total>": keep the part after the "/";
+            # a missing header yields "" instead of raising KeyError
+            content_range = self.response.headers.get("Content-Range", "")
+            size = content_range.rpartition("/")[2]
+        elif code == 416:  # Requested Range Not Satisfiable
+            raise exception.DownloadComplete()
+        elif code == 429 or 500 <= code < 600:  # Server Error
+            raise exception.DownloadRetry(
+                "{} Server Error: {} for url: {}".format(
+                    code, self.response.reason, url))
+        else:
+            self.response.raise_for_status()
+            # Any other non-error status (e.g. 203) ends up here. The
+            # range request was not honored, so handle the body like a
+            # complete response instead of leaving 'size' undefined.
+            offset = 0
+            size = self.response.headers.get("Content-Length")
+
+        return offset, text.parse_int(size)
+
+    def receive(self, file):
+        """Write the response body to 'file' in chunks
+
+        With a rate limit set, sleep after each chunk for as long as
+        the transfer is ahead of the configured bytes-per-second rate.
+        """
+        rate = self.rate
+        if rate:
+            total = 0  # total amount of bytes received
+            # monotonic clock: unaffected by system clock adjustments
+            start = time.monotonic()
+
+        for data in self.response.iter_content(self.chunk_size):
+            file.write(data)
+
+            if rate:
+                total += len(data)
+                expected = total / rate  # expected elapsed time
+                delta = time.monotonic() - start  # actual elapsed time
+                if delta < expected:
+                    # sleep if less time passed than expected
+                    time.sleep(expected - delta)
+
+    def reset(self):
+        """Close and discard the current response, if there is one"""
+        if self.response:
+            self.response.close()
+            self.response = None
+
+    def get_extension(self):
+        """Return a filename extension for the response's Content-Type
+
+        MIMETYPE_MAP is consulted first, then the 'mimetypes' module.
+        If neither knows the type, a warning is logged and "txt" is
+        returned. A missing header is treated as "image/jpeg".
+        """
+        mtype = self.response.headers.get("Content-Type", "image/jpeg")
+        # drop parameters such as "; charset=..." and normalize, since
+        # MIME types are case-insensitive and may carry whitespace
+        mtype = mtype.partition(";")[0].strip().lower()
+
+        if mtype in MIMETYPE_MAP:
+            return MIMETYPE_MAP[mtype]
+
+        exts = mimetypes.guess_all_extensions(mtype, strict=False)
+        if exts:
+            # pick the alphabetically last candidate, without its "."
+            exts.sort()
+            return exts[-1][1:]
+
+        self.log.warning(
+            "No filename extension found for MIME type '%s'", mtype)
+        return "txt"
+
+
+# Preferred filename extension (without leading dot) per MIME type.
+# HttpDownloader.get_extension() checks this table before asking the
+# 'mimetypes' module.
+MIMETYPE_MAP = {
+    # images
+    "image/jpeg": "jpg",
+    "image/jpg": "jpg",
+    "image/png": "png",
+    "image/gif": "gif",
+    "image/bmp": "bmp",
+    "image/webp": "webp",
+    "image/svg+xml": "svg",
+    # video
+    "video/webm": "webm",
+    "video/ogg": "ogg",
+    "video/mp4": "mp4",
+    # audio
+    "audio/wav": "wav",
+    "audio/x-wav": "wav",
+    "audio/webm": "webm",
+    "audio/ogg": "ogg",
+    "audio/mpeg": "mp3",
+    # generic containers / binary data
+    "application/ogg": "ogg",
+    "application/octet-stream": "bin",
+}
+
+
+# Module-level reference to the downloader class defined in this file
+# (presumably the name the downloader loader looks up -- confirm there).
+__downloader__ = HttpDownloader