summary refs log tree commit diff stats
path: root/gallery_dl/downloader/http.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/downloader/http.py')
-rw-r--r-- gallery_dl/downloader/http.py 139
1 files changed, 83 insertions, 56 deletions
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 5622462..26eb7b5 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -27,10 +27,11 @@ class HttpDownloader(DownloaderBase):
def __init__(self, job):
DownloaderBase.__init__(self, job)
extractor = job.extractor
- self.chunk_size = 16384
self.downloading = False
self.adjust_extension = self.config("adjust-extensions", True)
+ self.chunk_size = self.config("chunk-size", 32768)
+ self.metadata = extractor.config("http-metadata")
self.progress = self.config("progress", 3.0)
self.headers = self.config("headers")
self.minsize = self.config("filesize-min")
@@ -55,6 +56,13 @@ class HttpDownloader(DownloaderBase):
self.log.warning(
"Invalid maximum file size (%r)", self.maxsize)
self.maxsize = maxsize
+ if isinstance(self.chunk_size, str):
+ chunk_size = text.parse_bytes(self.chunk_size)
+ if not chunk_size:
+ self.log.warning(
+ "Invalid chunk size (%r)", self.chunk_size)
+ chunk_size = 32768
+ self.chunk_size = chunk_size
if self.rate:
rate = text.parse_bytes(self.rate)
if rate:
@@ -83,11 +91,12 @@ class HttpDownloader(DownloaderBase):
tries = 0
msg = ""
+ metadata = self.metadata
kwdict = pathfmt.kwdict
adjust_extension = kwdict.get(
"_http_adjust_extension", self.adjust_extension)
- if self.part:
+ if self.part and not metadata:
pathfmt.part_enable(self.partdir)
while True:
@@ -164,13 +173,6 @@ class HttpDownloader(DownloaderBase):
self.log.warning("Invalid response")
return False
- # set missing filename extension from MIME type
- if not pathfmt.extension:
- pathfmt.set_extension(self._find_extension(response))
- if pathfmt.exists():
- pathfmt.temppath = ""
- return True
-
# check file size
size = text.parse_int(size, None)
if size is not None:
@@ -185,11 +187,33 @@ class HttpDownloader(DownloaderBase):
size, self.maxsize)
return False
+ build_path = False
+
+ # set missing filename extension from MIME type
+ if not pathfmt.extension:
+ pathfmt.set_extension(self._find_extension(response))
+ build_path = True
+
+ # set metadata from HTTP headers
+ if metadata:
+ kwdict[metadata] = util.extract_headers(response)
+ build_path = True
+
+ # build and check file path
+ if build_path:
+ pathfmt.build_path()
+ if pathfmt.exists():
+ pathfmt.temppath = ""
+ return True
+ if self.part and metadata:
+ pathfmt.part_enable(self.partdir)
+ metadata = False
+
content = response.iter_content(self.chunk_size)
# check filename extension against file header
if adjust_extension and not offset and \
- pathfmt.extension in FILE_SIGNATURES:
+ pathfmt.extension in SIGNATURE_CHECKS:
try:
file_header = next(
content if response.raw.chunked
@@ -220,7 +244,7 @@ class HttpDownloader(DownloaderBase):
offset += len(file_header)
elif offset:
if adjust_extension and \
- pathfmt.extension in FILE_SIGNATURES:
+ pathfmt.extension in SIGNATURE_CHECKS:
self._adjust_extension(pathfmt, fp.read(16))
fp.seek(offset)
@@ -250,42 +274,38 @@ class HttpDownloader(DownloaderBase):
return True
@staticmethod
- def receive(fp, content, bytes_total, bytes_downloaded):
+ def receive(fp, content, bytes_total, bytes_start):
write = fp.write
for data in content:
write(data)
- def _receive_rate(self, fp, content, bytes_total, bytes_downloaded):
+ def _receive_rate(self, fp, content, bytes_total, bytes_start):
rate = self.rate
- progress = self.progress
- bytes_start = bytes_downloaded
write = fp.write
- t1 = tstart = time.time()
+ progress = self.progress
+
+ bytes_downloaded = 0
+ time_start = time.time()
for data in content:
- write(data)
+ time_current = time.time()
+ time_elapsed = time_current - time_start
+ bytes_downloaded += len(data)
- t2 = time.time() # current time
- elapsed = t2 - t1 # elapsed time
- num_bytes = len(data)
+ write(data)
if progress is not None:
- bytes_downloaded += num_bytes
- tdiff = t2 - tstart
- if tdiff >= progress:
+ if time_elapsed >= progress:
self.out.progress(
- bytes_total, bytes_downloaded,
- int((bytes_downloaded - bytes_start) / tdiff),
+ bytes_total,
+ bytes_start + bytes_downloaded,
+ int(bytes_downloaded / time_elapsed),
)
if rate:
- expected = num_bytes / rate # expected elapsed time
- if elapsed < expected:
- # sleep if less time elapsed than expected
- time.sleep(expected - elapsed)
- t2 = time.time()
-
- t1 = t2
+ time_expected = bytes_downloaded / rate
+ if time_expected > time_elapsed:
+ time.sleep(time_expected - time_elapsed)
def _find_extension(self, response):
"""Get filename extension from MIME type"""
@@ -308,11 +328,11 @@ class HttpDownloader(DownloaderBase):
@staticmethod
def _adjust_extension(pathfmt, file_header):
"""Check filename extension against file header"""
- sig = FILE_SIGNATURES[pathfmt.extension]
- if not file_header.startswith(sig):
- for ext, sig in FILE_SIGNATURES.items():
- if file_header.startswith(sig):
+ if not SIGNATURE_CHECKS[pathfmt.extension](file_header):
+ for ext, check in SIGNATURE_CHECKS.items():
+ if check(file_header):
pathfmt.set_extension(ext)
+ pathfmt.build_path()
return True
return False
@@ -326,6 +346,7 @@ MIME_TYPES = {
"image/x-bmp" : "bmp",
"image/x-ms-bmp": "bmp",
"image/webp" : "webp",
+ "image/avif" : "avif",
"image/svg+xml" : "svg",
"image/ico" : "ico",
"image/icon" : "ico",
@@ -362,27 +383,33 @@ MIME_TYPES = {
}
# https://en.wikipedia.org/wiki/List_of_file_signatures
-FILE_SIGNATURES = {
- "jpg" : b"\xFF\xD8\xFF",
- "png" : b"\x89PNG\r\n\x1A\n",
- "gif" : (b"GIF87a", b"GIF89a"),
- "bmp" : b"BM",
- "webp": b"RIFF",
- "svg" : b"<?xml",
- "ico" : b"\x00\x00\x01\x00",
- "cur" : b"\x00\x00\x02\x00",
- "psd" : b"8BPS",
- "webm": b"\x1A\x45\xDF\xA3",
- "ogg" : b"OggS",
- "wav" : b"RIFF",
- "mp3" : (b"\xFF\xFB", b"\xFF\xF3", b"\xFF\xF2", b"ID3"),
- "zip" : (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"),
- "rar" : b"\x52\x61\x72\x21\x1A\x07",
- "7z" : b"\x37\x7A\xBC\xAF\x27\x1C",
- "pdf" : b"%PDF-",
- "swf" : (b"CWS", b"FWS"),
+SIGNATURE_CHECKS = {
+ "jpg" : lambda s: s[0:3] == b"\xFF\xD8\xFF",
+ "png" : lambda s: s[0:8] == b"\x89PNG\r\n\x1A\n",
+ "gif" : lambda s: s[0:6] in (b"GIF87a", b"GIF89a"),
+ "bmp" : lambda s: s[0:2] == b"BM",
+ "webp": lambda s: (s[0:4] == b"RIFF" and
+ s[8:12] == b"WEBP"),
+ "avif": lambda s: s[4:11] == b"ftypavi" and s[11] in b"fs",
+ "svg" : lambda s: s[0:5] == b"<?xml",
+ "ico" : lambda s: s[0:4] == b"\x00\x00\x01\x00",
+ "cur" : lambda s: s[0:4] == b"\x00\x00\x02\x00",
+ "psd" : lambda s: s[0:4] == b"8BPS",
+ "mp4" : lambda s: (s[4:8] == b"ftyp" and s[8:11] in (
+ b"mp4", b"avc", b"iso", b"M4V")),
+ "webm": lambda s: s[0:4] == b"\x1A\x45\xDF\xA3",
+ "ogg" : lambda s: s[0:4] == b"OggS",
+ "wav" : lambda s: (s[0:4] == b"RIFF" and
+ s[8:12] == b"WAVE"),
+ "mp3" : lambda s: (s[0:3] == b"ID3" or
+ s[0:2] in (b"\xFF\xFB", b"\xFF\xF3", b"\xFF\xF2")),
+ "zip" : lambda s: s[0:4] in (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"),
+ "rar" : lambda s: s[0:6] == b"Rar!\x1A\x07",
+ "7z" : lambda s: s[0:6] == b"\x37\x7A\xBC\xAF\x27\x1C",
+ "pdf" : lambda s: s[0:5] == b"%PDF-",
+ "swf" : lambda s: s[0:3] in (b"CWS", b"FWS"),
# check 'bin' files against all other file signatures
- "bin" : b"\x00\x00\x00\x00\x00\x00\x00\x00",
+ "bin" : lambda s: False,
}
__downloader__ = HttpDownloader