summaryrefslogtreecommitdiffstats
path: root/gallery_dl/downloader
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@ubuntu.com>2019-07-20 05:51:44 -0400
committerLibravatarUnit 193 <unit193@ubuntu.com>2019-07-20 05:51:44 -0400
commit2a63a9c9b7032a76894c48ac4d9cea732fcaee49 (patch)
tree3d5f633ff69cd393036a3dabc4d4533c8484f9ad /gallery_dl/downloader
parent195c45911e79c33cf0bb986721365fb06df5a153 (diff)
New upstream version 1.9.0upstream/1.9.0
Diffstat (limited to 'gallery_dl/downloader')
-rw-r--r--gallery_dl/downloader/__init__.py21
-rw-r--r--gallery_dl/downloader/common.py142
-rw-r--r--gallery_dl/downloader/http.py197
-rw-r--r--gallery_dl/downloader/text.py27
-rw-r--r--gallery_dl/downloader/ytdl.py7
5 files changed, 189 insertions, 205 deletions
diff --git a/gallery_dl/downloader/__init__.py b/gallery_dl/downloader/__init__.py
index 97972cd..6fb09e1 100644
--- a/gallery_dl/downloader/__init__.py
+++ b/gallery_dl/downloader/__init__.py
@@ -22,15 +22,24 @@ def find(scheme):
try:
return _cache[scheme]
except KeyError:
- klass = None
+ pass
+
+ klass = None
+ if scheme == "https":
+ scheme = "http"
+ if scheme in modules: # prevent unwanted imports
try:
- if scheme in modules: # prevent unwanted imports
- module = importlib.import_module("." + scheme, __package__)
- klass = module.__downloader__
- except (ImportError, AttributeError, TypeError):
+ module = importlib.import_module("." + scheme, __package__)
+ except ImportError:
pass
+ else:
+ klass = module.__downloader__
+
+ if scheme == "http":
+ _cache["http"] = _cache["https"] = klass
+ else:
_cache[scheme] = klass
- return klass
+ return klass
# --------------------------------------------------------------------
diff --git a/gallery_dl/downloader/common.py b/gallery_dl/downloader/common.py
index 4803c85..6e5cd4c 100644
--- a/gallery_dl/downloader/common.py
+++ b/gallery_dl/downloader/common.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2018 Mike Fährmann
+# Copyright 2014-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,23 +9,18 @@
"""Common classes and constants used by downloader modules."""
import os
-import time
import logging
-from .. import config, util, exception
-from requests.exceptions import RequestException
-from ssl import SSLError
+from .. import config, util
class DownloaderBase():
"""Base class for downloaders"""
scheme = ""
- retries = 1
def __init__(self, extractor, output):
self.session = extractor.session
self.out = output
self.log = logging.getLogger("downloader." + self.scheme)
- self.downloading = False
self.part = self.config("part", True)
self.partdir = self.config("part-directory")
@@ -34,137 +29,8 @@ class DownloaderBase():
os.makedirs(self.partdir, exist_ok=True)
def config(self, key, default=None):
- """Interpolate config value for 'key'"""
+ """Interpolate downloader config value for 'key'"""
return config.interpolate(("downloader", self.scheme, key), default)
def download(self, url, pathfmt):
- """Download the resource at 'url' and write it to a file-like object"""
- try:
- return self.download_impl(url, pathfmt)
- except Exception:
- print()
- raise
- finally:
- # remove file from incomplete downloads
- if self.downloading and not self.part:
- try:
- os.remove(pathfmt.temppath)
- except (OSError, AttributeError):
- pass
-
- def download_impl(self, url, pathfmt):
- """Actual implementaion of the download process"""
- adj_ext = None
- tries = 0
- msg = ""
-
- if self.part:
- pathfmt.part_enable(self.partdir)
-
- while True:
- self.reset()
- if tries:
- self.log.warning("%s (%d/%d)", msg, tries, self.retries)
- if tries >= self.retries:
- return False
- time.sleep(tries)
- tries += 1
-
- # check for .part file
- filesize = pathfmt.part_size()
-
- # connect to (remote) source
- try:
- offset, size = self.connect(url, filesize)
- except exception.DownloadRetry as exc:
- msg = exc
- continue
- except exception.DownloadComplete:
- break
- except Exception as exc:
- self.log.warning(exc)
- return False
-
- # check response
- if not offset:
- mode = "w+b"
- if filesize:
- self.log.info("Unable to resume partial download")
- else:
- mode = "r+b"
- self.log.info("Resuming download at byte %d", offset)
-
- # set missing filename extension
- if not pathfmt.has_extension:
- pathfmt.set_extension(self.get_extension())
- if pathfmt.exists():
- pathfmt.temppath = ""
- return True
-
- self.out.start(pathfmt.path)
- self.downloading = True
- with pathfmt.open(mode) as file:
- if offset:
- file.seek(offset)
-
- # download content
- try:
- self.receive(file)
- except (RequestException, SSLError) as exc:
- msg = exc
- print()
- continue
-
- # check filesize
- if size and file.tell() < size:
- msg = "filesize mismatch ({} < {})".format(
- file.tell(), size)
- continue
-
- # check filename extension
- adj_ext = self._check_extension(file, pathfmt)
-
- break
-
- self.downloading = False
- if adj_ext:
- pathfmt.set_extension(adj_ext)
- return True
-
- def connect(self, url, offset):
- """Connect to 'url' while respecting 'offset' if possible
-
- Returns a 2-tuple containing the actual offset and expected filesize.
- If the returned offset-value is greater than zero, all received data
- will be appended to the existing .part file.
- Return '0' as second tuple-field to indicate an unknown filesize.
- """
-
- def receive(self, file):
- """Write data to 'file'"""
-
- def reset(self):
- """Reset internal state / cleanup"""
-
- def get_extension(self):
- """Return a filename extension appropriate for the current request"""
-
- @staticmethod
- def _check_extension(file, pathfmt):
- """Check filename extension against fileheader"""
- extension = pathfmt.keywords["extension"]
- if extension in FILETYPE_CHECK:
- file.seek(0)
- header = file.read(8)
- if len(header) >= 8 and not FILETYPE_CHECK[extension](header):
- for ext, check in FILETYPE_CHECK.items():
- if ext != extension and check(header):
- return ext
- return None
-
-
-FILETYPE_CHECK = {
- "jpg": lambda h: h[0:2] == b"\xff\xd8",
- "png": lambda h: h[0:8] == b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a",
- "gif": lambda h: h[0:4] == b"GIF8" and h[5] == 97,
-}
+ """Write data from 'url' into the file specified by 'pathfmt'"""
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 961c1a2..7a95191 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2018 Mike Fährmann
+# Copyright 2014-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -8,11 +8,17 @@
"""Downloader module for http:// and https:// URLs"""
+import os
import time
import mimetypes
-from requests.exceptions import ConnectionError, Timeout
+from requests.exceptions import RequestException, ConnectionError, Timeout
from .common import DownloaderBase
-from .. import text, exception
+from .. import text
+
+try:
+ from OpenSSL.SSL import Error as SSLError
+except ImportError:
+ from ssl import SSLError
class HttpDownloader(DownloaderBase):
@@ -20,13 +26,16 @@ class HttpDownloader(DownloaderBase):
def __init__(self, extractor, output):
DownloaderBase.__init__(self, extractor, output)
- self.response = None
self.retries = self.config("retries", extractor._retries)
self.timeout = self.config("timeout", extractor._timeout)
self.verify = self.config("verify", extractor._verify)
+ self.mtime = self.config("mtime", True)
self.rate = self.config("rate")
+ self.downloading = False
self.chunk_size = 16384
+ if self.retries < 0:
+ self.retries = float("inf")
if self.rate:
self.rate = text.parse_bytes(self.rate)
if not self.rate:
@@ -34,41 +43,132 @@ class HttpDownloader(DownloaderBase):
elif self.rate < self.chunk_size:
self.chunk_size = self.rate
- def connect(self, url, offset):
- headers = {}
- if offset:
- headers["Range"] = "bytes={}-".format(offset)
-
+ def download(self, url, pathfmt):
try:
- self.response = self.session.request(
- "GET", url, stream=True, headers=headers, allow_redirects=True,
- timeout=self.timeout, verify=self.verify)
- except (ConnectionError, Timeout) as exc:
- raise exception.DownloadRetry(exc)
-
- code = self.response.status_code
- if code == 200: # OK
- offset = 0
- size = self.response.headers.get("Content-Length")
- elif code == 206: # Partial Content
- size = self.response.headers["Content-Range"].rpartition("/")[2]
- elif code == 416: # Requested Range Not Satisfiable
- raise exception.DownloadComplete()
- elif code == 429 or 500 <= code < 600: # Server Error
- raise exception.DownloadRetry(
- "{} Server Error: {} for url: {}".format(
- code, self.response.reason, url))
- else:
- self.response.raise_for_status()
-
- return offset, text.parse_int(size)
-
- def receive(self, file):
+ return self._download_impl(url, pathfmt)
+ except Exception:
+ print()
+ raise
+ finally:
+ # remove file from incomplete downloads
+ if self.downloading and not self.part:
+ try:
+ os.unlink(pathfmt.temppath)
+ except (OSError, AttributeError):
+ pass
+
+ def _download_impl(self, url, pathfmt):
+ response = None
+ adj_ext = None
+ tries = 0
+ msg = ""
+
+ if self.part:
+ pathfmt.part_enable(self.partdir)
+
+ while True:
+ if tries:
+ if response:
+ response.close()
+ self.log.warning("%s (%s/%s)", msg, tries, self.retries+1)
+ if tries > self.retries:
+ return False
+ time.sleep(min(2 ** (tries-1), 1800))
+ tries += 1
+
+ # check for .part file
+ filesize = pathfmt.part_size()
+ if filesize:
+ headers = {"Range": "bytes={}-".format(filesize)}
+ else:
+ headers = None
+
+ # connect to (remote) source
+ try:
+ response = self.session.request(
+ "GET", url, stream=True, headers=headers,
+ timeout=self.timeout, verify=self.verify)
+ except (ConnectionError, Timeout) as exc:
+ msg = str(exc)
+ continue
+ except Exception as exc:
+ self.log.warning("%s", exc)
+ return False
+
+ # check response
+ code = response.status_code
+ if code == 200: # OK
+ offset = 0
+ size = response.headers.get("Content-Length")
+ elif code == 206: # Partial Content
+ offset = filesize
+ size = response.headers["Content-Range"].rpartition("/")[2]
+ elif code == 416: # Requested Range Not Satisfiable
+ break
+ else:
+ msg = "{}: {} for url: {}".format(code, response.reason, url)
+ if code == 429 or 500 <= code < 600: # Server Error
+ continue
+ self.log.warning("%s", msg)
+ return False
+ size = text.parse_int(size)
+
+ # set missing filename extension
+ if not pathfmt.has_extension:
+ pathfmt.set_extension(self.get_extension(response))
+ if pathfmt.exists():
+ pathfmt.temppath = ""
+ return True
+
+ # set open mode
+ if not offset:
+ mode = "w+b"
+ if filesize:
+ self.log.info("Unable to resume partial download")
+ else:
+ mode = "r+b"
+ self.log.info("Resuming download at byte %d", offset)
+
+ # start downloading
+ self.out.start(pathfmt.path)
+ self.downloading = True
+ with pathfmt.open(mode) as file:
+ if offset:
+ file.seek(offset)
+
+ # download content
+ try:
+ self.receive(response, file)
+ except (RequestException, SSLError) as exc:
+ msg = str(exc)
+ print()
+ continue
+
+ # check filesize
+ if size and file.tell() < size:
+ msg = "filesize mismatch ({} < {})".format(
+ file.tell(), size)
+ print()
+ continue
+
+ # check filename extension
+ adj_ext = self.check_extension(file, pathfmt)
+
+ break
+
+ self.downloading = False
+ if adj_ext:
+ pathfmt.set_extension(adj_ext)
+ if self.mtime:
+ pathfmt.keywords["_mtime"] = response.headers.get("Last-Modified")
+ return True
+
+ def receive(self, response, file):
if self.rate:
total = 0 # total amount of bytes received
start = time.time() # start time
- for data in self.response.iter_content(self.chunk_size):
+ for data in response.iter_content(self.chunk_size):
file.write(data)
if self.rate:
@@ -79,13 +179,8 @@ class HttpDownloader(DownloaderBase):
# sleep if less time passed than expected
time.sleep(expected - delta)
- def reset(self):
- if self.response:
- self.response.close()
- self.response = None
-
- def get_extension(self):
- mtype = self.response.headers.get("Content-Type", "image/jpeg")
+ def get_extension(self, response):
+ mtype = response.headers.get("Content-Type", "image/jpeg")
mtype = mtype.partition(";")[0]
if mtype in MIMETYPE_MAP:
@@ -100,6 +195,26 @@ class HttpDownloader(DownloaderBase):
"No filename extension found for MIME type '%s'", mtype)
return "txt"
+ @staticmethod
+ def check_extension(file, pathfmt):
+ """Check filename extension against fileheader"""
+ extension = pathfmt.keywords["extension"]
+ if extension in FILETYPE_CHECK:
+ file.seek(0)
+ header = file.read(8)
+ if len(header) >= 8 and not FILETYPE_CHECK[extension](header):
+ for ext, check in FILETYPE_CHECK.items():
+ if ext != extension and check(header):
+ return ext
+ return None
+
+
+FILETYPE_CHECK = {
+ "jpg": lambda h: h[0:2] == b"\xff\xd8",
+ "png": lambda h: h[0:8] == b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a",
+ "gif": lambda h: h[0:4] == b"GIF8" and h[5] == 97,
+}
+
MIMETYPE_MAP = {
"image/jpeg": "jpg",
diff --git a/gallery_dl/downloader/text.py b/gallery_dl/downloader/text.py
index ca33863..c57fbd0 100644
--- a/gallery_dl/downloader/text.py
+++ b/gallery_dl/downloader/text.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2018 Mike Fährmann
+# Copyright 2014-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -14,24 +14,13 @@ from .common import DownloaderBase
class TextDownloader(DownloaderBase):
scheme = "text"
- def __init__(self, extractor, output):
- DownloaderBase.__init__(self, extractor, output)
- self.content = b""
-
- def connect(self, url, offset):
- data = url.encode()
- self.content = data[offset + 5:]
- return offset, len(data) - 5
-
- def receive(self, file):
- file.write(self.content)
-
- def reset(self):
- self.content = b""
-
- @staticmethod
- def get_extension():
- return "txt"
+ def download(self, url, pathfmt):
+ if self.part:
+ pathfmt.part_enable(self.partdir)
+ self.out.start(pathfmt.path)
+ with pathfmt.open("wb") as file:
+ file.write(url.encode()[5:])
+ return True
__downloader__ = TextDownloader
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 57a84d0..da57935 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -20,13 +20,15 @@ class YoutubeDLDownloader(DownloaderBase):
def __init__(self, extractor, output):
DownloaderBase.__init__(self, extractor, output)
+ retries = self.config("retries", extractor._retries)
options = {
"format": self.config("format") or None,
"ratelimit": text.parse_bytes(self.config("rate"), None),
- "retries": self.config("retries", extractor._retries),
+ "retries": retries+1 if retries >= 0 else float("inf"),
"socket_timeout": self.config("timeout", extractor._timeout),
"nocheckcertificate": not self.config("verify", extractor._verify),
"nopart": not self.part,
+ "updatetime": self.config("mtime", True),
}
options.update(self.config("raw-options") or {})
@@ -36,6 +38,9 @@ class YoutubeDLDownloader(DownloaderBase):
self.ytdl = YoutubeDL(options)
def download(self, url, pathfmt):
+ for cookie in self.session.cookies:
+ self.ytdl.cookiejar.set_cookie(cookie)
+
try:
info_dict = self.ytdl.extract_info(url[5:], download=False)
except Exception: