From 9074eee175f76b824fbb6695d56426105191c51c Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Mon, 28 Sep 2020 18:27:46 -0400 Subject: New upstream version 1.15.0. --- CHANGELOG.md | 28 ++++ PKG-INFO | 8 +- README.rst | 6 +- data/completion/_gallery-dl | 3 + data/completion/gallery-dl | 2 +- data/man/gallery-dl.1 | 11 +- data/man/gallery-dl.conf.5 | 84 ++++++++--- docs/gallery-dl.conf | 6 +- gallery_dl.egg-info/PKG-INFO | 8 +- gallery_dl.egg-info/SOURCES.txt | 1 + gallery_dl/config.py | 19 +++ gallery_dl/downloader/http.py | 29 +++- gallery_dl/downloader/ytdl.py | 4 + gallery_dl/extractor/500px.py | 2 +- gallery_dl/extractor/__init__.py | 19 +-- gallery_dl/extractor/aryion.py | 4 +- gallery_dl/extractor/common.py | 23 ++- gallery_dl/extractor/danbooru.py | 7 +- gallery_dl/extractor/deviantart.py | 78 +++++++--- gallery_dl/extractor/exhentai.py | 5 +- gallery_dl/extractor/foolfuuka.py | 2 +- gallery_dl/extractor/foolslide.py | 66 +-------- gallery_dl/extractor/furaffinity.py | 16 +-- gallery_dl/extractor/hbrowse.py | 4 +- gallery_dl/extractor/hitomi.py | 2 +- gallery_dl/extractor/imgur.py | 242 ++++++++++++++++---------------- gallery_dl/extractor/myhentaigallery.py | 65 +++++++++ gallery_dl/extractor/plurk.py | 13 +- gallery_dl/extractor/recursive.py | 10 +- gallery_dl/extractor/redgifs.py | 4 +- gallery_dl/extractor/tumblr.py | 24 ++-- gallery_dl/extractor/twitter.py | 29 +++- gallery_dl/extractor/wikiart.py | 2 +- gallery_dl/job.py | 48 ++++++- gallery_dl/option.py | 15 ++ gallery_dl/postprocessor/zip.py | 10 +- gallery_dl/util.py | 8 +- gallery_dl/version.py | 2 +- test/test_config.py | 22 +++ test/test_downloader.py | 17 +++ test/test_extractor.py | 24 ---- test/test_postprocessor.py | 31 ++-- test/test_results.py | 5 +- test/test_util.py | 2 +- 44 files changed, 644 insertions(+), 366 deletions(-) create mode 100644 gallery_dl/extractor/myhentaigallery.py diff --git a/CHANGELOG.md b/CHANGELOG.md index b38c9c8..b368535 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,33 @@ # Changelog +## 1.15.0 - 2020-09-20 +### Additions +- [deviantart] support watchers-only/paid deviations ([#995](https://github.com/mikf/gallery-dl/issues/995)) +- [myhentaigallery] add gallery extractor ([#1001](https://github.com/mikf/gallery-dl/issues/1001)) +- [twitter] support specifying users by ID ([#980](https://github.com/mikf/gallery-dl/issues/980)) +- [twitter] support `/intent/user?user_id=…` URLs ([#980](https://github.com/mikf/gallery-dl/issues/980)) +- add `--no-skip` command-line option ([#986](https://github.com/mikf/gallery-dl/issues/986)) +- add `blacklist` and `whitelist` options ([#492](https://github.com/mikf/gallery-dl/issues/492), [#844](https://github.com/mikf/gallery-dl/issues/844)) +- add `filesize-min` and `filesize-max` options ([#780](https://github.com/mikf/gallery-dl/issues/780)) +- add `sleep-extractor` and `sleep-request` options ([#788](https://github.com/mikf/gallery-dl/issues/788)) +- write skipped files to archive ([#550](https://github.com/mikf/gallery-dl/issues/550)) +### Changes +- [exhentai] update wait time before original image downloads ([#978](https://github.com/mikf/gallery-dl/issues/978)) +- [imgur] use new API endpoints for image/album data +- [tumblr] create directories for each post ([#965](https://github.com/mikf/gallery-dl/issues/965)) +- support format string replacement fields in download archive paths ([#985](https://github.com/mikf/gallery-dl/issues/985)) +- reduce wait time growth rate for HTTP retries from exponential to linear +### Fixes +- [500px] 
update query hash +- [aryion] improve post ID extraction ([#981](https://github.com/mikf/gallery-dl/issues/981), [#982](https://github.com/mikf/gallery-dl/issues/982)) +- [danbooru] handle posts without `id` ([#1004](https://github.com/mikf/gallery-dl/issues/1004)) +- [furaffinity] update download URL extraction ([#988](https://github.com/mikf/gallery-dl/issues/988)) +- [imgur] fix image/album detection for galleries +- [postprocessor:zip] defer zip file creation ([#968](https://github.com/mikf/gallery-dl/issues/968)) +### Removals +- [jaiminisbox] remove extractors +- [worldthree] remove extractors + ## 1.14.5 - 2020-08-30 ### Additions - [aryion] add username/password support ([#960](https://github.com/mikf/gallery-dl/issues/960)) diff --git a/PKG-INFO b/PKG-INFO index 644b647..19b7f04 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.14.5 +Version: 1.15.0 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows `__ - - `Linux `__ + - `Windows `__ + - `Linux `__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -311,7 +311,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.5.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.0.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/README.rst b/README.rst index 6f5c4bb..ca01764 100644 --- a/README.rst +++ b/README.rst @@ -83,8 +83,8 @@ Download a standalone executable file, put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -300,7 +300,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.5.tar.gz +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.0.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl index b5074d2..5194312 100644 --- a/data/completion/_gallery-dl +++ b/data/completion/_gallery-dl @@ -28,7 +28,10 @@ _arguments -C -S \ {-A,--abort}'[Abort extractor run after N consecutive file downloads have been skipped, e.g. if files with the same filename already exist]':'' \ --http-timeout'[Timeout for HTTP connections (default: 30.0)]':'' \ --sleep'[Number of seconds to sleep before each download]':'' \ +--filesize-min'[Do not download files smaller than SIZE (e.g. 
500k or 2.5M)]':'' \ +--filesize-max'[Do not download files larger than SIZE (e.g. 500k or 2.5M)]':'' \ --no-part'[Do not use .part files]' \ +--no-skip'[Do not skip downloads; overwrite existing files]' \ --no-mtime'[Do not set file modification times according to Last-Modified HTTP response headers]' \ --no-download'[Do not download any files]' \ --no-check-certificate'[Disable HTTPS certificate validation]' \ diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl index 921d601..19cb39f 100644 --- a/data/completion/gallery-dl +++ b/data/completion/gallery-dl @@ -10,7 +10,7 @@ _gallery_dl() elif [[ "${prev}" =~ ^(-d|--dest)$ ]]; then COMPREPLY=( $(compgen -d -- "${cur}") ) else - COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --dump-json --simulate --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --abort --http-timeout --sleep --no-part --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after" -- "${cur}") ) + COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --dump-json --simulate --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --abort --http-timeout --sleep --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after" -- "${cur}") ) fi } diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 2437195..2a84a06 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2020-08-30" "1.14.5" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2020-09-20" "1.15.0" "gallery-dl Manual" .\" disable hyphenation .nh @@ -86,9 +86,18 @@ Timeout for HTTP connections (default: 30.0) .B "\-\-sleep" \f[I]SECONDS\f[] Number of seconds to sleep before each download .TP +.B "\-\-filesize\-min" \f[I]SIZE\f[] +Do not download files smaller than SIZE (e.g. 500k or 2.5M) +.TP +.B "\-\-filesize\-max" \f[I]SIZE\f[] +Do not download files larger than SIZE (e.g. 500k or 2.5M) +.TP .B "\-\-no\-part" Do not use .part files .TP +.B "\-\-no\-skip" +Do not skip downloads; overwrite existing files +.TP .B "\-\-no\-mtime" Do not set file modification times according to Last-Modified HTTP response headers .TP diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index a5b1f4d..e37135e 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2020-08-30" "1.14.5" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2020-09-20" "1.15.0" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -253,6 +253,28 @@ filename extension (\f[I]file.1.ext\f[], \f[I]file.2.ext\f[], etc.) .IP "Description:" 4 Number of seconds to sleep before each download. 
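The next hunk documents the new `sleep-extractor` and `sleep-request` options. As a rough illustration (not part of the patch itself), both would be set in a user's gallery-dl.conf along these lines, with arbitrary example values:

```json
{
    "extractor": {
        "sleep-extractor": 2.0,
        "sleep-request": 0.5
    }
}
```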
+.SS extractor.*.sleep-extractor
+.IP "Type:" 6
+\f[I]float\f[]
+
+.IP "Default:" 9
+\f[I]0\f[]
+
+.IP "Description:" 4
+Number of seconds to sleep before handling an input URL,
+i.e. before starting a new extractor.
+
+.SS extractor.*.sleep-request
+.IP "Type:" 6
+\f[I]float\f[]
+
+.IP "Default:" 9
+\f[I]0\f[]
+
+.IP "Description:" 4
+Minimum time interval in seconds between HTTP requests
+during data extraction.
+
 .SS extractor.*.username & .password
 .IP "Type:" 6
 \f[I]string\f[]
@@ -436,6 +458,21 @@ Transfer an extractor's (sub)category values to all child
 extractors spawned by it,
 to let them inherit their parent's config options.
 
+.SS extractor.*.blacklist & .whitelist
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]["oauth", "recursive", "test"]\f[] + current extractor category
+
+.IP "Description:" 4
+A list of extractor categories to ignore (or allow)
+when spawning child extractors for unknown URLs,
+e.g. from \f[I]reddit\f[] or \f[I]plurk\f[].
+
+Note: Any \f[I]blacklist\f[] setting will automatically include
+\f[I]"oauth"\f[], \f[I]"recursive"\f[], and \f[I]"test"\f[].
+
 .SS extractor.*.archive
 .IP "Type:" 6
 \f[I]Path\f[]
@@ -443,15 +480,23 @@
 .IP "Default:" 9
 \f[I]null\f[]
 
+.IP "Example:" 4
+"$HOME/.archives/{category}.sqlite3"
+
 .IP "Description:" 4
 File to store IDs of downloaded files in. Downloads of files
-already recorded in this archive file will be skipped_.
+already recorded in this archive file will be
+\f[I]skipped \f[].
 
 The resulting archive file is not a plain text file but an SQLite3
 database, as lookup operations are significantly faster and
 memory requirements are significantly lower when the number of
 stored IDs gets reasonably large.
 
+Note: archive paths support regular \f[I]format string\f[] replacements,
+but be aware that using external inputs for building local paths
+may pose a security risk.
+
 .SS extractor.*.archive-format
 .IP "Type:" 6
 \f[I]string\f[]
@@ -839,9 +884,6 @@ or whenever your \f[I]cache file \f[] is deleted or cleared.
 .IP "Description:" 4
 Minimum wait time in seconds before API requests.
 
-Note: This value will internally be rounded up
-to the next power of 2.
-
 .SS extractor.exhentai.domain
 .IP "Type:" 6
 \f[I]string\f[]
@@ -1221,17 +1263,6 @@ Controls how to handle redirects to CAPTCHA pages.
 .br
 * \f[I]"wait"\f[]: Ask the user to solve the CAPTCHA and wait.
 
-.SS extractor.recursive.blacklist
-.IP "Type:" 6
-\f[I]list\f[] of \f[I]strings\f[]
-
-.IP "Default:" 9
-\f[I]["directlink", "oauth", "recursive", "test"]\f[]
-
-.IP "Description:" 4
-A list of extractor categories which should be ignored when using
-the \f[I]recursive\f[] extractor.
-
 .SS extractor.reddit.comments
 .IP "Type:" 6
 \f[I]integer\f[]
@@ -1594,6 +1625,24 @@ Reverse the order of chapter URLs extracted from manga pages.
 .IP "Description:" 4
 Enable/Disable this downloader module.
 
+.SS downloader.*.filesize-min & .filesize-max
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Example:" 4
+"32000", "500k", "2.5M"
+
+.IP "Description:" 4
+Minimum/Maximum allowed file size in bytes.
+Any file smaller/larger than this limit will not be downloaded.
+
+Possible values are valid integer or floating-point numbers
+optionally followed by one of \f[I]k\f[], \f[I]m\f[], \f[I]g\f[], \f[I]t\f[] or \f[I]p\f[].
+These suffixes are case-insensitive.
+
 .SS downloader.*.mtime
 .IP "Type:" 6
 \f[I]bool\f[]
@@ -2283,6 +2332,9 @@ Submission Policy, and Terms of Service.
application and put them in your configuration file as \f[I]"client-id"\f[] and \f[I]"client-secret"\f[] .br +* clear your \f[I]cache \f[] (\f[I]--clear-cache\f[]) to delete +the \f[I]access-token\f[] from the previous \f[I]client-id\f[] +.br * get a new \f[I]refresh-token \f[] if necessary diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 56147e9..2d7b0ff 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -119,10 +119,6 @@ { "captcha": "stop" }, - "recursive": - { - "blacklist": ["directlink", "oauth", "recursive", "test"] - }, "reddit": { "comments": 0, @@ -189,6 +185,8 @@ "downloader": { + "filesize-min": null, + "filesize-max": null, "part": true, "part-directory": null, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index a2fafb1..fc9f14b 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.14.5 +Version: 1.15.0 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows `__ - - `Linux `__ + - `Windows `__ + - `Linux `__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -311,7 +311,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.5.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.0.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. 
_Python: https://www.python.org/downloads/ diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 56c9245..648e273 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -108,6 +108,7 @@ gallery_dl/extractor/mangastream.py gallery_dl/extractor/mangoxo.py gallery_dl/extractor/mastodon.py gallery_dl/extractor/message.py +gallery_dl/extractor/myhentaigallery.py gallery_dl/extractor/myportfolio.py gallery_dl/extractor/naver.py gallery_dl/extractor/newgrounds.py diff --git a/gallery_dl/config.py b/gallery_dl/config.py index a3c71cd..e0a5459 100644 --- a/gallery_dl/config.py +++ b/gallery_dl/config.py @@ -140,6 +140,25 @@ def interpolate_common(common, paths, key, default=None, *, conf=_config): return default +def accumulate(path, key, *, conf=_config): + """Accumulate the values of 'key' along 'path'""" + result = [] + try: + if key in conf: + value = conf[key] + if value: + result.extend(value) + for p in path: + conf = conf[p] + if key in conf: + value = conf[key] + if value: + result[:0] = value + except Exception: + pass + return result + + def set(path, key, value, *, conf=_config): """Set the value of property 'key' for this session""" for p in path: diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 6644827..0e67330 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -31,6 +31,8 @@ class HttpDownloader(DownloaderBase): self.downloading = False self.adjust_extension = self.config("adjust-extensions", True) + self.minsize = self.config("filesize-min") + self.maxsize = self.config("filesize-max") self.retries = self.config("retries", extractor._retries) self.timeout = self.config("timeout", extractor._timeout) self.verify = self.config("verify", extractor._verify) @@ -39,6 +41,16 @@ class HttpDownloader(DownloaderBase): if self.retries < 0: self.retries = float("inf") + if self.minsize: + minsize = text.parse_bytes(self.minsize) + if not minsize: + self.log.warning("Invalid minimum filesize (%r)", self.minsize) + self.minsize = minsize + if self.maxsize: + maxsize = text.parse_bytes(self.maxsize) + if not maxsize: + self.log.warning("Invalid maximum filesize (%r)", self.maxsize) + self.maxsize = maxsize if self.rate: rate = text.parse_bytes(self.rate) if rate: @@ -75,7 +87,7 @@ class HttpDownloader(DownloaderBase): self.log.warning("%s (%s/%s)", msg, tries, self.retries+1) if tries > self.retries: return False - time.sleep(min(2 ** (tries-1), 1800)) + time.sleep(tries) tries += 1 headers = {} @@ -116,7 +128,20 @@ class HttpDownloader(DownloaderBase): continue self.log.warning(msg) return False - size = text.parse_int(size) + + # check filesize + size = text.parse_int(size, None) + if size is not None: + if self.minsize and size < self.minsize: + self.log.warning( + "File size smaller than allowed minimum (%s < %s)", + size, self.minsize) + return False + if self.maxsize and size > self.maxsize: + self.log.warning( + "File size larger than allowed maximum (%s > %s)", + size, self.maxsize) + return False # set missing filename extension if not pathfmt.extension: diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index c3dd863..8086b5d 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -31,6 +31,10 @@ class YoutubeDLDownloader(DownloaderBase): "nopart": not self.part, "updatetime": self.config("mtime", True), "proxy": extractor.session.proxies.get("http"), + "min_filesize": text.parse_bytes( + 
self.config("filesize-min"), None), + "max_filesize": text.parse_bytes( + self.config("filesize-max"), None), } options.update(self.config("raw-options") or {}) diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index 96cb021..4dc4f0d 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -153,7 +153,7 @@ class _500pxGalleryExtractor(_500pxExtractor): def metadata(self): user = self._request_graphql( "ProfileRendererQuery", {"username": self.user_name}, - "db1dba2cb7b7e94916d1005db16fea1a39d6211437b691c4de2f1a606c21c5fb", + "4d02ff5c13927a3ac73b3eef306490508bc765956940c31051468cf30402a503", )["profile"] self.user_id = str(user["legacyId"]) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 6f8867c..53bc726 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -74,6 +74,7 @@ modules = [ "mangareader", "mangastream", "mangoxo", + "myhentaigallery", "myportfolio", "naver", "newgrounds", @@ -140,7 +141,7 @@ def find(url): """Find a suitable extractor for the given URL""" for cls in _list_classes(): match = cls.pattern.match(url) - if match and cls not in _blacklist: + if match: return cls(match) return None @@ -169,26 +170,10 @@ def extractors(): ) -class blacklist(): - """Context Manager to blacklist extractor modules""" - def __init__(self, categories, extractors=None): - self.extractors = extractors or [] - for cls in _list_classes(): - if cls.category in categories: - self.extractors.append(cls) - - def __enter__(self): - _blacklist.update(self.extractors) - - def __exit__(self, etype, value, traceback): - _blacklist.clear() - - # -------------------------------------------------------------------- # internals _cache = [] -_blacklist = set() _module_iter = iter(modules) diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py index 2e4c4d4..374a9fc 100644 --- a/gallery_dl/extractor/aryion.py +++ b/gallery_dl/extractor/aryion.py @@ -30,6 +30,7 @@ class AryionExtractor(Extractor): Extractor.__init__(self, match) self.user = match.group(1) self.recursive = True + self._needle = "class='gallery-item' id='" def login(self): username, password = self._get_auth_info() @@ -73,7 +74,7 @@ class AryionExtractor(Extractor): while True: page = self.request(url).text yield from text.extract_iter( - page, "class='thumb' href='/g4/view/", "'") + page, self._needle, "'") pos = page.find("Next >>") if pos < 0: @@ -180,6 +181,7 @@ class AryionGalleryExtractor(AryionExtractor): url = "{}/g4/gallery/{}".format(self.root, self.user) return self._pagination(url) else: + self._needle = "class='thumb' href='/g4/view/" url = "{}/g4/latest.php?name={}".format(self.root, self.user) return util.advance(self._pagination(url), self.offset) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index e6c0968..357deac 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -31,6 +31,8 @@ class Extractor(): cookiedomain = "" root = "" test = None + _request_last = 0 + _request_interval = 0 def __init__(self, match): self.session = requests.Session() @@ -40,10 +42,14 @@ class Extractor(): self._cookiefile = None self._cookiejar = self.session.cookies self._parentdir = "" + + self._cfgpath = ("extractor", self.category, self.subcategory) self._write_pages = self.config("write-pages", False) self._retries = self.config("retries", 4) self._timeout = self.config("timeout", 30) self._verify = self.config("verify", True) + 
self._request_interval = self.config( + "sleep-request", self._request_interval) if self._retries < 0: self._retries = float("inf") @@ -69,8 +75,10 @@ class Extractor(): return 0 def config(self, key, default=None): - return config.interpolate( - ("extractor", self.category, self.subcategory), key, default) + return config.interpolate(self._cfgpath, key, default) + + def config_accumulate(self, key): + return config.accumulate(self._cfgpath, key) def request(self, url, *, method="GET", session=None, retries=None, encoding=None, fatal=True, notfound=None, **kwargs): @@ -81,6 +89,13 @@ class Extractor(): kwargs.setdefault("verify", self._verify) response = None + if self._request_interval: + seconds = (self._request_interval - + (time.time() - Extractor._request_last)) + if seconds > 0: + self.log.debug("Sleeping for %.5s seconds", seconds) + time.sleep(seconds) + while True: try: response = session.request(method, url, **kwargs) @@ -119,11 +134,13 @@ class Extractor(): msg = "'{} {}' for '{}'".format(code, reason, url) if code < 500 and code != 429 and code != 430: break + finally: + Extractor._request_last = time.time() self.log.debug("%s (%s/%s)", msg, tries, retries+1) if tries > retries: break - time.sleep(min(2 ** (tries-1), 1800)) + time.sleep(tries) tries += 1 raise exception.HttpError(msg, response) diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index e0edf89..1ebaf5b 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -93,7 +93,12 @@ class DanbooruExtractor(SharedConfigMixin, Extractor): if pagenum: params["page"] += 1 else: - params["page"] = "b{}".format(posts[-1]["id"]) + for post in reversed(posts): + if "id" in post: + params["page"] = "b{}".format(post["id"]) + break + else: + return class DanbooruTagExtractor(DanbooruExtractor): diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 73ef20d..a0f4d1c 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -14,7 +14,6 @@ from ..cache import cache, memcache import collections import itertools import mimetypes -import math import time import re @@ -55,6 +54,7 @@ class DeviantartExtractor(Extractor): self._update_content = self._update_content_image self.original = True + self._premium_cache = {} self.commit_journal = { "html": self._commit_journal_html, "text": self._commit_journal_text, @@ -66,6 +66,8 @@ class DeviantartExtractor(Extractor): def items(self): self.api = DeviantartOAuthAPI(self) + if not self.api.refresh_token_key: + self._fetch_premium = self._fetch_premium_notoken if self.user: profile = self.api.user_profile(self.user) @@ -83,6 +85,10 @@ class DeviantartExtractor(Extractor): yield Message.Queue, url, data continue + if "premium_folder_data" in deviation: + if not self._fetch_premium(deviation): + continue + self.prepare(deviation) yield Message.Directory, deviation @@ -261,7 +267,9 @@ class DeviantartExtractor(Extractor): return [(url + folder["name"], folder) for folder in folders] def _update_content_default(self, deviation, content): - content.update(self.api.deviation_download(deviation["deviationid"])) + public = "premium_folder_data" not in deviation + data = self.api.deviation_download(deviation["deviationid"], public) + content.update(data) def _update_content_image(self, deviation, content): data = self.api.deviation_download(deviation["deviationid"]) @@ -290,6 +298,41 @@ class DeviantartExtractor(Extractor): return response self.wait(seconds=180) + def 
_fetch_premium(self, deviation): + cache = self._premium_cache + + if deviation["deviationid"] not in cache: + + # check accessibility + dev = self.api.deviation(deviation["deviationid"], False) + has_access = dev["premium_folder_data"]["has_access"] + + if has_access: + self.log.info("Fetching premium folder data") + else: + self.log.warning("Unable to access premium content (type: %s)", + dev["premium_folder_data"]["type"]) + # fill cache + for dev in self.api.gallery( + deviation["author"]["username"], + deviation["premium_folder_data"]["gallery_id"], + public=False, + ): + cache[dev["deviationid"]] = dev if has_access else None + + data = cache[deviation["deviationid"]] + if data: + deviation.update(data) + return True + return False + + def _fetch_premium_notoken(self, deviation): + if not self._premium_cache: + self.log.warning( + "Unable to access premium content (no refresh-token)") + self._premium_cache = True + return False + class DeviantartUserExtractor(DeviantartExtractor): """Extractor for an artist's user profile""" @@ -837,8 +880,7 @@ class DeviantartOAuthAPI(): self.log = extractor.log self.headers = {} - delay = extractor.config("wait-min", 0) - self.delay = math.ceil(math.log2(delay)) if delay >= 1 else -1 + self.delay = extractor.config("wait-min", 0) self.delay_min = max(2, self.delay) self.mature = extractor.config("mature", "true") @@ -897,27 +939,27 @@ class DeviantartOAuthAPI(): "mature_content": self.mature} return self._pagination_folders(endpoint, params) - def deviation(self, deviation_id): + def deviation(self, deviation_id, public=True): """Query and return info about a single Deviation""" endpoint = "deviation/" + deviation_id - deviation = self._call(endpoint) + deviation = self._call(endpoint, public=public) if self.metadata: self._metadata((deviation,)) if self.folders: self._folders((deviation,)) return deviation - def deviation_content(self, deviation_id): + def deviation_content(self, deviation_id, public=False): """Get extended content of a single Deviation""" endpoint = "deviation/content" params = {"deviationid": deviation_id} - return self._call(endpoint, params, public=False) + return self._call(endpoint, params, public=public) - def deviation_download(self, deviation_id): + def deviation_download(self, deviation_id, public=True): """Get the original file download (if allowed)""" endpoint = "deviation/download/" + deviation_id params = {"mature_content": self.mature} - return self._call(endpoint, params) + return self._call(endpoint, params, public=public) def deviation_metadata(self, deviations): """ Fetch deviation metadata for a set of deviations""" @@ -930,12 +972,12 @@ class DeviantartOAuthAPI(): params = {"mature_content": self.mature} return self._call(endpoint, params)["metadata"] - def gallery(self, username, folder_id="", offset=0, extend=True): + def gallery(self, username, folder_id, offset=0, extend=True, public=True): """Yield all Deviation-objects contained in a gallery folder""" endpoint = "gallery/" + folder_id params = {"username": username, "offset": offset, "limit": 24, "mature_content": self.mature, "mode": "newest"} - return self._pagination(endpoint, params, extend) + return self._pagination(endpoint, params, extend, public) def gallery_all(self, username, offset=0): """Yield all Deviation-objects of a specific user""" @@ -993,8 +1035,8 @@ class DeviantartOAuthAPI(): """Call an API endpoint""" url = "https://www.deviantart.com/api/v1/oauth2/" + endpoint while True: - if self.delay >= 0: - time.sleep(2 ** self.delay) + if 
self.delay: + time.sleep(self.delay) self.authenticate(None if public else self.refresh_token_key) response = self.extractor.request( @@ -1015,15 +1057,15 @@ class DeviantartOAuthAPI(): msg = "API responded with {} {}".format( status, response.reason) if status == 429: - if self.delay < 9: + if self.delay < 30: self.delay += 1 - self.log.warning("%s. Using %ds delay.", msg, 2 ** self.delay) + self.log.warning("%s. Using %ds delay.", msg, self.delay) else: self.log.error(msg) return data - def _pagination(self, endpoint, params, extend=True): - public = warn = True + def _pagination(self, endpoint, params, extend=True, public=True): + warn = True while True: data = self._call(endpoint, params, public=public) if "results" not in data: diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 80c7187..cb4df11 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -34,6 +34,9 @@ class ExhentaiExtractor(Extractor): LIMIT = False def __init__(self, match): + # allow calling 'self.config()' before 'Extractor.__init__()' + self._cfgpath = ("extractor", self.category, self.subcategory) + version = match.group(1) domain = self.config("domain", "auto") if domain == "auto": @@ -193,7 +196,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): self._check_limits(data) if "/fullimg.php" in url: data["extension"] = "" - self.wait(1.5) + self.wait(self.wait_max / 4) yield Message.Url, url, data def get_metadata(self, page): diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 4af9d4a..f2019ca 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -109,7 +109,7 @@ EXTRACTORS = { "root": "https://arch.b4k.co", "extra": {"external": "direct"}, "test-thread": ("https://arch.b4k.co/meta/thread/196/", { - "url": "9b0ae01292133268fe9178b71332da1ee25b7704", + "url": "d309713d2f838797096b3e9cb44fe514a9c9d07a", }), }, "desuarchive": { diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py index e624a65..0ab42db 100644 --- a/gallery_dl/extractor/foolslide.py +++ b/gallery_dl/extractor/foolslide.py @@ -17,9 +17,7 @@ from .common import ( generate_extractors, ) from .. 
import text, util -import base64 import json -import re class FoolslideBase(SharedConfigMixin): @@ -83,25 +81,7 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor): }) def images(self, page): - data = None - - if self.decode == "base64": - pos = page.find("'fromCharCode'") - if pos >= 0: - blob = text.extract(page, "'", "'", pos+15)[0] - base64_data = re.sub(r"[a-zA-Z]", _decode_jaiminisbox, blob) - else: - base64_data = text.extract(page, 'atob("', '"')[0] - if base64_data: - data = base64.b64decode(base64_data.encode()).decode() - elif self.decode == "double": - pos = page.find("[{") - if pos >= 0: - data = text.extract(page, " = ", ";", pos)[0] - - if not data: - data = text.extract(page, "var pages = ", ";")[0] - return json.loads(data) + return json.loads(text.extract(page, "var pages = ", ";")[0]) class FoolslideMangaExtractor(FoolslideBase, MangaExtractor): @@ -126,16 +106,6 @@ class FoolslideMangaExtractor(FoolslideBase, MangaExtractor): }))) -def _decode_jaiminisbox(match): - c = match.group(0) - - # ord("Z") == 90, ord("z") == 122 - N = 90 if c <= "Z" else 122 - C = ord(c) + 13 - - return chr(C if N >= C else (C - 26)) - - EXTRACTORS = { "dokireader": { "root": "https://kobato.hologfx.com/reader", @@ -151,19 +121,6 @@ EXTRACTORS = { "keyword": "614d89a6045b85c822cbd3e67578ea7577dfc995", }), }, - "jaiminisbox": { - "root": "https://jaiminisbox.com/reader", - "pattern": r"(?:www\.)?jaiminisbox\.com/reader", - "extra": {"decode": "base64"}, - "test-chapter": - ("https://jaiminisbox.com/reader/read/oshi-no-ko/en/0/1/", { - "keyword": "d6435cfc1522293a42517a4aadda95a8631da0b3", - }), - "test-manga": - ("https://jaiminisbox.com/reader/series/oshi-no-ko/", { - "count": ">= 10", - }), - }, "kireicake": { "root": "https://reader.kireicake.com", "test-chapter": @@ -220,27 +177,6 @@ EXTRACTORS = { "keyword": "562fb5a7362a4cb43d59d5c8a6ea8080fc65cf99", }), }, - "worldthree": { - "root": "http://www.slide.world-three.org", - "pattern": r"(?:www\.)?slide\.world-three\.org", - "test-chapter": ( - (("http://www.slide.world-three.org" - "/read/black_bullet/en/2/7/page/1"), { - "url": "be2f04f6e2d311b35188094cfd3e768583271584", - "keyword": "967d536a65de4d52478d5b666a1760b181eddb6e", - }), - (("http://www.slide.world-three.org" - "/read/idolmster_cg_shuffle/en/0/4/2/"), { - "url": "6028ea5ca282744f925dfad92eeb98509f9cc78c", - "keyword": "f3cfe2ad3388991f1d045c85d0fa94795a7694dc", - }), - ), - "test-manga": - ("http://www.slide.world-three.org/series/black_bullet/", { - "url": "5743b93512d26e6b540d90a7a5d69208b6d4a738", - "keyword": "3a24f1088b4d7f3b798a96163f21ca251293a120", - }), - }, "_ckey": "chapterclass", } diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index 6dfd75d..950a174 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -55,7 +55,7 @@ class FuraffinityExtractor(Extractor): title, _, artist = text.unescape(extr( 'property="og:title" content="', '"')).rpartition(" by ") artist_url = artist.replace("_", "").lower() - path = extr('href="//d.facdn.net/', '"') + path = extr('href="//d', '"') if not path: self.log.warning( @@ -76,7 +76,7 @@ class FuraffinityExtractor(Extractor): "artist" : artist, "artist_url": artist_url, "user" : self.user or artist_url, - "url" : "https://d.facdn.net/" + path + "url" : "https://d" + path }) tags = extr('class="tags-row">', '') @@ -179,7 +179,7 @@ class FuraffinityGalleryExtractor(FuraffinityExtractor): subcategory = "gallery" pattern = BASE_PATTERN + 
r"/gallery/([^/?&#]+)" test = ("https://www.furaffinity.net/gallery/mirlinthloth/", { - "pattern": r"https://d.facdn.net/art/mirlinthloth/\d+/\d+.\w+\.\w+", + "pattern": r"https://d\d?.facdn.net/art/mirlinthloth/\d+/\d+.\w+\.\w+", "range": "45-50", "count": 6, }) @@ -191,7 +191,7 @@ class FuraffinityScrapsExtractor(FuraffinityExtractor): directory_fmt = ("{category}", "{user!l}", "Scraps") pattern = BASE_PATTERN + r"/scraps/([^/?&#]+)" test = ("https://www.furaffinity.net/scraps/mirlinthloth/", { - "pattern": r"https://d.facdn.net/art/[^/]+(/stories)?/\d+/\d+.\w+.\w+", + "pattern": r"https://d\d?.facdn.net/art/[^/]+(/stories)?/\d+/\d+.\w+.", "count": ">= 3", }) @@ -202,7 +202,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor): directory_fmt = ("{category}", "{user!l}", "Favorites") pattern = BASE_PATTERN + r"/favorites/([^/?&#]+)" test = ("https://www.furaffinity.net/favorites/mirlinthloth/", { - "pattern": r"https://d.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+", + "pattern": r"https://d\d?.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+", "range": "45-50", "count": 6, }) @@ -217,7 +217,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor): directory_fmt = ("{category}", "Search", "{search}") pattern = BASE_PATTERN + r"/search/?\?([^#]+)" test = ("https://www.furaffinity.net/search/?q=cute", { - "pattern": r"https://d.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+", + "pattern": r"https://d\d?.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+", "range": "45-50", "count": 6, }) @@ -236,7 +236,7 @@ class FuraffinityPostExtractor(FuraffinityExtractor): pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)" test = ( ("https://www.furaffinity.net/view/21835115/", { - "url": "eae4ef93d99365c69b31a37561bd800c03d336ad", + "url": "d80254eb4fba654597b4df8320d55916e11ba375", "keyword": { "artist" : "mirlinthloth", "artist_url" : "mirlinthloth", @@ -247,7 +247,7 @@ class FuraffinityPostExtractor(FuraffinityExtractor): "id" : 21835115, "tags" : list, "title" : "Bude's 4 Ever", - "url" : "re:https://d.facdn.net/art/mirlinthloth/music", + "url" : r"re:https://d\d?.facdn.net/art/mirlinthloth/m", "user" : "mirlinthloth", "views" : int, "favorites" : int, diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py index 181db9a..43479c6 100644 --- a/gallery_dl/extractor/hbrowse.py +++ b/gallery_dl/extractor/hbrowse.py @@ -50,7 +50,7 @@ class HbrowseChapterExtractor(HbrowseBase, ChapterExtractor): pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/(\d+)/c(\d+))" test = ("https://www.hbrowse.com/10363/c00000", { "url": "6feefbc9f4b98e20d8425ddffa9dd111791dc3e6", - "keyword": "6c1136522a25de013a6579ffa34dadc1eb0d4d1b", + "keyword": "274996f6c809e5250b6ff3abbc5147e29f89d9a5", "content": "44578ebbe176c2c27434966aef22945787e2781e", }) @@ -78,7 +78,7 @@ class HbrowseMangaExtractor(HbrowseBase, MangaExtractor): pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/\d+)/?$" test = ("https://www.hbrowse.com/10363", { "url": "b89682bfb86c11d2af0dc47463804ec3ac4aadd6", - "keyword": "08f5935a4411d2c19ac1786bd4ca552c3785fcae", + "keyword": "4b15fda1858a69de1fbf5afddfe47dd893397312", }) def chapters(self, page): diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 209a4f2..f341c47 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -25,7 +25,7 @@ class HitomiGalleryExtractor(GalleryExtractor): test = ( ("https://hitomi.la/galleries/867789.html", { "pattern": r"https://[a-c]a.hitomi.la/images/./../[0-9a-f]+.jpg", - "keyword": "6701f8f588f119ef84cd29bdf99a399417b0a6a2", + 
"keyword": "4873ef9a523621fc857b114e0b2820ba4066e9ae", "count": 16, }), # download test diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 190a4ff..4391e64 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -27,19 +27,17 @@ class ImgurExtractor(Extractor): self.mp4 = self.config("mp4", True) def _prepare(self, image): - try: - del image["ad_url"] - del image["ad_type"] - del image["ad_config"] - except KeyError: - pass + image.update(image["metadata"]) + del image["metadata"] - if image["animated"] and self.mp4 and "mp4" in image: - url = image["mp4"] - else: - url = image["link"] + if image["ext"] == "jpeg": + image["ext"] = "jpg" + elif image["is_animated"] and self.mp4 and image["ext"] == "gif": + image["ext"] = "mp4" - image["date"] = text.parse_timestamp(image["datetime"]) + image["url"] = url = "https://i.imgur.com/{}.{}".format( + image["id"], image["ext"]) + image["date"] = text.parse_datetime(image["created_at"]) text.nameext_from_url(url, image) return url @@ -65,33 +63,38 @@ class ImgurImageExtractor(ImgurExtractor): "url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2", "content": "0c8768055e4e20e7c7259608b67799171b691140", "keyword": { - "account_id" : None, - "account_url" : None, - "animated" : False, - "bandwidth" : int, - "date" : "dt:2016-11-10 14:24:35", - "datetime" : 1478787875, - "description" : None, - "edited" : "0", - "extension" : "png", - "favorite" : False, - "filename" : "21yMxCS", - "has_sound" : False, - "height" : 32, - "id" : "21yMxCS", - "in_gallery" : False, - "in_most_viral": False, - "is_ad" : False, - "link" : "https://i.imgur.com/21yMxCS.png", - "nsfw" : False, - "section" : None, - "size" : 182, - "tags" : [], - "title" : "Test", - "type" : "image/png", - "views" : int, - "vote" : None, - "width" : 64, + "account_id" : 0, + "comment_count" : int, + "cover_id" : "21yMxCS", + "date" : "dt:2016-11-10 14:24:35", + "description" : "", + "downvote_count": int, + "duration" : 0, + "ext" : "png", + "favorite" : False, + "favorite_count": 0, + "has_sound" : False, + "height" : 32, + "id" : "21yMxCS", + "image_count" : 1, + "in_most_viral" : False, + "is_ad" : False, + "is_album" : False, + "is_animated" : False, + "is_looping" : False, + "is_mature" : False, + "is_pending" : False, + "mime_type" : "image/png", + "name" : "test-テスト", + "point_count" : int, + "privacy" : "", + "score" : int, + "size" : 182, + "title" : "Test", + "upvote_count" : int, + "url" : "https://i.imgur.com/21yMxCS.png", + "view_count" : int, + "width" : 64, }, }), ("http://imgur.com/0gybAXR", { # gifv/mp4 video @@ -101,30 +104,32 @@ class ImgurImageExtractor(ImgurExtractor): ("https://imgur.com/XFfsmuC", { # missing title in API response (#467) "keyword": {"title": "Tears are a natural response to irritants"}, }), - ("https://imgur.com/HjoXJAd", { # url ends with '.jpg?1' - "url": "ec2cf11a2bfb4939feff374781a6e6f3e9af8e8e", - }), ("https://imgur.com/1Nily2P", { # animated png "pattern": "https://i.imgur.com/1Nily2P.png", }), ("https://imgur.com/zzzzzzz", { # not found "exception": exception.HttpError, }), - ("https://www.imgur.com/21yMxCS"), # www - ("https://m.imgur.com/21yMxCS"), # mobile - ("https://imgur.com/zxaY6"), # 5 character key - ("https://i.imgur.com/21yMxCS.png"), # direct link + ("https://www.imgur.com/21yMxCS"), # www + ("https://m.imgur.com/21yMxCS"), # mobile + ("https://imgur.com/zxaY6"), # 5 character key + ("https://i.imgur.com/21yMxCS.png"), # direct link ("https://i.imgur.com/21yMxCSh.png"), # direct link 
thumbnail - ("https://i.imgur.com/zxaY6.gif"), # direct link (short) - ("https://i.imgur.com/zxaY6s.gif"), # direct link (short; thumb) + ("https://i.imgur.com/zxaY6.gif"), # direct link (short) + ("https://i.imgur.com/zxaY6s.gif"), # direct link (short; thumb) ) def items(self): image = self.api.image(self.key) - if not image["title"]: - page = self.request(self.root + "/" + self.key, fatal=False).text - title = text.extract(page, "", "<")[0] or "" - image["title"] = text.unescape(title.rpartition(" - ")[0].strip()) + + try: + del image["ad_url"] + del image["ad_type"] + except KeyError: + pass + + image.update(image["media"][0]) + del image["media"] url = self._prepare(image) yield Message.Version, 1 yield Message.Directory, image @@ -143,53 +148,49 @@ class ImgurAlbumExtractor(ImgurExtractor): "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563", "keyword": { "album": { - "account_id" : None, - "account_url" : None, - "cover" : "693j2Kr", - "cover_edited": None, - "cover_height": 1400, - "cover_width" : 951, - "date" : "dt:2015-10-09 10:37:50", - "datetime" : 1444387070, - "description" : None, - "favorite" : False, - "id" : "TcBmP", - "images_count": 19, - "in_gallery" : False, - "is_ad" : False, - "is_album" : True, - "layout" : "blog", - "link" : "https://imgur.com/a/TcBmP", - "nsfw" : bool, - "privacy" : "hidden", - "section" : None, - "title" : "138", - "views" : int, + "account_id" : 0, + "comment_count" : int, + "cover_id" : "693j2Kr", + "date" : "dt:2015-10-09 10:37:50", + "description" : "", + "downvote_count": 0, + "favorite" : False, + "favorite_count": 0, + "id" : "TcBmP", + "image_count" : 19, + "in_most_viral" : False, + "is_ad" : False, + "is_album" : True, + "is_mature" : False, + "is_pending" : False, + "privacy" : "private", + "score" : int, + "title" : "138", + "topic" : "", + "topic_id" : 0, + "upvote_count" : int, + "url" : "https://imgur.com/a/TcBmP", + "view_count" : int, + "virality" : int, }, - "account_id" : None, - "account_url": None, - "animated" : bool, - "bandwidth" : int, + "account_id" : 0, + "count" : 19, "date" : "type:datetime", - "datetime" : int, - "description": None, - "edited" : "0", - "favorite" : False, + "description": "", + "ext" : "jpg", "has_sound" : False, "height" : int, "id" : str, - "in_gallery" : False, - "is_ad" : False, - "link" : r"re:https://i\.imgur\.com/\w+\.jpg", - "nsfw" : None, + "is_animated": False, + "is_looping" : False, + "mime_type" : "image/jpeg", + "name" : str, "num" : int, - "section" : None, "size" : int, - "tags" : list, - "title" : None, - "type" : "image/jpeg", - "views" : int, - "vote" : None, + "title" : str, + "type" : "image", + "updated_at" : None, + "url" : str, "width" : int, }, }), @@ -208,13 +209,15 @@ class ImgurAlbumExtractor(ImgurExtractor): def items(self): album = self.api.album(self.key) - album["date"] = text.parse_timestamp(album["datetime"]) - images = album["images"] + album["date"] = text.parse_datetime(album["created_at"]) + + images = album["media"] + del album["media"] count = len(images) try: - del album["images"] - del album["ad_config"] + del album["ad_url"] + del album["ad_type"] except KeyError: pass @@ -239,22 +242,17 @@ class ImgurGalleryExtractor(ImgurExtractor): ("https://imgur.com/gallery/eD9CT", { "pattern": "https://imgur.com/a/eD9CT", }), - ("https://imgur.com/t/unmuted/26sEhNr", { # unmuted URL - "pattern": "https://imgur.com/26sEhNr", - }), + ("https://imgur.com/t/unmuted/26sEhNr"), ("https://imgur.com/t/cat/qSB8NbN"), ) def items(self): - url = self.root + "/a/" + self.key - 
with self.request(url, method="HEAD", fatal=False) as response: - if response.status_code < 400: - extr = ImgurAlbumExtractor - else: - extr = ImgurImageExtractor - url = self.root + "/" + self.key - - yield Message.Version, 1 + if self.api.gallery(self.key)["is_album"]: + url = "{}/a/{}".format(self.root, self.key) + extr = ImgurAlbumExtractor + else: + url = "{}/{}".format(self.root, self.key) + extr = ImgurImageExtractor yield Message.Queue, url, {"_extractor": extr} @@ -346,38 +344,46 @@ class ImgurAPI(): } def account_favorites(self, account): - endpoint = "account/{}/gallery_favorites".format(account) + endpoint = "/3/account/{}/gallery_favorites".format(account) return self._pagination(endpoint) def gallery_search(self, query): - endpoint = "gallery/search" + endpoint = "/3/gallery/search" params = {"q": query} return self._pagination(endpoint, params) def account_submissions(self, account): - endpoint = "account/{}/submissions".format(account) + endpoint = "/3/account/{}/submissions".format(account) return self._pagination(endpoint) def gallery_subreddit(self, subreddit): - endpoint = "gallery/r/{}".format(subreddit) + endpoint = "/3/gallery/r/{}".format(subreddit) return self._pagination(endpoint) def gallery_tag(self, tag): - endpoint = "gallery/t/{}".format(tag) + endpoint = "/3/gallery/t/{}".format(tag) return self._pagination(endpoint, key="items") + def image(self, image_hash): + endpoint = "/post/v1/media/" + image_hash + params = {"include": "media,tags,account"} + return self._call(endpoint, params) + def album(self, album_hash): - return self._call("album/" + album_hash) + endpoint = "/post/v1/albums/" + album_hash + params = {"include": "media,tags,account"} + return self._call(endpoint, params) - def image(self, image_hash): - return self._call("image/" + image_hash) + def gallery(self, gallery_hash): + endpoint = "/post/v1/posts/" + gallery_hash + return self._call(endpoint) def _call(self, endpoint, params=None): try: return self.extractor.request( - "https://api.imgur.com/3/" + endpoint, + "https://api.imgur.com" + endpoint, params=params, headers=self.headers, - ).json()["data"] + ).json() except exception.HttpError as exc: if exc.status != 403 or b"capacity" not in exc.response.content: raise @@ -388,7 +394,7 @@ class ImgurAPI(): num = 0 while True: - data = self._call("{}/{}".format(endpoint, num), params) + data = self._call("{}/{}".format(endpoint, num), params)["data"] if key: data = data[key] if not data: diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py new file mode 100644 index 0000000..4a43d57 --- /dev/null +++ b/gallery_dl/extractor/myhentaigallery.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract hentai-gallery from https://myhentaigallery.com/""" + +from .common import GalleryExtractor +from .. 
import text, exception + + +class MyhentaigalleryGalleryExtractor(GalleryExtractor): + """Extractor for image galleries from myhentaigallery.com""" + category = "myhentaigallery" + directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}") + pattern = (r"(?:https?://)?myhentaigallery\.com" + r"/gallery/(?:thumbnails|show)/(\d+)") + test = ( + ("https://myhentaigallery.com/gallery/thumbnails/16247", { + "pattern": r"https://images.myhentaigrid.com/imagesgallery/images" + r"/[^/]+/original/\d+\.jpg", + "keyword": { + "artist" : list, + "count" : 11, + "gallery_id": 16247, + "group" : list, + "parodies" : list, + "tags" : ["Giantess"], + "title" : "Attack Of The 50ft Woman 1", + }, + }), + ("https://myhentaigallery.com/gallery/show/16247/1"), + ) + root = "https://myhentaigallery.com" + + def __init__(self, match): + self.gallery_id = match.group(1) + url = "{}/gallery/thumbnails/{}".format(self.root, self.gallery_id) + GalleryExtractor.__init__(self, match, url) + self.session.headers["Referer"] = url + + def metadata(self, page): + extr = text.extract_from(page) + split = text.split_html + + title = extr('<div class="comic-description">\n<h1>', '</h1>') + if not title: + raise exception.NotFoundError("gallery") + + return { + "title" : text.unescape(title), + "gallery_id": text.parse_int(self.gallery_id), + "tags" : split(extr('<div>\nCategories:', '</div>')), + "artist" : split(extr('<div>\nArtists:' , '</div>')), + "group" : split(extr('<div>\nGroups:' , '</div>')), + "parodies" : split(extr('<div>\nParodies:' , '</div>')), + } + + def images(self, page): + return [ + (text.unescape(text.extract(url, 'src="', '"')[0]).replace( + "/thumbnail/", "/original/"), None) + for url in text.extract_iter(page, 'class="comic-thumb"', '</div>') + ] diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py index 6862559..60ca1fb 100644 --- a/gallery_dl/extractor/plurk.py +++ b/gallery_dl/extractor/plurk.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,7 +9,7 @@ """Extractors for https://www.plurk.com/""" from .common import Extractor, Message -from .. import text, extractor, exception +from .. import text, exception import datetime import time import json @@ -23,12 +23,9 @@ class PlurkExtractor(Extractor): def items(self): urls = self._urls_ex if self.config("comments", False) else self._urls - - yield Message.Version, 1 - with extractor.blacklist(("plurk",)): - for plurk in self.plurks(): - for url in urls(plurk): - yield Message.Queue, url, plurk + for plurk in self.plurks(): + for url in urls(plurk): + yield Message.Queue, url, plurk def plurks(self): """Return an iterable with all relevant 'plurk' objects""" diff --git a/gallery_dl/extractor/recursive.py b/gallery_dl/extractor/recursive.py index ead5c35..4dd9d5c 100644 --- a/gallery_dl/extractor/recursive.py +++ b/gallery_dl/extractor/recursive.py @@ -9,7 +9,6 @@ """Recursive extractor""" from .common import Extractor, Message -from .. 
import extractor, util import requests import re @@ -23,17 +22,12 @@ class RecursiveExtractor(Extractor): }) def items(self): - blist = self.config( - "blacklist", {"directlink"} | util.SPECIAL_EXTRACTORS) - self.session.mount("file://", FileAdapter()) page = self.request(self.url.partition(":")[2]).text del self.session.adapters["file://"] - yield Message.Version, 1 - with extractor.blacklist(blist): - for match in re.finditer(r"https?://[^\s\"']+", page): - yield Message.Queue, match.group(0), {} + for match in re.finditer(r"https?://[^\s\"']+", page): + yield Message.Queue, match.group(0), {} class FileAdapter(requests.adapters.BaseAdapter): diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py index 0f02e8b..96be3d8 100644 --- a/gallery_dl/extractor/redgifs.py +++ b/gallery_dl/extractor/redgifs.py @@ -24,7 +24,7 @@ class RedgifsUserExtractor(RedgifsExtractor): directory_fmt = ("{category}", "{userName}") pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/users/([^/?&#]+)" test = ("https://www.redgifs.com/users/Natalifiction", { - "pattern": r"https://thcf\d+\.redgifs\.com/[A-Za-z]+\.mp4", + "pattern": r"https://\w+\.(redgifs|gfycat)\.com/[A-Za-z]+\.mp4", "count": ">= 100", }) @@ -38,7 +38,7 @@ class RedgifsSearchExtractor(RedgifsExtractor): directory_fmt = ("{category}", "Search", "{search}") pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/gifs/browse/([^/?&#]+)" test = ("https://www.redgifs.com/gifs/browse/jav", { - "pattern": r"https://thcf\d+\.redgifs\.com/[A-Za-z]+\.mp4", + "pattern": r"https://\w+\.(redgifs|gfycat)\.com/[A-Za-z]+\.mp4", "range": "100-300", "count": "> 200", }) diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 4d51851..185f33a 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -9,7 +9,7 @@ """Extract images from https://www.tumblr.com/""" from .common import Extractor, Message -from .. import text, oauth, extractor, exception +from .. 
import text, oauth, exception from datetime import datetime, timedelta import re @@ -41,7 +41,7 @@ BASE_PATTERN = ( class TumblrExtractor(Extractor): """Base class for tumblr extractors""" category = "tumblr" - directory_fmt = ("{category}", "{name}") + directory_fmt = ("{category}", "{blog_name}") filename_fmt = "{category}_{blog_name}_{id}_{num:>02}.{extension}" archive_fmt = "{id}_{num}" cookiedomain = None @@ -69,7 +69,6 @@ class TumblrExtractor(Extractor): def items(self): blog = None - yield Message.Version, 1 for post in self.posts(): if self.date_min > post["timestamp"]: @@ -79,10 +78,10 @@ class TumblrExtractor(Extractor): if not blog: blog = self.api.info(self.blog) blog["uuid"] = self.blog - yield Message.Directory, blog.copy() if self.avatar: url = self.api.avatar(self.blog) + yield Message.Directory, {"blog": blog} yield self._prepare_avatar(url, post.copy(), blog) reblog = "reblogged_from_id" in post @@ -90,13 +89,13 @@ class TumblrExtractor(Extractor): continue post["reblogged"] = reblog + if "trail" in post: + del post["trail"] post["blog"] = blog post["date"] = text.parse_timestamp(post["timestamp"]) + yield Message.Directory, post post["num"] = 0 - if "trail" in post: - del post["trail"] - if "photos" in post: # type "photo" or "link" photos = post["photos"] del post["photos"] @@ -129,12 +128,9 @@ class TumblrExtractor(Extractor): if self.external: # external links post["extension"] = None - with extractor.blacklist(("tumblr",)): - for key in ("permalink_url", "url"): - url = post.get(key) - if url: - yield Message.Queue, url, post - break + url = post.get("permalink_url") or post.get("url") + if url: + yield Message.Queue, url, post def posts(self): """Return an iterable containing all relevant posts""" @@ -316,7 +312,7 @@ class TumblrTagExtractor(TumblrExtractor): class TumblrLikesExtractor(TumblrExtractor): """Extractor for images from a tumblr-user's liked posts""" subcategory = "likes" - directory_fmt = ("{category}", "{name}", "likes") + directory_fmt = ("{category}", "{blog_name}", "likes") archive_fmt = "f_{blog[name]}_{id}_{num}" pattern = BASE_PATTERN + r"/likes" test = ("http://mikf123.tumblr.com/likes", { diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 71f14dc..236a001 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -245,15 +245,24 @@ class TwitterExtractor(Extractor): class TwitterTimelineExtractor(TwitterExtractor): """Extractor for all images from a user's timeline""" subcategory = "timeline" - pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/?(?:$|[?#])" + pattern = BASE_PATTERN + \ + r"/(?!search)(?:([^/?&#]+)/?(?:$|[?#])|intent/user\?user_id=(\d+))" test = ( ("https://twitter.com/supernaturepics", { "range": "1-40", "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4", }), ("https://mobile.twitter.com/supernaturepics?p=i"), + ("https://www.twitter.com/id:2976459548"), + ("https://twitter.com/intent/user?user_id=2976459548"), ) + def __init__(self, match): + TwitterExtractor.__init__(self, match) + uid = match.group(2) + if uid: + self.user = "id:" + uid + def tweets(self): return TwitterAPI(self).timeline_profile(self.user) @@ -268,6 +277,7 @@ class TwitterMediaExtractor(TwitterExtractor): "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4", }), ("https://mobile.twitter.com/supernaturepics/media#t"), + ("https://www.twitter.com/id:2976459548/media"), ) def tweets(self): @@ -450,18 +460,18 @@ class TwitterAPI(): return tweets def timeline_profile(self, screen_name): - user = 
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 71f14dc..236a001 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -245,15 +245,24 @@ class TwitterExtractor(Extractor):
 class TwitterTimelineExtractor(TwitterExtractor):
     """Extractor for all images from a user's timeline"""
     subcategory = "timeline"
-    pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/?(?:$|[?#])"
+    pattern = BASE_PATTERN + \
+        r"/(?!search)(?:([^/?&#]+)/?(?:$|[?#])|intent/user\?user_id=(\d+))"
     test = (
         ("https://twitter.com/supernaturepics", {
             "range": "1-40",
             "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",
         }),
         ("https://mobile.twitter.com/supernaturepics?p=i"),
+        ("https://www.twitter.com/id:2976459548"),
+        ("https://twitter.com/intent/user?user_id=2976459548"),
     )
 
+    def __init__(self, match):
+        TwitterExtractor.__init__(self, match)
+        uid = match.group(2)
+        if uid:
+            self.user = "id:" + uid
+
     def tweets(self):
         return TwitterAPI(self).timeline_profile(self.user)
 
@@ -268,6 +277,7 @@ class TwitterMediaExtractor(TwitterExtractor):
             "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",
         }),
         ("https://mobile.twitter.com/supernaturepics/media#t"),
+        ("https://www.twitter.com/id:2976459548/media"),
     )
 
     def tweets(self):
@@ -450,18 +460,18 @@ class TwitterAPI():
         return tweets
 
     def timeline_profile(self, screen_name):
-        user = self.user_by_screen_name(screen_name)
-        endpoint = "2/timeline/profile/{}.json".format(user["rest_id"])
+        user_id = self._user_id_by_screen_name(screen_name)
+        endpoint = "2/timeline/profile/{}.json".format(user_id)
         return self._pagination(endpoint)
 
     def timeline_media(self, screen_name):
-        user = self.user_by_screen_name(screen_name)
-        endpoint = "2/timeline/media/{}.json".format(user["rest_id"])
+        user_id = self._user_id_by_screen_name(screen_name)
+        endpoint = "2/timeline/media/{}.json".format(user_id)
         return self._pagination(endpoint)
 
     def timeline_favorites(self, screen_name):
-        user = self.user_by_screen_name(screen_name)
-        endpoint = "2/timeline/favorites/{}.json".format(user["rest_id"])
+        user_id = self._user_id_by_screen_name(screen_name)
+        endpoint = "2/timeline/favorites/{}.json".format(user_id)
         return self._pagination(endpoint)
 
     def timeline_bookmark(self):
@@ -490,6 +500,11 @@ class TwitterAPI():
         except KeyError:
             raise exception.NotFoundError("user")
 
+    def _user_id_by_screen_name(self, screen_name):
+        if screen_name.startswith("id:"):
+            return screen_name[3:]
+        return self.user_by_screen_name(screen_name)["rest_id"]
+
     @cache(maxage=3600)
     def _guest_token(self):
         endpoint = "1.1/guest/activate.json"
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index 0ada118..4efc92c 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -93,7 +93,7 @@ class WikiartArtworksExtractor(WikiartExtractor):
     directory_fmt = ("{category}", "Artworks by {group!c}", "{type}")
     pattern = BASE_PATTERN + r"/paintings-by-([\w-]+)/([\w-]+)"
     test = ("https://www.wikiart.org/en/paintings-by-media/grisaille", {
-        "url": "228426a9d32b5bba9d659944c6b0ba73883af33f",
+        "url": "36e054fcb3363b7f085c81f4778e6db3994e56a3",
     })
 
     def __init__(self, match):
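
_user_id_by_screen_name() lets every timeline endpoint accept either a screen name or an "id:"-prefixed numeric ID, which is what makes the new /id:2976459548 and /intent/user?user_id=... test URLs above work without an extra API round trip. A condensed sketch of the dispatch, with the screen-name lookup stubbed out as a plain function:

    def user_id_for(user, lookup=lambda name: {"rest_id": "0"}):
        """Return the numeric ID for 'name' or 'id:12345'-style input."""
        if user.startswith("id:"):
            return user[3:]              # already an ID: skip the API call
        return lookup(user)["rest_id"]   # resolve the screen name via the API

    print(user_id_for("id:2976459548"))    # -> 2976459548
    print(user_id_for("supernaturepics"))  # -> 0 (stubbed lookup)
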
config("postprocessors") + postprocessors = self.extractor.config_accumulate("postprocessors") if postprocessors: pp_log = self.get_logger("postprocessor") pp_list = [] @@ -426,6 +447,25 @@ class DownloadJob(Job): self.extractor.log.debug( "Active postprocessor modules: %s", pp_list) + def _build_blacklist(self): + wlist = self.extractor.config("whitelist") + if wlist: + if isinstance(wlist, str): + wlist = wlist.split(",") + blist = {e.category for e in extractor._list_classes()} + blist.difference_update(wlist) + return blist + + blist = self.extractor.config("blacklist") + if blist: + if isinstance(blist, str): + blist = blist.split(",") + blist = set(blist) + else: + blist = {self.extractor.category} + blist |= util.SPECIAL_EXTRACTORS + return blist + class SimulationJob(DownloadJob): """Simulate the extraction process without downloading anything""" @@ -549,6 +589,10 @@ class DataJob(Job): self.filter = (lambda x: x) if private else util.filter_dict def run(self): + sleep = self.extractor.config("sleep-extractor") + if sleep: + time.sleep(sleep) + # collect data try: for msg in self.extractor: diff --git a/gallery_dl/option.py b/gallery_dl/option.py index 5b99bee..2a48c87 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -209,11 +209,26 @@ def build_parser(): dest="sleep", metavar="SECONDS", type=float, action=ConfigAction, help="Number of seconds to sleep before each download", ) + downloader.add_argument( + "--filesize-min", + dest="filesize-min", metavar="SIZE", action=ConfigAction, + help="Do not download files smaller than SIZE (e.g. 500k or 2.5M)", + ) + downloader.add_argument( + "--filesize-max", + dest="filesize-max", metavar="SIZE", action=ConfigAction, + help="Do not download files larger than SIZE (e.g. 500k or 2.5M)", + ) downloader.add_argument( "--no-part", dest="part", nargs=0, action=ConfigConstAction, const=False, help="Do not use .part files", ) + downloader.add_argument( + "--no-skip", + dest="skip", nargs=0, action=ConfigConstAction, const=False, + help="Do not skip downloads; overwrite existing files", + ) downloader.add_argument( "--no-mtime", dest="mtime", nargs=0, action=ConfigConstAction, const=False, diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py index 6970e95..a6e5bc3 100644 --- a/gallery_dl/postprocessor/zip.py +++ b/gallery_dl/postprocessor/zip.py @@ -33,23 +33,23 @@ class ZipPP(PostProcessor): algorithm) algorithm = "store" + self.zfile = None self.path = job.pathfmt.realdirectory - args = (self.path[:-1] + ext, "a", - self.COMPRESSION_ALGORITHMS[algorithm], True) + self.args = (self.path[:-1] + ext, "a", + self.COMPRESSION_ALGORITHMS[algorithm], True) if options.get("mode") == "safe": self.run = self._write_safe - self.zfile = None - self.args = args else: self.run = self._write - self.zfile = zipfile.ZipFile(*args) def _write(self, pathfmt, zfile=None): # 'NameToInfo' is not officially documented, but it's available # for all supported Python versions and using it directly is a lot # faster than calling getinfo() if zfile is None: + if self.zfile is None: + self.zfile = zipfile.ZipFile(*self.args) zfile = self.zfile if pathfmt.filename not in zfile.NameToInfo: zfile.write(pathfmt.temppath, pathfmt.filename) diff --git a/gallery_dl/util.py b/gallery_dl/util.py index f688fa6..dbebfce 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -646,7 +646,7 @@ class Formatter(): obj = kwdict[key] for func in funcs: obj = func(obj) - if obj is not None: + if obj: break except Exception: pass @@ -769,10 +769,8 
diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py
index 6970e95..a6e5bc3 100644
--- a/gallery_dl/postprocessor/zip.py
+++ b/gallery_dl/postprocessor/zip.py
@@ -33,23 +33,23 @@ class ZipPP(PostProcessor):
                 algorithm)
             algorithm = "store"
 
+        self.zfile = None
         self.path = job.pathfmt.realdirectory
-        args = (self.path[:-1] + ext, "a",
-                self.COMPRESSION_ALGORITHMS[algorithm], True)
+        self.args = (self.path[:-1] + ext, "a",
+                     self.COMPRESSION_ALGORITHMS[algorithm], True)
 
         if options.get("mode") == "safe":
             self.run = self._write_safe
-            self.zfile = None
-            self.args = args
         else:
             self.run = self._write
-            self.zfile = zipfile.ZipFile(*args)
 
     def _write(self, pathfmt, zfile=None):
         # 'NameToInfo' is not officially documented, but it's available
         # for all supported Python versions and using it directly is a lot
         # faster than calling getinfo()
         if zfile is None:
+            if self.zfile is None:
+                self.zfile = zipfile.ZipFile(*self.args)
             zfile = self.zfile
         if pathfmt.filename not in zfile.NameToInfo:
             zfile.write(pathfmt.temppath, pathfmt.filename)
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index f688fa6..dbebfce 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -646,7 +646,7 @@ class Formatter():
                 obj = kwdict[key]
                 for func in funcs:
                     obj = func(obj)
-                if obj is not None:
+                if obj:
                     break
             except Exception:
                 pass
@@ -769,10 +769,8 @@ class PathFormat():
         """Open file and return a corresponding file object"""
         return open(self.temppath, mode)
 
-    def exists(self, archive=None):
-        """Return True if the file exists on disk or in 'archive'"""
-        if archive and self.kwdict in archive:
-            return self.fix_extension()
+    def exists(self):
+        """Return True if the file exists on disk"""
         if self.extension and os.path.exists(self.realpath):
             return self.check_file()
         return False
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 9af9a43..d7e2737 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.14.5"
+__version__ = "1.15.0"
diff --git a/test/test_config.py b/test/test_config.py
index a9cefd4..7cbb12b 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -96,6 +96,28 @@ class TestConfig(unittest.TestCase):
         test(("Z1", "Z2", "A1", "A2", "A3"), 999, 8)
         test((), 9)
 
+    def test_accumulate(self):
+        self.assertEqual(config.accumulate((), "l"), [])
+
+        config.set(()        , "l", [5, 6])
+        config.set(("c",)    , "l", [3, 4])
+        config.set(("c", "c"), "l", [1, 2])
+        self.assertEqual(
+            config.accumulate((), "l")        , [5, 6])
+        self.assertEqual(
+            config.accumulate(("c",), "l")    , [3, 4, 5, 6])
+        self.assertEqual(
+            config.accumulate(("c", "c"), "l"), [1, 2, 3, 4, 5, 6])
+
+        config.set(("c",), "l", None)
+        config.unset(("c", "c"), "l")
+        self.assertEqual(
+            config.accumulate((), "l")        , [5, 6])
+        self.assertEqual(
+            config.accumulate(("c",), "l")    , [5, 6])
+        self.assertEqual(
+            config.accumulate(("c", "c"), "l"), [5, 6])
+
     def test_set(self):
         config.set(()        , "c", [1, 2, 3])
         config.set(("b",)    , "c", [1, 2, 3])
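
The test_accumulate() cases above pin down config.accumulate(): list values are collected from the most specific config section outwards to the root and concatenated in that order, with unset or None levels skipped. An emulation over plain nested dicts, assuming the same section-path convention as config.set():

    def accumulate(root, path, key):
        """Collect 'key' from the section at 'path' up to the root."""
        sections, node = [root], root
        for name in path:                   # walk down to the given section
            node = node.get(name) or {}
            sections.append(node)
        result = []
        for section in reversed(sections):  # most specific section first
            value = section.get(key)
            if value:
                result += value
        return result

    cfg = {"l": [5, 6], "c": {"l": [3, 4], "c": {"l": [1, 2]}}}
    print(accumulate(cfg, ("c", "c"), "l"))  # -> [1, 2, 3, 4, 5, 6]
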
diff --git a/test/test_downloader.py b/test/test_downloader.py
index 5d73a4c..99cfb62 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -179,6 +179,9 @@ class TestHTTPDownloader(TestDownloaderBase):
         server = http.server.HTTPServer(("", port), HttpRequestHandler)
         threading.Thread(target=server.serve_forever, daemon=True).start()
 
+    def tearDown(self):
+        self.downloader.minsize = self.downloader.maxsize = None
+
     def test_http_download(self):
         self._run_test(self._jpg, None, DATA_JPG, "jpg", "jpg")
         self._run_test(self._png, None, DATA_PNG, "png", "png")
@@ -199,6 +202,20 @@
         self._run_test(self._png, None, DATA_PNG, "gif", "png")
         self._run_test(self._gif, None, DATA_GIF, "jpg", "gif")
 
+    def test_http_filesize_min(self):
+        pathfmt = self._prepare_destination(None, extension=None)
+        self.downloader.minsize = 100
+        with self.assertLogs(self.downloader.log, "WARNING"):
+            success = self.downloader.download(self._gif, pathfmt)
+        self.assertFalse(success)
+
+    def test_http_filesize_max(self):
+        pathfmt = self._prepare_destination(None, extension=None)
+        self.downloader.maxsize = 100
+        with self.assertLogs(self.downloader.log, "WARNING"):
+            success = self.downloader.download(self._jpg, pathfmt)
+        self.assertFalse(success)
+
 
 class TestTextDownloader(TestDownloaderBase):
diff --git a/test/test_extractor.py b/test/test_extractor.py
index 043bd52..162edc0 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -75,30 +75,6 @@ class TestExtractorModule(unittest.TestCase):
         self.assertEqual(classes[0], FakeExtractor)
         self.assertIsInstance(extractor.find(uri), FakeExtractor)
 
-    def test_blacklist(self):
-        link_uri = "https://example.org/file.jpg"
-        test_uri = "test:"
-        fake_uri = "fake:"
-
-        self.assertIsInstance(extractor.find(link_uri), DirectlinkExtractor)
-        self.assertIsInstance(extractor.find(test_uri), Extractor)
-        self.assertIsNone(extractor.find(fake_uri))
-
-        with extractor.blacklist(["directlink"]):
-            self.assertIsNone(extractor.find(link_uri))
-            self.assertIsInstance(extractor.find(test_uri), Extractor)
-            self.assertIsNone(extractor.find(fake_uri))
-
-        with extractor.blacklist([], [DirectlinkExtractor, FakeExtractor]):
-            self.assertIsNone(extractor.find(link_uri))
-            self.assertIsInstance(extractor.find(test_uri), Extractor)
-            self.assertIsNone(extractor.find(fake_uri))
-
-        with extractor.blacklist(["test"], [DirectlinkExtractor]):
-            self.assertIsNone(extractor.find(link_uri))
-            self.assertIsNone(extractor.find(test_uri))
-            self.assertIsNone(extractor.find(fake_uri))
-
     def test_from_url(self):
         for uri in self.VALID_URIS:
             cls = extractor.find(uri).__class__
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 5da3131..ff98477 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -342,9 +342,20 @@ class ZipTest(BasePostprocessorTest):
         self.assertEqual(pp.path, self.pathfmt.realdirectory)
         self.assertEqual(pp.run, pp._write)
         self.assertEqual(pp.delete, True)
-        self.assertFalse(hasattr(pp, "args"))
-        self.assertEqual(pp.zfile.compression, zipfile.ZIP_STORED)
-        self.assertTrue(pp.zfile.filename.endswith("/test.zip"))
+        self.assertEqual(pp.args, (
+            pp.path[:-1] + ".zip", "a", zipfile.ZIP_STORED, True,
+        ))
+        self.assertTrue(pp.args[0].endswith("/test.zip"))
+
+    def test_zip_safe(self):
+        pp = self._create({"mode": "safe"})
+        self.assertEqual(pp.path, self.pathfmt.realdirectory)
+        self.assertEqual(pp.run, pp._write_safe)
+        self.assertEqual(pp.delete, True)
+        self.assertEqual(pp.args, (
+            pp.path[:-1] + ".zip", "a", zipfile.ZIP_STORED, True,
+        ))
+        self.assertTrue(pp.args[0].endswith("/test.zip"))
 
     def test_zip_options(self):
         pp = self._create({
@@ -353,22 +364,13 @@
             "extension": "cbz",
         })
         self.assertEqual(pp.delete, False)
-        self.assertEqual(pp.zfile.compression, zipfile.ZIP_DEFLATED)
-        self.assertTrue(pp.zfile.filename.endswith("/test.cbz"))
-
-    def test_zip_safe(self):
-        pp = self._create({"mode": "safe"})
-        self.assertEqual(pp.delete, True)
-        self.assertEqual(pp.path, self.pathfmt.realdirectory)
-        self.assertEqual(pp.run, pp._write_safe)
-        self.assertEqual(pp.args, (
-            pp.path[:-1] + ".zip", "a", zipfile.ZIP_STORED, True,
-        ))
-        self.assertTrue(pp.args[0].endswith("/test.zip"))
+        self.assertEqual(pp.args, (
+            pp.path[:-1] + ".cbz", "a", zipfile.ZIP_DEFLATED, True,
+        ))
+        self.assertTrue(pp.args[0].endswith("/test.cbz"))
 
     def test_zip_write(self):
         pp = self._create()
-        nti = pp.zfile.NameToInfo
 
         with tempfile.NamedTemporaryFile("w", dir=self.dir.name) as file:
             file.write("foobar\n")
@@ -382,6 +384,7 @@
                 pp.prepare(self.pathfmt)
                 pp.run(self.pathfmt)
 
+                nti = pp.zfile.NameToInfo
                 self.assertEqual(len(nti), i+1)
                 self.assertIn(name, nti)
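
The reshuffled zip tests reflect the deferred-creation change in gallery_dl/postprocessor/zip.py further up: both modes now store self.args and leave self.zfile as None, so the archive file is only created once there is something to write, and an all-skipped run no longer leaves an empty zip behind (#968). The lazy-open pattern on its own, roughly:

    import zipfile

    class LazyZip:
        def __init__(self, path):
            self.args = (path, "a", zipfile.ZIP_STORED, True)
            self.zfile = None  # deferred: nothing touches the disk yet

        def write(self, srcpath, arcname):
            if self.zfile is None:  # the first write opens the archive
                self.zfile = zipfile.ZipFile(*self.args)
            if arcname not in self.zfile.NameToInfo:
                self.zfile.write(srcpath, arcname)
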
diff --git a/test/test_results.py b/test/test_results.py
index fbbb79c..1380f31 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -31,10 +31,11 @@ TRAVIS_SKIP = {
 
 # temporary issues, etc.
 BROKEN = {
-    "dokireader",
+    "8kun",
+    "dynastyscans",
+    "fallenangels",
     "imagevenue",
     "photobucket",
-    "worldthree",
 }
 
 
diff --git a/test/test_util.py b/test/test_util.py
index 5fbaa4e..1515814 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -339,7 +339,7 @@ class TestFormatter(unittest.TestCase):
         self._run_test("{z|a!C:RH/C/}", "Cello World")
         self._run_test("{z|y|x:?</>/}", "")
 
-        self._run_test("{d[c]|d[b]|d[a]}", "0")
+        self._run_test("{d[c]|d[b]|d[a]}", "foo")
         self._run_test("{d[a]|d[b]|d[c]}", "foo")
         self._run_test("{d[z]|d[y]|d[x]}", "None")
 
-- 
cgit v1.2.3