author     Unit 193 <unit193@unit193.net>  2020-09-28 18:27:46 -0400
committer  Unit 193 <unit193@unit193.net>  2020-09-28 18:27:46 -0400
commit     9074eee175f76b824fbb6695d56426105191c51c (patch)
tree       2294be463d325d7092e600d88f160027c437086d
parent     261c8c2bc74969e2242a153297895684742b6995 (diff)

New upstream version 1.15.0 (upstream/1.15.0)
-rw-r--r--  CHANGELOG.md                             |  28
-rw-r--r--  PKG-INFO                                 |   8
-rw-r--r--  README.rst                               |   6
-rw-r--r--  data/completion/_gallery-dl              |   3
-rw-r--r--  data/completion/gallery-dl               |   2
-rw-r--r--  data/man/gallery-dl.1                    |  11
-rw-r--r--  data/man/gallery-dl.conf.5               |  84
-rw-r--r--  docs/gallery-dl.conf                     |   6
-rw-r--r--  gallery_dl.egg-info/PKG-INFO             |   8
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt          |   1
-rw-r--r--  gallery_dl/config.py                     |  19
-rw-r--r--  gallery_dl/downloader/http.py            |  29
-rw-r--r--  gallery_dl/downloader/ytdl.py            |   4
-rw-r--r--  gallery_dl/extractor/500px.py            |   2
-rw-r--r--  gallery_dl/extractor/__init__.py         |  19
-rw-r--r--  gallery_dl/extractor/aryion.py           |   4
-rw-r--r--  gallery_dl/extractor/common.py           |  23
-rw-r--r--  gallery_dl/extractor/danbooru.py         |   7
-rw-r--r--  gallery_dl/extractor/deviantart.py       |  78
-rw-r--r--  gallery_dl/extractor/exhentai.py         |   5
-rw-r--r--  gallery_dl/extractor/foolfuuka.py        |   2
-rw-r--r--  gallery_dl/extractor/foolslide.py        |  66
-rw-r--r--  gallery_dl/extractor/furaffinity.py      |  16
-rw-r--r--  gallery_dl/extractor/hbrowse.py          |   4
-rw-r--r--  gallery_dl/extractor/hitomi.py           |   2
-rw-r--r--  gallery_dl/extractor/imgur.py            | 242
-rw-r--r--  gallery_dl/extractor/myhentaigallery.py  |  65
-rw-r--r--  gallery_dl/extractor/plurk.py            |  13
-rw-r--r--  gallery_dl/extractor/recursive.py        |  10
-rw-r--r--  gallery_dl/extractor/redgifs.py          |   4
-rw-r--r--  gallery_dl/extractor/tumblr.py           |  24
-rw-r--r--  gallery_dl/extractor/twitter.py          |  29
-rw-r--r--  gallery_dl/extractor/wikiart.py          |   2
-rw-r--r--  gallery_dl/job.py                        |  48
-rw-r--r--  gallery_dl/option.py                     |  15
-rw-r--r--  gallery_dl/postprocessor/zip.py          |  10
-rw-r--r--  gallery_dl/util.py                       |   8
-rw-r--r--  gallery_dl/version.py                    |   2
-rw-r--r--  test/test_config.py                      |  22
-rw-r--r--  test/test_downloader.py                  |  17
-rw-r--r--  test/test_extractor.py                   |  24
-rw-r--r--  test/test_postprocessor.py               |  31
-rw-r--r--  test/test_results.py                     |   5
-rw-r--r--  test/test_util.py                        |   2
44 files changed, 644 insertions(+), 366 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b38c9c8..b368535 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,33 @@
# Changelog
+## 1.15.0 - 2020-09-20
+### Additions
+- [deviantart] support watchers-only/paid deviations ([#995](https://github.com/mikf/gallery-dl/issues/995))
+- [myhentaigallery] add gallery extractor ([#1001](https://github.com/mikf/gallery-dl/issues/1001))
+- [twitter] support specifying users by ID ([#980](https://github.com/mikf/gallery-dl/issues/980))
+- [twitter] support `/intent/user?user_id=…` URLs ([#980](https://github.com/mikf/gallery-dl/issues/980))
+- add `--no-skip` command-line option ([#986](https://github.com/mikf/gallery-dl/issues/986))
+- add `blacklist` and `whitelist` options ([#492](https://github.com/mikf/gallery-dl/issues/492), [#844](https://github.com/mikf/gallery-dl/issues/844))
+- add `filesize-min` and `filesize-max` options ([#780](https://github.com/mikf/gallery-dl/issues/780))
+- add `sleep-extractor` and `sleep-request` options ([#788](https://github.com/mikf/gallery-dl/issues/788))
+- write skipped files to archive ([#550](https://github.com/mikf/gallery-dl/issues/550))
+### Changes
+- [exhentai] update wait time before original image downloads ([#978](https://github.com/mikf/gallery-dl/issues/978))
+- [imgur] use new API endpoints for image/album data
+- [tumblr] create directories for each post ([#965](https://github.com/mikf/gallery-dl/issues/965))
+- support format string replacement fields in download archive paths ([#985](https://github.com/mikf/gallery-dl/issues/985))
+- reduce wait time growth rate for HTTP retries from exponential to linear
+### Fixes
+- [500px] update query hash
+- [aryion] improve post ID extraction ([#981](https://github.com/mikf/gallery-dl/issues/981), [#982](https://github.com/mikf/gallery-dl/issues/982))
+- [danbooru] handle posts without `id` ([#1004](https://github.com/mikf/gallery-dl/issues/1004))
+- [furaffinity] update download URL extraction ([#988](https://github.com/mikf/gallery-dl/issues/988))
+- [imgur] fix image/album detection for galleries
+- [postprocessor:zip] defer zip file creation ([#968](https://github.com/mikf/gallery-dl/issues/968))
+### Removals
+- [jaiminisbox] remove extractors
+- [worldthree] remove extractors
+
## 1.14.5 - 2020-08-30
### Additions
- [aryion] add username/password support ([#960](https://github.com/mikf/gallery-dl/issues/960))
diff --git a/PKG-INFO b/PKG-INFO
index 644b647..19b7f04 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.14.5
+Version: 1.15.0
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.5/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.5/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -311,7 +311,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.5.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.0.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/README.rst b/README.rst
index 6f5c4bb..ca01764 100644
--- a/README.rst
+++ b/README.rst
@@ -83,8 +83,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.5/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.5/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -300,7 +300,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.5.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.0.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index b5074d2..5194312 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -28,7 +28,10 @@ _arguments -C -S \
{-A,--abort}'[Abort extractor run after N consecutive file downloads have been skipped, e.g. if files with the same filename already exist]':'<n>' \
--http-timeout'[Timeout for HTTP connections (default: 30.0)]':'<seconds>' \
--sleep'[Number of seconds to sleep before each download]':'<seconds>' \
+--filesize-min'[Do not download files smaller than SIZE (e.g. 500k or 2.5M)]':'<size>' \
+--filesize-max'[Do not download files larger than SIZE (e.g. 500k or 2.5M)]':'<size>' \
--no-part'[Do not use .part files]' \
+--no-skip'[Do not skip downloads; overwrite existing files]' \
--no-mtime'[Do not set file modification times according to Last-Modified HTTP response headers]' \
--no-download'[Do not download any files]' \
--no-check-certificate'[Disable HTTPS certificate validation]' \
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index 921d601..19cb39f 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -10,7 +10,7 @@ _gallery_dl()
elif [[ "${prev}" =~ ^(-d|--dest)$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --dump-json --simulate --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --abort --http-timeout --sleep --no-part --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --dump-json --simulate --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --abort --http-timeout --sleep --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after" -- "${cur}") )
fi
}
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 2437195..2a84a06 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2020-08-30" "1.14.5" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2020-09-20" "1.15.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -86,9 +86,18 @@ Timeout for HTTP connections (default: 30.0)
.B "\-\-sleep" \f[I]SECONDS\f[]
Number of seconds to sleep before each download
.TP
+.B "\-\-filesize\-min" \f[I]SIZE\f[]
+Do not download files smaller than SIZE (e.g. 500k or 2.5M)
+.TP
+.B "\-\-filesize\-max" \f[I]SIZE\f[]
+Do not download files larger than SIZE (e.g. 500k or 2.5M)
+.TP
.B "\-\-no\-part"
Do not use .part files
.TP
+.B "\-\-no\-skip"
+Do not skip downloads; overwrite existing files
+.TP
.B "\-\-no\-mtime"
Do not set file modification times according to Last-Modified HTTP response headers
.TP
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index a5b1f4d..e37135e 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2020-08-30" "1.14.5" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2020-09-20" "1.15.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -253,6 +253,28 @@ filename extension (\f[I]file.1.ext\f[], \f[I]file.2.ext\f[], etc.)
.IP "Description:" 4
Number of seconds to sleep before each download.
+.SS extractor.*.sleep-extractor
+.IP "Type:" 6
+\f[I]float\f[]
+
+.IP "Default:" 9
+\f[I]0\f[]
+
+.IP "Description:" 4
+Number of seconds to sleep before handling an input URL,
+i.e. before starting a new extractor.
+
+.SS extractor.*.sleep-request
+.IP "Type:" 6
+\f[I]float\f[]
+
+.IP "Default:" 9
+\f[I]0\f[]
+
+.IP "Description:" 4
+Minimal time interval in seconds between each HTTP request
+during data extraction.
+
.SS extractor.*.username & .password
.IP "Type:" 6
\f[I]string\f[]
@@ -436,6 +458,21 @@ Transfer an extractor's (sub)category values to all child
extractors spawned by it, to let them inherit their parent's
config options.
+.SS extractor.*.blacklist & .whitelist
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]["oauth", "recursive", "test"]\f[] + current extractor category
+
+.IP "Description:" 4
+A list of extractor categories to ignore (or allow)
+when spawning child extractors for unknown URLs,
+e.g. from \f[I]reddit\f[] or \f[I]plurk\f[].
+
+Note: Any \f[I]blacklist\f[] setting will automatically include
+\f[I]"oauth"\f[], \f[I]"recursive"\f[], and \f[I]"test"\f[].
+
.SS extractor.*.archive
.IP "Type:" 6
\f[I]Path\f[]
@@ -443,15 +480,23 @@ config options.
.IP "Default:" 9
\f[I]null\f[]
+.IP "Example:" 4
+"$HOME/.archives/{category}.sqlite3"
+
.IP "Description:" 4
File to store IDs of downloaded files in. Downloads of files
-already recorded in this archive file will be skipped_.
+already recorded in this archive file will be
+\f[I]skipped <extractor.*.skip_>\f[].
The resulting archive file is not a plain text file but an SQLite3
database, as either lookup operations are significantly faster or
memory requirements are significantly lower when the
amount of stored IDs gets reasonably large.
+Note: archive paths support regular \f[I]format string\f[] replacements,
+but be aware that using external inputs for building local paths
+may pose a security risk.
+
.SS extractor.*.archive-format
.IP "Type:" 6
\f[I]string\f[]
@@ -839,9 +884,6 @@ or whenever your \f[I]cache file <cache.file_>\f[] is deleted or cleared.
.IP "Description:" 4
Minimum wait time in seconds before API requests.
-Note: This value will internally be rounded up
-to the next power of 2.
-
.SS extractor.exhentai.domain
.IP "Type:" 6
\f[I]string\f[]
@@ -1221,17 +1263,6 @@ Controls how to handle redirects to CAPTCHA pages.
.br
* \f[I]"wait\f[]: Ask the user to solve the CAPTCHA and wait.
-.SS extractor.recursive.blacklist
-.IP "Type:" 6
-\f[I]list\f[] of \f[I]strings\f[]
-
-.IP "Default:" 9
-\f[I]["directlink", "oauth", "recursive", "test"]\f[]
-
-.IP "Description:" 4
-A list of extractor categories which should be ignored when using
-the \f[I]recursive\f[] extractor.
-
.SS extractor.reddit.comments
.IP "Type:" 6
\f[I]integer\f[]
@@ -1594,6 +1625,24 @@ Reverse the order of chapter URLs extracted from manga pages.
.IP "Description:" 4
Enable/Disable this downloader module.
+.SS downloader.*.filesize-min & .filesize-max
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Example:" 4
+"32000", "500k", "2.5M"
+
+.IP "Description:" 4
+Minimum/Maximum allowed file size in bytes.
+Any file smaller/larger than this limit will not be downloaded.
+
+Possible values are valid integer or floating-point numbers
+optionally followed by one of \f[I]k\f[], \f[I]m\f[], \f[I]g\f[], \f[I]t\f[] or \f[I]p\f[].
+These suffixes are case-insensitive.
+
.SS downloader.*.mtime
.IP "Type:" 6
\f[I]bool\f[]
@@ -2283,6 +2332,9 @@ Submission Policy, and Terms of Service.
application and put them in your configuration file
as \f[I]"client-id"\f[] and \f[I]"client-secret"\f[]
.br
+* clear your \f[I]cache <cache.file_>\f[] (\f[I]--clear-cache\f[]) to delete
+the \f[I]access-token\f[] from the previous \f[I]client-id\f[]
+.br
* get a new \f[I]refresh-token <extractor.deviantart.refresh-token_>\f[]
if necessary
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 56147e9..2d7b0ff 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -119,10 +119,6 @@
{
"captcha": "stop"
},
- "recursive":
- {
- "blacklist": ["directlink", "oauth", "recursive", "test"]
- },
"reddit":
{
"comments": 0,
@@ -189,6 +185,8 @@
"downloader":
{
+ "filesize-min": null,
+ "filesize-max": null,
"part": true,
"part-directory": null,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index a2fafb1..fc9f14b 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.14.5
+Version: 1.15.0
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.5/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.5/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -311,7 +311,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.5.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.0.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 56c9245..648e273 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -108,6 +108,7 @@ gallery_dl/extractor/mangastream.py
gallery_dl/extractor/mangoxo.py
gallery_dl/extractor/mastodon.py
gallery_dl/extractor/message.py
+gallery_dl/extractor/myhentaigallery.py
gallery_dl/extractor/myportfolio.py
gallery_dl/extractor/naver.py
gallery_dl/extractor/newgrounds.py
diff --git a/gallery_dl/config.py b/gallery_dl/config.py
index a3c71cd..e0a5459 100644
--- a/gallery_dl/config.py
+++ b/gallery_dl/config.py
@@ -140,6 +140,25 @@ def interpolate_common(common, paths, key, default=None, *, conf=_config):
return default
+def accumulate(path, key, *, conf=_config):
+ """Accumulate the values of 'key' along 'path'"""
+ result = []
+ try:
+ if key in conf:
+ value = conf[key]
+ if value:
+ result.extend(value)
+ for p in path:
+ conf = conf[p]
+ if key in conf:
+ value = conf[key]
+ if value:
+ result[:0] = value
+ except Exception:
+ pass
+ return result
+
+
def set(path, key, value, *, conf=_config):
"""Set the value of property 'key' for this session"""
for p in path:
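
The new accumulate() helper backs the blacklist/whitelist options documented above: values found deeper along the config path are prepended to values from outer levels, so more specific settings come first. A minimal sketch of that merge order, using a hand-built config dict instead of gallery-dl's module-level one:

    # Sketch of accumulate()'s merge order (hand-built dict; the real
    # function walks gallery_dl.config._config along 'path').
    conf = {
        "blacklist": ["oauth"],             # root level
        "extractor": {
            "blacklist": ["test"],          # deeper level
            "reddit": {},
        },
    }

    result = []
    if conf.get("blacklist"):
        result.extend(conf["blacklist"])    # -> ["oauth"]
    for p in ("extractor", "reddit"):
        conf = conf[p]
        if conf.get("blacklist"):
            result[:0] = conf["blacklist"]  # prepend deeper values
    print(result)                           # ["test", "oauth"]
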
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 6644827..0e67330 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -31,6 +31,8 @@ class HttpDownloader(DownloaderBase):
self.downloading = False
self.adjust_extension = self.config("adjust-extensions", True)
+ self.minsize = self.config("filesize-min")
+ self.maxsize = self.config("filesize-max")
self.retries = self.config("retries", extractor._retries)
self.timeout = self.config("timeout", extractor._timeout)
self.verify = self.config("verify", extractor._verify)
@@ -39,6 +41,16 @@ class HttpDownloader(DownloaderBase):
if self.retries < 0:
self.retries = float("inf")
+ if self.minsize:
+ minsize = text.parse_bytes(self.minsize)
+ if not minsize:
+ self.log.warning("Invalid minimum filesize (%r)", self.minsize)
+ self.minsize = minsize
+ if self.maxsize:
+ maxsize = text.parse_bytes(self.maxsize)
+ if not maxsize:
+ self.log.warning("Invalid maximum filesize (%r)", self.maxsize)
+ self.maxsize = maxsize
if self.rate:
rate = text.parse_bytes(self.rate)
if rate:
@@ -75,7 +87,7 @@ class HttpDownloader(DownloaderBase):
self.log.warning("%s (%s/%s)", msg, tries, self.retries+1)
if tries > self.retries:
return False
- time.sleep(min(2 ** (tries-1), 1800))
+ time.sleep(tries)
tries += 1
headers = {}
@@ -116,7 +128,20 @@ class HttpDownloader(DownloaderBase):
continue
self.log.warning(msg)
return False
- size = text.parse_int(size)
+
+ # check filesize
+ size = text.parse_int(size, None)
+ if size is not None:
+ if self.minsize and size < self.minsize:
+ self.log.warning(
+ "File size smaller than allowed minimum (%s < %s)",
+ size, self.minsize)
+ return False
+ if self.maxsize and size > self.maxsize:
+ self.log.warning(
+ "File size larger than allowed maximum (%s > %s)",
+ size, self.maxsize)
+ return False
# set missing filename extension
if not pathfmt.extension:
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index c3dd863..8086b5d 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -31,6 +31,10 @@ class YoutubeDLDownloader(DownloaderBase):
"nopart": not self.part,
"updatetime": self.config("mtime", True),
"proxy": extractor.session.proxies.get("http"),
+ "min_filesize": text.parse_bytes(
+ self.config("filesize-min"), None),
+ "max_filesize": text.parse_bytes(
+ self.config("filesize-max"), None),
}
options.update(self.config("raw-options") or {})
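
Both downloaders turn the filesize-min/filesize-max strings into byte counts via text.parse_bytes(). A rough stand-in for that helper, assuming binary (1024-based) multipliers behind the case-insensitive k/m/g/t/p suffixes described in the man page above:

    # Rough stand-in for gallery_dl.text.parse_bytes(); the binary
    # multipliers are an assumption, the suffix set comes from the
    # man-page description above.
    def parse_size(value, default=None, suffixes="bkmgtp"):
        if not value:
            return default
        value = value.strip()
        last = value[-1].lower()
        if last in suffixes:
            mul = 1024 ** suffixes.index(last)
            value = value[:-1]
        else:
            mul = 1
        try:
            return round(float(value) * mul)
        except ValueError:
            return default

    parse_size("500k")   # -> 512000
    parse_size("2.5M")   # -> 2621440
    parse_size("32000")  # -> 32000
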
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index 96cb021..4dc4f0d 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -153,7 +153,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
def metadata(self):
user = self._request_graphql(
"ProfileRendererQuery", {"username": self.user_name},
- "db1dba2cb7b7e94916d1005db16fea1a39d6211437b691c4de2f1a606c21c5fb",
+ "4d02ff5c13927a3ac73b3eef306490508bc765956940c31051468cf30402a503",
)["profile"]
self.user_id = str(user["legacyId"])
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 6f8867c..53bc726 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -74,6 +74,7 @@ modules = [
"mangareader",
"mangastream",
"mangoxo",
+ "myhentaigallery",
"myportfolio",
"naver",
"newgrounds",
@@ -140,7 +141,7 @@ def find(url):
"""Find a suitable extractor for the given URL"""
for cls in _list_classes():
match = cls.pattern.match(url)
- if match and cls not in _blacklist:
+ if match:
return cls(match)
return None
@@ -169,26 +170,10 @@ def extractors():
)
-class blacklist():
- """Context Manager to blacklist extractor modules"""
- def __init__(self, categories, extractors=None):
- self.extractors = extractors or []
- for cls in _list_classes():
- if cls.category in categories:
- self.extractors.append(cls)
-
- def __enter__(self):
- _blacklist.update(self.extractors)
-
- def __exit__(self, etype, value, traceback):
- _blacklist.clear()
-
-
# --------------------------------------------------------------------
# internals
_cache = []
-_blacklist = set()
_module_iter = iter(modules)
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 2e4c4d4..374a9fc 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -30,6 +30,7 @@ class AryionExtractor(Extractor):
Extractor.__init__(self, match)
self.user = match.group(1)
self.recursive = True
+ self._needle = "class='gallery-item' id='"
def login(self):
username, password = self._get_auth_info()
@@ -73,7 +74,7 @@ class AryionExtractor(Extractor):
while True:
page = self.request(url).text
yield from text.extract_iter(
- page, "class='thumb' href='/g4/view/", "'")
+ page, self._needle, "'")
pos = page.find("Next &gt;&gt;")
if pos < 0:
@@ -180,6 +181,7 @@ class AryionGalleryExtractor(AryionExtractor):
url = "{}/g4/gallery/{}".format(self.root, self.user)
return self._pagination(url)
else:
+ self._needle = "class='thumb' href='/g4/view/"
url = "{}/g4/latest.php?name={}".format(self.root, self.user)
return util.advance(self._pagination(url), self.offset)
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index e6c0968..357deac 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -31,6 +31,8 @@ class Extractor():
cookiedomain = ""
root = ""
test = None
+ _request_last = 0
+ _request_interval = 0
def __init__(self, match):
self.session = requests.Session()
@@ -40,10 +42,14 @@ class Extractor():
self._cookiefile = None
self._cookiejar = self.session.cookies
self._parentdir = ""
+
+ self._cfgpath = ("extractor", self.category, self.subcategory)
self._write_pages = self.config("write-pages", False)
self._retries = self.config("retries", 4)
self._timeout = self.config("timeout", 30)
self._verify = self.config("verify", True)
+ self._request_interval = self.config(
+ "sleep-request", self._request_interval)
if self._retries < 0:
self._retries = float("inf")
@@ -69,8 +75,10 @@ class Extractor():
return 0
def config(self, key, default=None):
- return config.interpolate(
- ("extractor", self.category, self.subcategory), key, default)
+ return config.interpolate(self._cfgpath, key, default)
+
+ def config_accumulate(self, key):
+ return config.accumulate(self._cfgpath, key)
def request(self, url, *, method="GET", session=None, retries=None,
encoding=None, fatal=True, notfound=None, **kwargs):
@@ -81,6 +89,13 @@ class Extractor():
kwargs.setdefault("verify", self._verify)
response = None
+ if self._request_interval:
+ seconds = (self._request_interval -
+ (time.time() - Extractor._request_last))
+ if seconds > 0:
+ self.log.debug("Sleeping for %.5s seconds", seconds)
+ time.sleep(seconds)
+
while True:
try:
response = session.request(method, url, **kwargs)
@@ -119,11 +134,13 @@ class Extractor():
msg = "'{} {}' for '{}'".format(code, reason, url)
if code < 500 and code != 429 and code != 430:
break
+ finally:
+ Extractor._request_last = time.time()
self.log.debug("%s (%s/%s)", msg, tries, retries+1)
if tries > retries:
break
- time.sleep(min(2 ** (tries-1), 1800))
+ time.sleep(tries)
tries += 1
raise exception.HttpError(msg, response)
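
The sleep-request handling above stores the timestamp on the class (Extractor._request_last) rather than the instance, so every extractor shares a single rate limit, and updates it in a finally block so failed requests count too. The same pattern in isolation (hypothetical helper, not part of gallery-dl):

    import time

    class Throttle:
        """Minimum interval between calls; class-level timestamp so
        all instances share one limit, mirroring Extractor above."""
        _last = 0.0

        def __init__(self, interval):
            self.interval = interval

        def call(self, func, *args, **kwargs):
            seconds = self.interval - (time.time() - Throttle._last)
            if seconds > 0:
                time.sleep(seconds)
            try:
                return func(*args, **kwargs)
            finally:
                Throttle._last = time.time()  # updated even on errors
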
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index e0edf89..1ebaf5b 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -93,7 +93,12 @@ class DanbooruExtractor(SharedConfigMixin, Extractor):
if pagenum:
params["page"] += 1
else:
- params["page"] = "b{}".format(posts[-1]["id"])
+ for post in reversed(posts):
+ if "id" in post:
+ params["page"] = "b{}".format(post["id"])
+ break
+ else:
+ return
class DanbooruTagExtractor(DanbooruExtractor):
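
The pagination fix above leans on Python's for/else: the else branch runs only when the loop finishes without break, i.e. when no post on the page carries an id, in which case the extractor stops instead of raising a KeyError. With hypothetical post data:

    posts = [{"id": 4052826}, {"md5": "deadbeef"}]  # hypothetical page
    for post in reversed(posts):
        if "id" in post:
            page = "b{}".format(post["id"])  # cursor from last real id
            break
    else:
        page = None  # no usable id on this page: stop paginating
    # page == "b4052826"
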
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 73ef20d..a0f4d1c 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -14,7 +14,6 @@ from ..cache import cache, memcache
import collections
import itertools
import mimetypes
-import math
import time
import re
@@ -55,6 +54,7 @@ class DeviantartExtractor(Extractor):
self._update_content = self._update_content_image
self.original = True
+ self._premium_cache = {}
self.commit_journal = {
"html": self._commit_journal_html,
"text": self._commit_journal_text,
@@ -66,6 +66,8 @@ class DeviantartExtractor(Extractor):
def items(self):
self.api = DeviantartOAuthAPI(self)
+ if not self.api.refresh_token_key:
+ self._fetch_premium = self._fetch_premium_notoken
if self.user:
profile = self.api.user_profile(self.user)
@@ -83,6 +85,10 @@ class DeviantartExtractor(Extractor):
yield Message.Queue, url, data
continue
+ if "premium_folder_data" in deviation:
+ if not self._fetch_premium(deviation):
+ continue
+
self.prepare(deviation)
yield Message.Directory, deviation
@@ -261,7 +267,9 @@ class DeviantartExtractor(Extractor):
return [(url + folder["name"], folder) for folder in folders]
def _update_content_default(self, deviation, content):
- content.update(self.api.deviation_download(deviation["deviationid"]))
+ public = "premium_folder_data" not in deviation
+ data = self.api.deviation_download(deviation["deviationid"], public)
+ content.update(data)
def _update_content_image(self, deviation, content):
data = self.api.deviation_download(deviation["deviationid"])
@@ -290,6 +298,41 @@ class DeviantartExtractor(Extractor):
return response
self.wait(seconds=180)
+ def _fetch_premium(self, deviation):
+ cache = self._premium_cache
+
+ if deviation["deviationid"] not in cache:
+
+ # check accessibility
+ dev = self.api.deviation(deviation["deviationid"], False)
+ has_access = dev["premium_folder_data"]["has_access"]
+
+ if has_access:
+ self.log.info("Fetching premium folder data")
+ else:
+ self.log.warning("Unable to access premium content (type: %s)",
+ dev["premium_folder_data"]["type"])
+ # fill cache
+ for dev in self.api.gallery(
+ deviation["author"]["username"],
+ deviation["premium_folder_data"]["gallery_id"],
+ public=False,
+ ):
+ cache[dev["deviationid"]] = dev if has_access else None
+
+ data = cache[deviation["deviationid"]]
+ if data:
+ deviation.update(data)
+ return True
+ return False
+
+ def _fetch_premium_notoken(self, deviation):
+ if not self._premium_cache:
+ self.log.warning(
+ "Unable to access premium content (no refresh-token)")
+ self._premium_cache = True
+ return False
+
class DeviantartUserExtractor(DeviantartExtractor):
"""Extractor for an artist's user profile"""
@@ -837,8 +880,7 @@ class DeviantartOAuthAPI():
self.log = extractor.log
self.headers = {}
- delay = extractor.config("wait-min", 0)
- self.delay = math.ceil(math.log2(delay)) if delay >= 1 else -1
+ self.delay = extractor.config("wait-min", 0)
self.delay_min = max(2, self.delay)
self.mature = extractor.config("mature", "true")
@@ -897,27 +939,27 @@ class DeviantartOAuthAPI():
"mature_content": self.mature}
return self._pagination_folders(endpoint, params)
- def deviation(self, deviation_id):
+ def deviation(self, deviation_id, public=True):
"""Query and return info about a single Deviation"""
endpoint = "deviation/" + deviation_id
- deviation = self._call(endpoint)
+ deviation = self._call(endpoint, public=public)
if self.metadata:
self._metadata((deviation,))
if self.folders:
self._folders((deviation,))
return deviation
- def deviation_content(self, deviation_id):
+ def deviation_content(self, deviation_id, public=False):
"""Get extended content of a single Deviation"""
endpoint = "deviation/content"
params = {"deviationid": deviation_id}
- return self._call(endpoint, params, public=False)
+ return self._call(endpoint, params, public=public)
- def deviation_download(self, deviation_id):
+ def deviation_download(self, deviation_id, public=True):
"""Get the original file download (if allowed)"""
endpoint = "deviation/download/" + deviation_id
params = {"mature_content": self.mature}
- return self._call(endpoint, params)
+ return self._call(endpoint, params, public=public)
def deviation_metadata(self, deviations):
""" Fetch deviation metadata for a set of deviations"""
@@ -930,12 +972,12 @@ class DeviantartOAuthAPI():
params = {"mature_content": self.mature}
return self._call(endpoint, params)["metadata"]
- def gallery(self, username, folder_id="", offset=0, extend=True):
+ def gallery(self, username, folder_id, offset=0, extend=True, public=True):
"""Yield all Deviation-objects contained in a gallery folder"""
endpoint = "gallery/" + folder_id
params = {"username": username, "offset": offset, "limit": 24,
"mature_content": self.mature, "mode": "newest"}
- return self._pagination(endpoint, params, extend)
+ return self._pagination(endpoint, params, extend, public)
def gallery_all(self, username, offset=0):
"""Yield all Deviation-objects of a specific user"""
@@ -993,8 +1035,8 @@ class DeviantartOAuthAPI():
"""Call an API endpoint"""
url = "https://www.deviantart.com/api/v1/oauth2/" + endpoint
while True:
- if self.delay >= 0:
- time.sleep(2 ** self.delay)
+ if self.delay:
+ time.sleep(self.delay)
self.authenticate(None if public else self.refresh_token_key)
response = self.extractor.request(
@@ -1015,15 +1057,15 @@ class DeviantartOAuthAPI():
msg = "API responded with {} {}".format(
status, response.reason)
if status == 429:
- if self.delay < 9:
+ if self.delay < 30:
self.delay += 1
- self.log.warning("%s. Using %ds delay.", msg, 2 ** self.delay)
+ self.log.warning("%s. Using %ds delay.", msg, self.delay)
else:
self.log.error(msg)
return data
- def _pagination(self, endpoint, params, extend=True):
- public = warn = True
+ def _pagination(self, endpoint, params, extend=True, public=True):
+ warn = True
while True:
data = self._call(endpoint, params, public=public)
if "results" not in data:
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 80c7187..cb4df11 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -34,6 +34,9 @@ class ExhentaiExtractor(Extractor):
LIMIT = False
def __init__(self, match):
+ # allow calling 'self.config()' before 'Extractor.__init__()'
+ self._cfgpath = ("extractor", self.category, self.subcategory)
+
version = match.group(1)
domain = self.config("domain", "auto")
if domain == "auto":
@@ -193,7 +196,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self._check_limits(data)
if "/fullimg.php" in url:
data["extension"] = ""
- self.wait(1.5)
+ self.wait(self.wait_max / 4)
yield Message.Url, url, data
def get_metadata(self, page):
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 4af9d4a..f2019ca 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -109,7 +109,7 @@ EXTRACTORS = {
"root": "https://arch.b4k.co",
"extra": {"external": "direct"},
"test-thread": ("https://arch.b4k.co/meta/thread/196/", {
- "url": "9b0ae01292133268fe9178b71332da1ee25b7704",
+ "url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
}),
},
"desuarchive": {
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index e624a65..0ab42db 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -17,9 +17,7 @@ from .common import (
generate_extractors,
)
from .. import text, util
-import base64
import json
-import re
class FoolslideBase(SharedConfigMixin):
@@ -83,25 +81,7 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
})
def images(self, page):
- data = None
-
- if self.decode == "base64":
- pos = page.find("'fromCharCode'")
- if pos >= 0:
- blob = text.extract(page, "'", "'", pos+15)[0]
- base64_data = re.sub(r"[a-zA-Z]", _decode_jaiminisbox, blob)
- else:
- base64_data = text.extract(page, 'atob("', '"')[0]
- if base64_data:
- data = base64.b64decode(base64_data.encode()).decode()
- elif self.decode == "double":
- pos = page.find("[{")
- if pos >= 0:
- data = text.extract(page, " = ", ";", pos)[0]
-
- if not data:
- data = text.extract(page, "var pages = ", ";")[0]
- return json.loads(data)
+ return json.loads(text.extract(page, "var pages = ", ";")[0])
class FoolslideMangaExtractor(FoolslideBase, MangaExtractor):
@@ -126,16 +106,6 @@ class FoolslideMangaExtractor(FoolslideBase, MangaExtractor):
})))
-def _decode_jaiminisbox(match):
- c = match.group(0)
-
- # ord("Z") == 90, ord("z") == 122
- N = 90 if c <= "Z" else 122
- C = ord(c) + 13
-
- return chr(C if N >= C else (C - 26))
-
-
EXTRACTORS = {
"dokireader": {
"root": "https://kobato.hologfx.com/reader",
@@ -151,19 +121,6 @@ EXTRACTORS = {
"keyword": "614d89a6045b85c822cbd3e67578ea7577dfc995",
}),
},
- "jaiminisbox": {
- "root": "https://jaiminisbox.com/reader",
- "pattern": r"(?:www\.)?jaiminisbox\.com/reader",
- "extra": {"decode": "base64"},
- "test-chapter":
- ("https://jaiminisbox.com/reader/read/oshi-no-ko/en/0/1/", {
- "keyword": "d6435cfc1522293a42517a4aadda95a8631da0b3",
- }),
- "test-manga":
- ("https://jaiminisbox.com/reader/series/oshi-no-ko/", {
- "count": ">= 10",
- }),
- },
"kireicake": {
"root": "https://reader.kireicake.com",
"test-chapter":
@@ -220,27 +177,6 @@ EXTRACTORS = {
"keyword": "562fb5a7362a4cb43d59d5c8a6ea8080fc65cf99",
}),
},
- "worldthree": {
- "root": "http://www.slide.world-three.org",
- "pattern": r"(?:www\.)?slide\.world-three\.org",
- "test-chapter": (
- (("http://www.slide.world-three.org"
- "/read/black_bullet/en/2/7/page/1"), {
- "url": "be2f04f6e2d311b35188094cfd3e768583271584",
- "keyword": "967d536a65de4d52478d5b666a1760b181eddb6e",
- }),
- (("http://www.slide.world-three.org"
- "/read/idolmster_cg_shuffle/en/0/4/2/"), {
- "url": "6028ea5ca282744f925dfad92eeb98509f9cc78c",
- "keyword": "f3cfe2ad3388991f1d045c85d0fa94795a7694dc",
- }),
- ),
- "test-manga":
- ("http://www.slide.world-three.org/series/black_bullet/", {
- "url": "5743b93512d26e6b540d90a7a5d69208b6d4a738",
- "keyword": "3a24f1088b4d7f3b798a96163f21ca251293a120",
- }),
- },
"_ckey": "chapterclass",
}
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 6dfd75d..950a174 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -55,7 +55,7 @@ class FuraffinityExtractor(Extractor):
title, _, artist = text.unescape(extr(
'property="og:title" content="', '"')).rpartition(" by ")
artist_url = artist.replace("_", "").lower()
- path = extr('href="//d.facdn.net/', '"')
+ path = extr('href="//d', '"')
if not path:
self.log.warning(
@@ -76,7 +76,7 @@ class FuraffinityExtractor(Extractor):
"artist" : artist,
"artist_url": artist_url,
"user" : self.user or artist_url,
- "url" : "https://d.facdn.net/" + path
+ "url" : "https://d" + path
})
tags = extr('class="tags-row">', '</section>')
@@ -179,7 +179,7 @@ class FuraffinityGalleryExtractor(FuraffinityExtractor):
subcategory = "gallery"
pattern = BASE_PATTERN + r"/gallery/([^/?&#]+)"
test = ("https://www.furaffinity.net/gallery/mirlinthloth/", {
- "pattern": r"https://d.facdn.net/art/mirlinthloth/\d+/\d+.\w+\.\w+",
+ "pattern": r"https://d\d?.facdn.net/art/mirlinthloth/\d+/\d+.\w+\.\w+",
"range": "45-50",
"count": 6,
})
@@ -191,7 +191,7 @@ class FuraffinityScrapsExtractor(FuraffinityExtractor):
directory_fmt = ("{category}", "{user!l}", "Scraps")
pattern = BASE_PATTERN + r"/scraps/([^/?&#]+)"
test = ("https://www.furaffinity.net/scraps/mirlinthloth/", {
- "pattern": r"https://d.facdn.net/art/[^/]+(/stories)?/\d+/\d+.\w+.\w+",
+ "pattern": r"https://d\d?.facdn.net/art/[^/]+(/stories)?/\d+/\d+.\w+.",
"count": ">= 3",
})
@@ -202,7 +202,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
directory_fmt = ("{category}", "{user!l}", "Favorites")
pattern = BASE_PATTERN + r"/favorites/([^/?&#]+)"
test = ("https://www.furaffinity.net/favorites/mirlinthloth/", {
- "pattern": r"https://d.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+",
+ "pattern": r"https://d\d?.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+",
"range": "45-50",
"count": 6,
})
@@ -217,7 +217,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
directory_fmt = ("{category}", "Search", "{search}")
pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
test = ("https://www.furaffinity.net/search/?q=cute", {
- "pattern": r"https://d.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+",
+ "pattern": r"https://d\d?.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+",
"range": "45-50",
"count": 6,
})
@@ -236,7 +236,7 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)"
test = (
("https://www.furaffinity.net/view/21835115/", {
- "url": "eae4ef93d99365c69b31a37561bd800c03d336ad",
+ "url": "d80254eb4fba654597b4df8320d55916e11ba375",
"keyword": {
"artist" : "mirlinthloth",
"artist_url" : "mirlinthloth",
@@ -247,7 +247,7 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
"id" : 21835115,
"tags" : list,
"title" : "Bude's 4 Ever",
- "url" : "re:https://d.facdn.net/art/mirlinthloth/music",
+ "url" : r"re:https://d\d?.facdn.net/art/mirlinthloth/m",
"user" : "mirlinthloth",
"views" : int,
"favorites" : int,
diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py
index 181db9a..43479c6 100644
--- a/gallery_dl/extractor/hbrowse.py
+++ b/gallery_dl/extractor/hbrowse.py
@@ -50,7 +50,7 @@ class HbrowseChapterExtractor(HbrowseBase, ChapterExtractor):
pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/(\d+)/c(\d+))"
test = ("https://www.hbrowse.com/10363/c00000", {
"url": "6feefbc9f4b98e20d8425ddffa9dd111791dc3e6",
- "keyword": "6c1136522a25de013a6579ffa34dadc1eb0d4d1b",
+ "keyword": "274996f6c809e5250b6ff3abbc5147e29f89d9a5",
"content": "44578ebbe176c2c27434966aef22945787e2781e",
})
@@ -78,7 +78,7 @@ class HbrowseMangaExtractor(HbrowseBase, MangaExtractor):
pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/\d+)/?$"
test = ("https://www.hbrowse.com/10363", {
"url": "b89682bfb86c11d2af0dc47463804ec3ac4aadd6",
- "keyword": "08f5935a4411d2c19ac1786bd4ca552c3785fcae",
+ "keyword": "4b15fda1858a69de1fbf5afddfe47dd893397312",
})
def chapters(self, page):
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 209a4f2..f341c47 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -25,7 +25,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
test = (
("https://hitomi.la/galleries/867789.html", {
"pattern": r"https://[a-c]a.hitomi.la/images/./../[0-9a-f]+.jpg",
- "keyword": "6701f8f588f119ef84cd29bdf99a399417b0a6a2",
+ "keyword": "4873ef9a523621fc857b114e0b2820ba4066e9ae",
"count": 16,
}),
# download test
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 190a4ff..4391e64 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -27,19 +27,17 @@ class ImgurExtractor(Extractor):
self.mp4 = self.config("mp4", True)
def _prepare(self, image):
- try:
- del image["ad_url"]
- del image["ad_type"]
- del image["ad_config"]
- except KeyError:
- pass
+ image.update(image["metadata"])
+ del image["metadata"]
- if image["animated"] and self.mp4 and "mp4" in image:
- url = image["mp4"]
- else:
- url = image["link"]
+ if image["ext"] == "jpeg":
+ image["ext"] = "jpg"
+ elif image["is_animated"] and self.mp4 and image["ext"] == "gif":
+ image["ext"] = "mp4"
- image["date"] = text.parse_timestamp(image["datetime"])
+ image["url"] = url = "https://i.imgur.com/{}.{}".format(
+ image["id"], image["ext"])
+ image["date"] = text.parse_datetime(image["created_at"])
text.nameext_from_url(url, image)
return url
@@ -65,33 +63,38 @@ class ImgurImageExtractor(ImgurExtractor):
"url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
"keyword": {
- "account_id" : None,
- "account_url" : None,
- "animated" : False,
- "bandwidth" : int,
- "date" : "dt:2016-11-10 14:24:35",
- "datetime" : 1478787875,
- "description" : None,
- "edited" : "0",
- "extension" : "png",
- "favorite" : False,
- "filename" : "21yMxCS",
- "has_sound" : False,
- "height" : 32,
- "id" : "21yMxCS",
- "in_gallery" : False,
- "in_most_viral": False,
- "is_ad" : False,
- "link" : "https://i.imgur.com/21yMxCS.png",
- "nsfw" : False,
- "section" : None,
- "size" : 182,
- "tags" : [],
- "title" : "Test",
- "type" : "image/png",
- "views" : int,
- "vote" : None,
- "width" : 64,
+ "account_id" : 0,
+ "comment_count" : int,
+ "cover_id" : "21yMxCS",
+ "date" : "dt:2016-11-10 14:24:35",
+ "description" : "",
+ "downvote_count": int,
+ "duration" : 0,
+ "ext" : "png",
+ "favorite" : False,
+ "favorite_count": 0,
+ "has_sound" : False,
+ "height" : 32,
+ "id" : "21yMxCS",
+ "image_count" : 1,
+ "in_most_viral" : False,
+ "is_ad" : False,
+ "is_album" : False,
+ "is_animated" : False,
+ "is_looping" : False,
+ "is_mature" : False,
+ "is_pending" : False,
+ "mime_type" : "image/png",
+ "name" : "test-テスト",
+ "point_count" : int,
+ "privacy" : "",
+ "score" : int,
+ "size" : 182,
+ "title" : "Test",
+ "upvote_count" : int,
+ "url" : "https://i.imgur.com/21yMxCS.png",
+ "view_count" : int,
+ "width" : 64,
},
}),
("http://imgur.com/0gybAXR", { # gifv/mp4 video
@@ -101,30 +104,32 @@ class ImgurImageExtractor(ImgurExtractor):
("https://imgur.com/XFfsmuC", { # missing title in API response (#467)
"keyword": {"title": "Tears are a natural response to irritants"},
}),
- ("https://imgur.com/HjoXJAd", { # url ends with '.jpg?1'
- "url": "ec2cf11a2bfb4939feff374781a6e6f3e9af8e8e",
- }),
("https://imgur.com/1Nily2P", { # animated png
"pattern": "https://i.imgur.com/1Nily2P.png",
}),
("https://imgur.com/zzzzzzz", { # not found
"exception": exception.HttpError,
}),
- ("https://www.imgur.com/21yMxCS"), # www
- ("https://m.imgur.com/21yMxCS"), # mobile
- ("https://imgur.com/zxaY6"), # 5 character key
- ("https://i.imgur.com/21yMxCS.png"), # direct link
+ ("https://www.imgur.com/21yMxCS"), # www
+ ("https://m.imgur.com/21yMxCS"), # mobile
+ ("https://imgur.com/zxaY6"), # 5 character key
+ ("https://i.imgur.com/21yMxCS.png"), # direct link
("https://i.imgur.com/21yMxCSh.png"), # direct link thumbnail
- ("https://i.imgur.com/zxaY6.gif"), # direct link (short)
- ("https://i.imgur.com/zxaY6s.gif"), # direct link (short; thumb)
+ ("https://i.imgur.com/zxaY6.gif"), # direct link (short)
+ ("https://i.imgur.com/zxaY6s.gif"), # direct link (short; thumb)
)
def items(self):
image = self.api.image(self.key)
- if not image["title"]:
- page = self.request(self.root + "/" + self.key, fatal=False).text
- title = text.extract(page, "<title>", "<")[0] or ""
- image["title"] = text.unescape(title.rpartition(" - ")[0].strip())
+
+ try:
+ del image["ad_url"]
+ del image["ad_type"]
+ except KeyError:
+ pass
+
+ image.update(image["media"][0])
+ del image["media"]
url = self._prepare(image)
yield Message.Version, 1
yield Message.Directory, image
@@ -143,53 +148,49 @@ class ImgurAlbumExtractor(ImgurExtractor):
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
"keyword": {
"album": {
- "account_id" : None,
- "account_url" : None,
- "cover" : "693j2Kr",
- "cover_edited": None,
- "cover_height": 1400,
- "cover_width" : 951,
- "date" : "dt:2015-10-09 10:37:50",
- "datetime" : 1444387070,
- "description" : None,
- "favorite" : False,
- "id" : "TcBmP",
- "images_count": 19,
- "in_gallery" : False,
- "is_ad" : False,
- "is_album" : True,
- "layout" : "blog",
- "link" : "https://imgur.com/a/TcBmP",
- "nsfw" : bool,
- "privacy" : "hidden",
- "section" : None,
- "title" : "138",
- "views" : int,
+ "account_id" : 0,
+ "comment_count" : int,
+ "cover_id" : "693j2Kr",
+ "date" : "dt:2015-10-09 10:37:50",
+ "description" : "",
+ "downvote_count": 0,
+ "favorite" : False,
+ "favorite_count": 0,
+ "id" : "TcBmP",
+ "image_count" : 19,
+ "in_most_viral" : False,
+ "is_ad" : False,
+ "is_album" : True,
+ "is_mature" : False,
+ "is_pending" : False,
+ "privacy" : "private",
+ "score" : int,
+ "title" : "138",
+ "topic" : "",
+ "topic_id" : 0,
+ "upvote_count" : int,
+ "url" : "https://imgur.com/a/TcBmP",
+ "view_count" : int,
+ "virality" : int,
},
- "account_id" : None,
- "account_url": None,
- "animated" : bool,
- "bandwidth" : int,
+ "account_id" : 0,
+ "count" : 19,
"date" : "type:datetime",
- "datetime" : int,
- "description": None,
- "edited" : "0",
- "favorite" : False,
+ "description": "",
+ "ext" : "jpg",
"has_sound" : False,
"height" : int,
"id" : str,
- "in_gallery" : False,
- "is_ad" : False,
- "link" : r"re:https://i\.imgur\.com/\w+\.jpg",
- "nsfw" : None,
+ "is_animated": False,
+ "is_looping" : False,
+ "mime_type" : "image/jpeg",
+ "name" : str,
"num" : int,
- "section" : None,
"size" : int,
- "tags" : list,
- "title" : None,
- "type" : "image/jpeg",
- "views" : int,
- "vote" : None,
+ "title" : str,
+ "type" : "image",
+ "updated_at" : None,
+ "url" : str,
"width" : int,
},
}),
@@ -208,13 +209,15 @@ class ImgurAlbumExtractor(ImgurExtractor):
def items(self):
album = self.api.album(self.key)
- album["date"] = text.parse_timestamp(album["datetime"])
- images = album["images"]
+ album["date"] = text.parse_datetime(album["created_at"])
+
+ images = album["media"]
+ del album["media"]
count = len(images)
try:
- del album["images"]
- del album["ad_config"]
+ del album["ad_url"]
+ del album["ad_type"]
except KeyError:
pass
@@ -239,22 +242,17 @@ class ImgurGalleryExtractor(ImgurExtractor):
("https://imgur.com/gallery/eD9CT", {
"pattern": "https://imgur.com/a/eD9CT",
}),
- ("https://imgur.com/t/unmuted/26sEhNr", { # unmuted URL
- "pattern": "https://imgur.com/26sEhNr",
- }),
+ ("https://imgur.com/t/unmuted/26sEhNr"),
("https://imgur.com/t/cat/qSB8NbN"),
)
def items(self):
- url = self.root + "/a/" + self.key
- with self.request(url, method="HEAD", fatal=False) as response:
- if response.status_code < 400:
- extr = ImgurAlbumExtractor
- else:
- extr = ImgurImageExtractor
- url = self.root + "/" + self.key
-
- yield Message.Version, 1
+ if self.api.gallery(self.key)["is_album"]:
+ url = "{}/a/{}".format(self.root, self.key)
+ extr = ImgurAlbumExtractor
+ else:
+ url = "{}/{}".format(self.root, self.key)
+ extr = ImgurImageExtractor
yield Message.Queue, url, {"_extractor": extr}
@@ -346,38 +344,46 @@ class ImgurAPI():
}
def account_favorites(self, account):
- endpoint = "account/{}/gallery_favorites".format(account)
+ endpoint = "/3/account/{}/gallery_favorites".format(account)
return self._pagination(endpoint)
def gallery_search(self, query):
- endpoint = "gallery/search"
+ endpoint = "/3/gallery/search"
params = {"q": query}
return self._pagination(endpoint, params)
def account_submissions(self, account):
- endpoint = "account/{}/submissions".format(account)
+ endpoint = "/3/account/{}/submissions".format(account)
return self._pagination(endpoint)
def gallery_subreddit(self, subreddit):
- endpoint = "gallery/r/{}".format(subreddit)
+ endpoint = "/3/gallery/r/{}".format(subreddit)
return self._pagination(endpoint)
def gallery_tag(self, tag):
- endpoint = "gallery/t/{}".format(tag)
+ endpoint = "/3/gallery/t/{}".format(tag)
return self._pagination(endpoint, key="items")
+ def image(self, image_hash):
+ endpoint = "/post/v1/media/" + image_hash
+ params = {"include": "media,tags,account"}
+ return self._call(endpoint, params)
+
def album(self, album_hash):
- return self._call("album/" + album_hash)
+ endpoint = "/post/v1/albums/" + album_hash
+ params = {"include": "media,tags,account"}
+ return self._call(endpoint, params)
- def image(self, image_hash):
- return self._call("image/" + image_hash)
+ def gallery(self, gallery_hash):
+ endpoint = "/post/v1/posts/" + gallery_hash
+ return self._call(endpoint)
def _call(self, endpoint, params=None):
try:
return self.extractor.request(
- "https://api.imgur.com/3/" + endpoint,
+ "https://api.imgur.com" + endpoint,
params=params, headers=self.headers,
- ).json()["data"]
+ ).json()
except exception.HttpError as exc:
if exc.status != 403 or b"capacity" not in exc.response.content:
raise
@@ -388,7 +394,7 @@ class ImgurAPI():
num = 0
while True:
- data = self._call("{}/{}".format(endpoint, num), params)
+ data = self._call("{}/{}".format(endpoint, num), params)["data"]
if key:
data = data[key]
if not data:
diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py
new file mode 100644
index 0000000..4a43d57
--- /dev/null
+++ b/gallery_dl/extractor/myhentaigallery.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract hentai-gallery from https://myhentaigallery.com/"""
+
+from .common import GalleryExtractor
+from .. import text, exception
+
+
+class MyhentaigalleryGalleryExtractor(GalleryExtractor):
+ """Extractor for image galleries from myhentaigallery.com"""
+ category = "myhentaigallery"
+ directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}")
+ pattern = (r"(?:https?://)?myhentaigallery\.com"
+ r"/gallery/(?:thumbnails|show)/(\d+)")
+ test = (
+ ("https://myhentaigallery.com/gallery/thumbnails/16247", {
+ "pattern": r"https://images.myhentaigrid.com/imagesgallery/images"
+ r"/[^/]+/original/\d+\.jpg",
+ "keyword": {
+ "artist" : list,
+ "count" : 11,
+ "gallery_id": 16247,
+ "group" : list,
+ "parodies" : list,
+ "tags" : ["Giantess"],
+ "title" : "Attack Of The 50ft Woman 1",
+ },
+ }),
+ ("https://myhentaigallery.com/gallery/show/16247/1"),
+ )
+ root = "https://myhentaigallery.com"
+
+ def __init__(self, match):
+ self.gallery_id = match.group(1)
+ url = "{}/gallery/thumbnails/{}".format(self.root, self.gallery_id)
+ GalleryExtractor.__init__(self, match, url)
+ self.session.headers["Referer"] = url
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ split = text.split_html
+
+ title = extr('<div class="comic-description">\n<h1>', '</h1>')
+ if not title:
+ raise exception.NotFoundError("gallery")
+
+ return {
+ "title" : text.unescape(title),
+ "gallery_id": text.parse_int(self.gallery_id),
+ "tags" : split(extr('<div>\nCategories:', '</div>')),
+ "artist" : split(extr('<div>\nArtists:' , '</div>')),
+ "group" : split(extr('<div>\nGroups:' , '</div>')),
+ "parodies" : split(extr('<div>\nParodies:' , '</div>')),
+ }
+
+ def images(self, page):
+ return [
+ (text.unescape(text.extract(url, 'src="', '"')[0]).replace(
+ "/thumbnail/", "/original/"), None)
+ for url in text.extract_iter(page, 'class="comic-thumb"', '</div>')
+ ]
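
images() collects the thumbnail URLs and rewrites them to their full-size counterparts. The rewrite step in isolation, with a made-up URL shaped like the extractor's test pattern:

    # made-up thumbnail URL, shaped like the test pattern above
    thumb = ("https://images.myhentaigrid.com/imagesgallery/images"
             "/example-gallery/thumbnail/001.jpg")
    original = thumb.replace("/thumbnail/", "/original/")
    # -> ".../imagesgallery/images/example-gallery/original/001.jpg"
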
diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py
index 6862559..60ca1fb 100644
--- a/gallery_dl/extractor/plurk.py
+++ b/gallery_dl/extractor/plurk.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,7 +9,7 @@
"""Extractors for https://www.plurk.com/"""
from .common import Extractor, Message
-from .. import text, extractor, exception
+from .. import text, exception
import datetime
import time
import json
@@ -23,12 +23,9 @@ class PlurkExtractor(Extractor):
def items(self):
urls = self._urls_ex if self.config("comments", False) else self._urls
-
- yield Message.Version, 1
- with extractor.blacklist(("plurk",)):
- for plurk in self.plurks():
- for url in urls(plurk):
- yield Message.Queue, url, plurk
+ for plurk in self.plurks():
+ for url in urls(plurk):
+ yield Message.Queue, url, plurk
def plurks(self):
"""Return an iterable with all relevant 'plurk' objects"""
diff --git a/gallery_dl/extractor/recursive.py b/gallery_dl/extractor/recursive.py
index ead5c35..4dd9d5c 100644
--- a/gallery_dl/extractor/recursive.py
+++ b/gallery_dl/extractor/recursive.py
@@ -9,7 +9,6 @@
"""Recursive extractor"""
from .common import Extractor, Message
-from .. import extractor, util
import requests
import re
@@ -23,17 +22,12 @@ class RecursiveExtractor(Extractor):
})
def items(self):
- blist = self.config(
- "blacklist", {"directlink"} | util.SPECIAL_EXTRACTORS)
-
self.session.mount("file://", FileAdapter())
page = self.request(self.url.partition(":")[2]).text
del self.session.adapters["file://"]
- yield Message.Version, 1
- with extractor.blacklist(blist):
- for match in re.finditer(r"https?://[^\s\"']+", page):
- yield Message.Queue, match.group(0), {}
+ for match in re.finditer(r"https?://[^\s\"']+", page):
+ yield Message.Queue, match.group(0), {}
class FileAdapter(requests.adapters.BaseAdapter):
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 0f02e8b..96be3d8 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -24,7 +24,7 @@ class RedgifsUserExtractor(RedgifsExtractor):
directory_fmt = ("{category}", "{userName}")
pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/users/([^/?&#]+)"
test = ("https://www.redgifs.com/users/Natalifiction", {
- "pattern": r"https://thcf\d+\.redgifs\.com/[A-Za-z]+\.mp4",
+ "pattern": r"https://\w+\.(redgifs|gfycat)\.com/[A-Za-z]+\.mp4",
"count": ">= 100",
})
@@ -38,7 +38,7 @@ class RedgifsSearchExtractor(RedgifsExtractor):
directory_fmt = ("{category}", "Search", "{search}")
pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/gifs/browse/([^/?&#]+)"
test = ("https://www.redgifs.com/gifs/browse/jav", {
- "pattern": r"https://thcf\d+\.redgifs\.com/[A-Za-z]+\.mp4",
+ "pattern": r"https://\w+\.(redgifs|gfycat)\.com/[A-Za-z]+\.mp4",
"range": "100-300",
"count": "> 200",
})
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 4d51851..185f33a 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -9,7 +9,7 @@
"""Extract images from https://www.tumblr.com/"""
from .common import Extractor, Message
-from .. import text, oauth, extractor, exception
+from .. import text, oauth, exception
from datetime import datetime, timedelta
import re
@@ -41,7 +41,7 @@ BASE_PATTERN = (
class TumblrExtractor(Extractor):
"""Base class for tumblr extractors"""
category = "tumblr"
- directory_fmt = ("{category}", "{name}")
+ directory_fmt = ("{category}", "{blog_name}")
filename_fmt = "{category}_{blog_name}_{id}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
cookiedomain = None
@@ -69,7 +69,6 @@ class TumblrExtractor(Extractor):
def items(self):
blog = None
- yield Message.Version, 1
for post in self.posts():
if self.date_min > post["timestamp"]:
@@ -79,10 +78,10 @@ class TumblrExtractor(Extractor):
if not blog:
blog = self.api.info(self.blog)
blog["uuid"] = self.blog
- yield Message.Directory, blog.copy()
if self.avatar:
url = self.api.avatar(self.blog)
+ yield Message.Directory, {"blog": blog}
yield self._prepare_avatar(url, post.copy(), blog)
reblog = "reblogged_from_id" in post
@@ -90,13 +89,13 @@ class TumblrExtractor(Extractor):
continue
post["reblogged"] = reblog
+ if "trail" in post:
+ del post["trail"]
post["blog"] = blog
post["date"] = text.parse_timestamp(post["timestamp"])
+ yield Message.Directory, post
post["num"] = 0
- if "trail" in post:
- del post["trail"]
-
if "photos" in post: # type "photo" or "link"
photos = post["photos"]
del post["photos"]
@@ -129,12 +128,9 @@ class TumblrExtractor(Extractor):
if self.external: # external links
post["extension"] = None
- with extractor.blacklist(("tumblr",)):
- for key in ("permalink_url", "url"):
- url = post.get(key)
- if url:
- yield Message.Queue, url, post
- break
+ url = post.get("permalink_url") or post.get("url")
+ if url:
+ yield Message.Queue, url, post
def posts(self):
"""Return an iterable containing all relevant posts"""
@@ -316,7 +312,7 @@ class TumblrTagExtractor(TumblrExtractor):
class TumblrLikesExtractor(TumblrExtractor):
"""Extractor for images from a tumblr-user's liked posts"""
subcategory = "likes"
- directory_fmt = ("{category}", "{name}", "likes")
+ directory_fmt = ("{category}", "{blog_name}", "likes")
archive_fmt = "f_{blog[name]}_{id}_{num}"
pattern = BASE_PATTERN + r"/likes"
test = ("http://mikf123.tumblr.com/likes", {
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 71f14dc..236a001 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -245,15 +245,24 @@ class TwitterExtractor(Extractor):
class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for all images from a user's timeline"""
subcategory = "timeline"
- pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/?(?:$|[?#])"
+ pattern = BASE_PATTERN + \
+ r"/(?!search)(?:([^/?&#]+)/?(?:$|[?#])|intent/user\?user_id=(\d+))"
test = (
("https://twitter.com/supernaturepics", {
"range": "1-40",
"url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",
}),
("https://mobile.twitter.com/supernaturepics?p=i"),
+ ("https://www.twitter.com/id:2976459548"),
+ ("https://twitter.com/intent/user?user_id=2976459548"),
)
+    def __init__(self, match):
+        TwitterExtractor.__init__(self, match)
+        uid = match.group(2)
+        if uid:
+            self.user = "id:" + uid
+
def tweets(self):
return TwitterAPI(self).timeline_profile(self.user)
@@ -268,6 +277,7 @@ class TwitterMediaExtractor(TwitterExtractor):
"url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",
}),
("https://mobile.twitter.com/supernaturepics/media#t"),
+ ("https://www.twitter.com/id:2976459548/media"),
)
def tweets(self):
@@ -450,18 +460,18 @@ class TwitterAPI():
return tweets
def timeline_profile(self, screen_name):
- user = self.user_by_screen_name(screen_name)
- endpoint = "2/timeline/profile/{}.json".format(user["rest_id"])
+ user_id = self._user_id_by_screen_name(screen_name)
+ endpoint = "2/timeline/profile/{}.json".format(user_id)
return self._pagination(endpoint)
def timeline_media(self, screen_name):
- user = self.user_by_screen_name(screen_name)
- endpoint = "2/timeline/media/{}.json".format(user["rest_id"])
+ user_id = self._user_id_by_screen_name(screen_name)
+ endpoint = "2/timeline/media/{}.json".format(user_id)
return self._pagination(endpoint)
def timeline_favorites(self, screen_name):
- user = self.user_by_screen_name(screen_name)
- endpoint = "2/timeline/favorites/{}.json".format(user["rest_id"])
+ user_id = self._user_id_by_screen_name(screen_name)
+ endpoint = "2/timeline/favorites/{}.json".format(user_id)
return self._pagination(endpoint)
def timeline_bookmark(self):
@@ -490,6 +500,11 @@ class TwitterAPI():
except KeyError:
raise exception.NotFoundError("user")
+ def _user_id_by_screen_name(self, screen_name):
+ if screen_name.startswith("id:"):
+ return screen_name[3:]
+ return self.user_by_screen_name(screen_name)["rest_id"]
+
@cache(maxage=3600)
def _guest_token(self):
endpoint = "1.1/guest/activate.json"
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index 0ada118..4efc92c 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -93,7 +93,7 @@ class WikiartArtworksExtractor(WikiartExtractor):
directory_fmt = ("{category}", "Artworks by {group!c}", "{type}")
pattern = BASE_PATTERN + r"/paintings-by-([\w-]+)/([\w-]+)"
test = ("https://www.wikiart.org/en/paintings-by-media/grisaille", {
- "url": "228426a9d32b5bba9d659944c6b0ba73883af33f",
+ "url": "36e054fcb3363b7f085c81f4778e6db3994e56a3",
})
def __init__(self, match):
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 163c3c6..7d08b86 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -59,6 +59,9 @@ class Job():
def run(self):
"""Execute or run the job"""
+ sleep = self.extractor.config("sleep-extractor")
+ if sleep:
+ time.sleep(sleep)
try:
log = self.extractor.log
for msg in self.extractor:
@@ -197,6 +200,7 @@ class DownloadJob(Job):
def __init__(self, url, parent=None):
Job.__init__(self, url, parent)
self.log = self.get_logger("download")
+ self.blacklist = None
self.archive = None
self.sleep = None
self.downloaders = {}
@@ -224,7 +228,14 @@ class DownloadJob(Job):
for pp in postprocessors:
pp.prepare(pathfmt)
- if pathfmt.exists(archive):
+ if archive and kwdict in archive:
+ pathfmt.fix_extension()
+ self.handle_skip()
+ return
+
+ if pathfmt.exists():
+ if archive:
+ archive.add(kwdict)
self.handle_skip()
return
@@ -248,6 +259,8 @@ class DownloadJob(Job):
return
if not pathfmt.temppath:
+ if archive:
+ archive.add(kwdict)
self.handle_skip()
return
@@ -299,6 +312,12 @@ class DownloadJob(Job):
extr = kwdict["_extractor"].from_url(url)
else:
extr = extractor.find(url)
+ if extr:
+ if self.blacklist is None:
+ self.blacklist = self._build_blacklist()
+ if extr.category in self.blacklist:
+ extr = None
+
if extr:
self.status |= self.__class__(extr, self).run()
else:
@@ -388,6 +407,8 @@ class DownloadJob(Job):
if archive:
path = util.expand_path(archive)
try:
+ if "{" in path:
+ path = util.Formatter(path).format_map(kwdict)
self.archive = util.DownloadArchive(path, self.extractor)
except Exception as exc:
self.extractor.log.warning(
@@ -396,7 +417,7 @@ class DownloadJob(Job):
else:
self.extractor.log.debug("Using download archive '%s'", path)
- postprocessors = config("postprocessors")
+ postprocessors = self.extractor.config_accumulate("postprocessors")
if postprocessors:
pp_log = self.get_logger("postprocessor")
pp_list = []
@@ -426,6 +447,25 @@ class DownloadJob(Job):
self.extractor.log.debug(
"Active postprocessor modules: %s", pp_list)
+ def _build_blacklist(self):
+ wlist = self.extractor.config("whitelist")
+ if wlist:
+ if isinstance(wlist, str):
+ wlist = wlist.split(",")
+ blist = {e.category for e in extractor._list_classes()}
+ blist.difference_update(wlist)
+ return blist
+
+ blist = self.extractor.config("blacklist")
+ if blist:
+ if isinstance(blist, str):
+ blist = blist.split(",")
+ blist = set(blist)
+ else:
+ blist = {self.extractor.category}
+ blist |= util.SPECIAL_EXTRACTORS
+ return blist
+
class SimulationJob(DownloadJob):
"""Simulate the extraction process without downloading anything"""
@@ -549,6 +589,10 @@ class DataJob(Job):
self.filter = (lambda x: x) if private else util.filter_dict
def run(self):
+ sleep = self.extractor.config("sleep-extractor")
+ if sleep:
+ time.sleep(sleep)
+
# collect data
try:
for msg in self.extractor:
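Taken together, the job.py hunks wire up several of the new 1.15.0 options: `sleep-extractor`, writing skipped files to the archive (#550), format strings in `archive` paths, accumulated `postprocessors`, and the `blacklist`/`whitelist` pair (a whitelist, if set, wins by blacklisting every other known category). A configuration sketch via `config.set()` — paths and values here are illustrative, not defaults:

```python
from gallery_dl import config

# Wait 2.5s before each extractor run:
config.set(("extractor",), "sleep-extractor", 2.5)

# Archive paths may now contain format strings, expanded per job
# through util.Formatter:
config.set(("extractor", "tumblr"), "archive",
           "~/archives/tumblr_{blog_name}.sqlite3")

# Only spawn child jobs for these categories; _build_blacklist()
# turns this into a blacklist of everything else:
config.set(("extractor",), "whitelist", "directlink,imgur")
```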
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 5b99bee..2a48c87 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -210,11 +210,26 @@ def build_parser():
help="Number of seconds to sleep before each download",
)
downloader.add_argument(
+ "--filesize-min",
+ dest="filesize-min", metavar="SIZE", action=ConfigAction,
+ help="Do not download files smaller than SIZE (e.g. 500k or 2.5M)",
+ )
+ downloader.add_argument(
+ "--filesize-max",
+ dest="filesize-max", metavar="SIZE", action=ConfigAction,
+ help="Do not download files larger than SIZE (e.g. 500k or 2.5M)",
+ )
+ downloader.add_argument(
"--no-part",
dest="part", nargs=0, action=ConfigConstAction, const=False,
help="Do not use .part files",
)
downloader.add_argument(
+ "--no-skip",
+ dest="skip", nargs=0, action=ConfigConstAction, const=False,
+ help="Do not skip downloads; overwrite existing files",
+ )
+ downloader.add_argument(
"--no-mtime",
dest="mtime", nargs=0, action=ConfigConstAction, const=False,
help=("Do not set file modification times according to "
diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py
index 6970e95..a6e5bc3 100644
--- a/gallery_dl/postprocessor/zip.py
+++ b/gallery_dl/postprocessor/zip.py
@@ -33,23 +33,23 @@ class ZipPP(PostProcessor):
algorithm)
algorithm = "store"
+ self.zfile = None
self.path = job.pathfmt.realdirectory
- args = (self.path[:-1] + ext, "a",
- self.COMPRESSION_ALGORITHMS[algorithm], True)
+ self.args = (self.path[:-1] + ext, "a",
+ self.COMPRESSION_ALGORITHMS[algorithm], True)
if options.get("mode") == "safe":
self.run = self._write_safe
- self.zfile = None
- self.args = args
else:
self.run = self._write
- self.zfile = zipfile.ZipFile(*args)
def _write(self, pathfmt, zfile=None):
# 'NameToInfo' is not officially documented, but it's available
# for all supported Python versions and using it directly is a lot
# faster than calling getinfo()
if zfile is None:
+ if self.zfile is None:
+ self.zfile = zipfile.ZipFile(*self.args)
zfile = self.zfile
if pathfmt.filename not in zfile.NameToInfo:
zfile.write(pathfmt.temppath, pathfmt.filename)
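The ZipFile is now opened lazily on the first actual write, so a run that downloads nothing no longer creates (or touches) an empty archive. The pattern in isolation, as a standalone sketch:

```python
import zipfile

class LazyZip:
    """Defer opening the archive until something is written."""
    def __init__(self, path):
        self.zfile = None
        self.args = (path, "a", zipfile.ZIP_STORED, True)

    def write(self, src, arcname):
        if self.zfile is None:          # opened on first write only
            self.zfile = zipfile.ZipFile(*self.args)
        if arcname not in self.zfile.NameToInfo:
            self.zfile.write(src, arcname)
```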
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index f688fa6..dbebfce 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -646,7 +646,7 @@ class Formatter():
obj = kwdict[key]
for func in funcs:
obj = func(obj)
- if obj is not None:
+ if obj:
break
except Exception:
pass
@@ -769,10 +769,8 @@ class PathFormat():
"""Open file and return a corresponding file object"""
return open(self.temppath, mode)
- def exists(self, archive=None):
- """Return True if the file exists on disk or in 'archive'"""
- if archive and self.kwdict in archive:
- return self.fix_extension()
+ def exists(self):
+ """Return True if the file exists on disk"""
if self.extension and os.path.exists(self.realpath):
return self.check_file()
return False
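Two behavioral notes: `PathFormat.exists()` no longer consults the archive (that check moved into job.py's skip handling, shown above), and `Formatter`'s `|` fallback chain now skips any falsy value, not just `None` — which is what the test_util change at the end of this patch asserts. With an assumed dict matching that test:

```python
from gallery_dl.util import Formatter

d = {"a": "foo", "b": 0, "c": None}
# d[c] is None and d[b] is 0; both are now skipped as falsy:
print(Formatter("{d[c]|d[b]|d[a]}").format_map({"d": d}))  # foo
```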
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 9af9a43..d7e2737 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.14.5"
+__version__ = "1.15.0"
diff --git a/test/test_config.py b/test/test_config.py
index a9cefd4..7cbb12b 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -96,6 +96,28 @@ class TestConfig(unittest.TestCase):
test(("Z1", "Z2", "A1", "A2", "A3"), 999, 8)
test((), 9)
+ def test_accumulate(self):
+ self.assertEqual(config.accumulate((), "l"), [])
+
+ config.set(() , "l", [5, 6])
+ config.set(("c",) , "l", [3, 4])
+ config.set(("c", "c"), "l", [1, 2])
+ self.assertEqual(
+ config.accumulate((), "l") , [5, 6])
+ self.assertEqual(
+ config.accumulate(("c",), "l") , [3, 4, 5, 6])
+ self.assertEqual(
+ config.accumulate(("c", "c"), "l"), [1, 2, 3, 4, 5, 6])
+
+ config.set(("c",), "l", None)
+ config.unset(("c", "c"), "l")
+ self.assertEqual(
+ config.accumulate((), "l") , [5, 6])
+ self.assertEqual(
+ config.accumulate(("c",), "l") , [5, 6])
+ self.assertEqual(
+ config.accumulate(("c", "c"), "l"), [5, 6])
+
def test_set(self):
config.set(() , "c", [1, 2, 3])
config.set(("b",) , "c", [1, 2, 3])
diff --git a/test/test_downloader.py b/test/test_downloader.py
index 5d73a4c..99cfb62 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -179,6 +179,9 @@ class TestHTTPDownloader(TestDownloaderBase):
server = http.server.HTTPServer(("", port), HttpRequestHandler)
threading.Thread(target=server.serve_forever, daemon=True).start()
+ def tearDown(self):
+ self.downloader.minsize = self.downloader.maxsize = None
+
def test_http_download(self):
self._run_test(self._jpg, None, DATA_JPG, "jpg", "jpg")
self._run_test(self._png, None, DATA_PNG, "png", "png")
@@ -199,6 +202,20 @@ class TestHTTPDownloader(TestDownloaderBase):
self._run_test(self._png, None, DATA_PNG, "gif", "png")
self._run_test(self._gif, None, DATA_GIF, "jpg", "gif")
+ def test_http_filesize_min(self):
+ pathfmt = self._prepare_destination(None, extension=None)
+ self.downloader.minsize = 100
+ with self.assertLogs(self.downloader.log, "WARNING"):
+ success = self.downloader.download(self._gif, pathfmt)
+ self.assertFalse(success)
+
+ def test_http_filesize_max(self):
+ pathfmt = self._prepare_destination(None, extension=None)
+ self.downloader.maxsize = 100
+ with self.assertLogs(self.downloader.log, "WARNING"):
+ success = self.downloader.download(self._jpg, pathfmt)
+ self.assertFalse(success)
+
class TestTextDownloader(TestDownloaderBase):
diff --git a/test/test_extractor.py b/test/test_extractor.py
index 043bd52..162edc0 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -75,30 +75,6 @@ class TestExtractorModule(unittest.TestCase):
self.assertEqual(classes[0], FakeExtractor)
self.assertIsInstance(extractor.find(uri), FakeExtractor)
- def test_blacklist(self):
- link_uri = "https://example.org/file.jpg"
- test_uri = "test:"
- fake_uri = "fake:"
-
- self.assertIsInstance(extractor.find(link_uri), DirectlinkExtractor)
- self.assertIsInstance(extractor.find(test_uri), Extractor)
- self.assertIsNone(extractor.find(fake_uri))
-
- with extractor.blacklist(["directlink"]):
- self.assertIsNone(extractor.find(link_uri))
- self.assertIsInstance(extractor.find(test_uri), Extractor)
- self.assertIsNone(extractor.find(fake_uri))
-
- with extractor.blacklist([], [DirectlinkExtractor, FakeExtractor]):
- self.assertIsNone(extractor.find(link_uri))
- self.assertIsInstance(extractor.find(test_uri), Extractor)
- self.assertIsNone(extractor.find(fake_uri))
-
- with extractor.blacklist(["test"], [DirectlinkExtractor]):
- self.assertIsNone(extractor.find(link_uri))
- self.assertIsNone(extractor.find(test_uri))
- self.assertIsNone(extractor.find(fake_uri))
-
def test_from_url(self):
for uri in self.VALID_URIS:
cls = extractor.find(uri).__class__
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 5da3131..ff98477 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -342,9 +342,20 @@ class ZipTest(BasePostprocessorTest):
self.assertEqual(pp.path, self.pathfmt.realdirectory)
self.assertEqual(pp.run, pp._write)
self.assertEqual(pp.delete, True)
- self.assertFalse(hasattr(pp, "args"))
- self.assertEqual(pp.zfile.compression, zipfile.ZIP_STORED)
- self.assertTrue(pp.zfile.filename.endswith("/test.zip"))
+ self.assertEqual(pp.args, (
+ pp.path[:-1] + ".zip", "a", zipfile.ZIP_STORED, True,
+ ))
+ self.assertTrue(pp.args[0].endswith("/test.zip"))
+
+ def test_zip_safe(self):
+ pp = self._create({"mode": "safe"})
+ self.assertEqual(pp.path, self.pathfmt.realdirectory)
+ self.assertEqual(pp.run, pp._write_safe)
+ self.assertEqual(pp.delete, True)
+ self.assertEqual(pp.args, (
+ pp.path[:-1] + ".zip", "a", zipfile.ZIP_STORED, True,
+ ))
+ self.assertTrue(pp.args[0].endswith("/test.zip"))
def test_zip_options(self):
pp = self._create({
@@ -353,22 +364,13 @@ class ZipTest(BasePostprocessorTest):
"extension": "cbz",
})
self.assertEqual(pp.delete, False)
- self.assertEqual(pp.zfile.compression, zipfile.ZIP_DEFLATED)
- self.assertTrue(pp.zfile.filename.endswith("/test.cbz"))
-
- def test_zip_safe(self):
- pp = self._create({"mode": "safe"})
- self.assertEqual(pp.delete, True)
- self.assertEqual(pp.path, self.pathfmt.realdirectory)
- self.assertEqual(pp.run, pp._write_safe)
self.assertEqual(pp.args, (
- pp.path[:-1] + ".zip", "a", zipfile.ZIP_STORED, True,
+ pp.path[:-1] + ".cbz", "a", zipfile.ZIP_DEFLATED, True,
))
- self.assertTrue(pp.args[0].endswith("/test.zip"))
+ self.assertTrue(pp.args[0].endswith("/test.cbz"))
def test_zip_write(self):
pp = self._create()
- nti = pp.zfile.NameToInfo
with tempfile.NamedTemporaryFile("w", dir=self.dir.name) as file:
file.write("foobar\n")
@@ -382,6 +384,7 @@ class ZipTest(BasePostprocessorTest):
pp.prepare(self.pathfmt)
pp.run(self.pathfmt)
+ nti = pp.zfile.NameToInfo
self.assertEqual(len(nti), i+1)
self.assertIn(name, nti)
diff --git a/test/test_results.py b/test/test_results.py
index fbbb79c..1380f31 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -31,10 +31,11 @@ TRAVIS_SKIP = {
# temporary issues, etc.
BROKEN = {
- "dokireader",
+ "8kun",
+ "dynastyscans",
+ "fallenangels",
"imagevenue",
"photobucket",
- "worldthree",
}
diff --git a/test/test_util.py b/test/test_util.py
index 5fbaa4e..1515814 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -339,7 +339,7 @@ class TestFormatter(unittest.TestCase):
self._run_test("{z|a!C:RH/C/}", "Cello World")
self._run_test("{z|y|x:?</>/}", "")
- self._run_test("{d[c]|d[b]|d[a]}", "0")
+ self._run_test("{d[c]|d[b]|d[a]}", "foo")
self._run_test("{d[a]|d[b]|d[c]}", "foo")
self._run_test("{d[z]|d[y]|d[x]}", "None")