author     Unit 193 <unit193@ubuntu.com>  2019-12-25 19:40:28 -0500
committer  Unit 193 <unit193@ubuntu.com>  2019-12-25 19:40:28 -0500
commit     f9a1a9dcb7df977eeac9544786df9c0b93795815 (patch)
tree       8cb69cf7685da8d7e4deb7dc1d6b209098e1ddfb
parent     0c73e982fa596da07f23b377621ab894a9e64884 (diff)
download   gallery-dl-f9a1a9dcb7df977eeac9544786df9c0b93795815.tar.bz2
           gallery-dl-f9a1a9dcb7df977eeac9544786df9c0b93795815.tar.xz
           gallery-dl-f9a1a9dcb7df977eeac9544786df9c0b93795815.tar.zst

New upstream version 1.12.1 (tag: upstream/1.12.1)
Diffstat:
-rw-r--r--  PKG-INFO                              16
-rw-r--r--  README.rst                            14
-rw-r--r--  data/completion/gallery-dl             2
-rw-r--r--  data/man/gallery-dl.1                 11
-rw-r--r--  data/man/gallery-dl.conf.5           157
-rw-r--r--  gallery_dl.egg-info/PKG-INFO          16
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt        1
-rw-r--r--  gallery_dl/__init__.py                19
-rw-r--r--  gallery_dl/cache.py                    2
-rw-r--r--  gallery_dl/cloudflare.py              33
-rw-r--r--  gallery_dl/config.py                  79
-rw-r--r--  gallery_dl/downloader/common.py        2
-rw-r--r--  gallery_dl/downloader/http.py         56
-rw-r--r--  gallery_dl/extractor/2chan.py         10
-rw-r--r--  gallery_dl/extractor/3dbooru.py        2
-rw-r--r--  gallery_dl/extractor/4chan.py         27
-rw-r--r--  gallery_dl/extractor/__init__.py       1
-rw-r--r--  gallery_dl/extractor/behance.py       27
-rw-r--r--  gallery_dl/extractor/bobx.py          23
-rw-r--r--  gallery_dl/extractor/common.py        81
-rw-r--r--  gallery_dl/extractor/deviantart.py   306
-rw-r--r--  gallery_dl/extractor/directlink.py    32
-rw-r--r--  gallery_dl/extractor/flickr.py        30
-rw-r--r--  gallery_dl/extractor/foolfuuka.py      4
-rw-r--r--  gallery_dl/extractor/foolslide.py     17
-rw-r--r--  gallery_dl/extractor/gelbooru.py       2
-rw-r--r--  gallery_dl/extractor/hbrowse.py        4
-rw-r--r--  gallery_dl/extractor/hentaifox.py     75
-rw-r--r--  gallery_dl/extractor/hentainexus.py    2
-rw-r--r--  gallery_dl/extractor/imagefap.py      46
-rw-r--r--  gallery_dl/extractor/imagehosts.py    14
-rw-r--r--  gallery_dl/extractor/imgbb.py         38
-rw-r--r--  gallery_dl/extractor/imgur.py         22
-rw-r--r--  gallery_dl/extractor/instagram.py     23
-rw-r--r--  gallery_dl/extractor/kissmanga.py      4
-rw-r--r--  gallery_dl/extractor/livedoor.py      14
-rw-r--r--  gallery_dl/extractor/mangoxo.py        4
-rw-r--r--  gallery_dl/extractor/mastodon.py       8
-rw-r--r--  gallery_dl/extractor/message.py        1
-rw-r--r--  gallery_dl/extractor/newgrounds.py   321
-rw-r--r--  gallery_dl/extractor/nijie.py          4
-rw-r--r--  gallery_dl/extractor/oauth.py         10
-rw-r--r--  gallery_dl/extractor/patreon.py       86
-rw-r--r--  gallery_dl/extractor/photobucket.py    2
-rw-r--r--  gallery_dl/extractor/piczel.py         2
-rw-r--r--  gallery_dl/extractor/pinterest.py     12
-rw-r--r--  gallery_dl/extractor/pixiv.py         21
-rw-r--r--  gallery_dl/extractor/plurk.py         18
-rw-r--r--  gallery_dl/extractor/realbooru.py     59
-rw-r--r--  gallery_dl/extractor/reddit.py        56
-rw-r--r--  gallery_dl/extractor/senmanga.py       2
-rw-r--r--  gallery_dl/extractor/sexcom.py         8
-rw-r--r--  gallery_dl/extractor/smugmug.py       11
-rw-r--r--  gallery_dl/extractor/tumblr.py         4
-rw-r--r--  gallery_dl/extractor/twitter.py       75
-rw-r--r--  gallery_dl/extractor/vsco.py          56
-rw-r--r--  gallery_dl/extractor/wallhaven.py      4
-rw-r--r--  gallery_dl/extractor/weibo.py         13
-rw-r--r--  gallery_dl/extractor/wikiart.py        2
-rw-r--r--  gallery_dl/extractor/xvideos.py      126
-rw-r--r--  gallery_dl/job.py                     57
-rw-r--r--  gallery_dl/oauth.py                    5
-rw-r--r--  gallery_dl/option.py                  37
-rw-r--r--  gallery_dl/output.py                   8
-rw-r--r--  gallery_dl/postprocessor/common.py     4
-rw-r--r--  gallery_dl/postprocessor/metadata.py  38
-rw-r--r--  gallery_dl/util.py                    21
-rw-r--r--  gallery_dl/version.py                  2
-rw-r--r--  test/test_config.py                  140
-rw-r--r--  test/test_cookies.py                  12
-rw-r--r--  test/test_downloader.py                2
-rw-r--r--  test/test_postprocessor.py            39
-rw-r--r--  test/test_results.py                  77
-rw-r--r--  test/test_util.py                     15

74 files changed, 1670 insertions(+), 904 deletions(-)
diff --git a/PKG-INFO b/PKG-INFO
index a2145f9..b7094a1 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.11.1
+Version: 1.12.1
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -95,8 +95,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.6/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.6/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.bin>`__
These executables include a Python 3.7 interpreter
and all required Python packages.
@@ -132,14 +132,14 @@ Description: ==========
.. code:: bash
- $ gallery-dl http://danbooru.donmai.us/posts?tags=bonocho
+ $ gallery-dl "https://danbooru.donmai.us/posts?tags=bonocho"
Get the direct URL of an image from a site that requires authentication:
.. code:: bash
- $ gallery-dl -g -u <username> -p <password> http://seiga.nicovideo.jp/seiga/im3211703
+ $ gallery-dl -g -u "<username>" -p "<password>" "https://seiga.nicovideo.jp/seiga/im3211703"
| Search a remote resource for URLs and download images from them:
@@ -147,7 +147,7 @@ Description: ==========
.. code:: bash
- $ gallery-dl r:https://pastebin.com/raw/FLwrCYsT
+ $ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
Configuration
@@ -241,8 +241,8 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.6.zip
- .. _dev: https://github.com/mikf/gallery-dl/archive/master.zip
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.1.tar.gz
+ .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
.. _PyPI: https://pypi.org/
diff --git a/README.rst b/README.rst
index 29fc856..90ca29a 100644
--- a/README.rst
+++ b/README.rst
@@ -84,8 +84,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.6/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.6/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.bin>`__
These executables include a Python 3.7 interpreter
and all required Python packages.
@@ -121,14 +121,14 @@ Download images; in this case from danbooru via tag search for 'bonocho':
.. code:: bash
- $ gallery-dl http://danbooru.donmai.us/posts?tags=bonocho
+ $ gallery-dl "https://danbooru.donmai.us/posts?tags=bonocho"
Get the direct URL of an image from a site that requires authentication:
.. code:: bash
- $ gallery-dl -g -u <username> -p <password> http://seiga.nicovideo.jp/seiga/im3211703
+ $ gallery-dl -g -u "<username>" -p "<password>" "https://seiga.nicovideo.jp/seiga/im3211703"
| Search a remote resource for URLs and download images from them:
@@ -136,7 +136,7 @@ Get the direct URL of an image from a site that requires authentication:
.. code:: bash
- $ gallery-dl r:https://pastebin.com/raw/FLwrCYsT
+ $ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
Configuration
@@ -230,8 +230,8 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.6.zip
-.. _dev: https://github.com/mikf/gallery-dl/archive/master.zip
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.1.tar.gz
+.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
.. _PyPI: https://pypi.org/
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index 415bf5c..11a796a 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -10,7 +10,7 @@ _gallery_dl()
elif [[ "${prev}" =~ ^(-d|--dest)$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --dump-json --simulate --list-keywords --list-modules --list-extractors --write-log --write-unsupported --limit-rate --retries --abort --http-timeout --sleep --no-part --no-mtime --no-download --no-check-certificate --abort-on-skip --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --range --chapter-range --filter --chapter-filter --zip --exec --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --dump-json --simulate --list-keywords --list-modules --list-extractors --write-log --write-unsupported --limit-rate --retries --abort --http-timeout --sleep --no-part --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after" -- "${cur}") )
fi
}
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index a775e76..a530760 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2019-11-09" "1.11.1" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2019-12-22" "1.12.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -131,9 +131,6 @@ Like '--filter', but applies to manga-chapters and other delegated URLs
.B "\-\-zip"
Store downloaded files in a ZIP archive
.TP
-.B "\-\-exec" \f[I]CMD\f[]
-Execute CMD for each downloaded file. Example: --exec 'magick convert {} {}.png && rm {}'
-.TP
.B "\-\-ugoira\-conv"
Convert Pixiv Ugoira to WebM (requires FFmpeg)
.TP
@@ -148,6 +145,12 @@ Write image tags to separate text files
.TP
.B "\-\-mtime\-from\-date"
Set file modification times according to 'date' metadata
+.TP
+.B "\-\-exec" \f[I]CMD\f[]
+Execute CMD for each downloaded file. Example: --exec 'convert {} {}.png && rm {}'
+.TP
+.B "\-\-exec\-after" \f[I]CMD\f[]
+Execute CMD after all files were downloaded successfully. Example: --exec-after 'cd {} && convert * ../doc.pdf'
.SH EXAMPLES
.TP
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 8902f51..07f1b88 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2019-11-09" "1.11.1" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2019-12-22" "1.12.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -171,7 +171,7 @@ escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[]
\f[I]string\f[]
.IP "Default:" 9
-\f[I]"\\\\u0000-\\\\u001f\\\\u007f"\f[] (ASCII control characters)
+\f[I]"\\u0000-\\u001f\\u007f"\f[] (ASCII control characters)
.IP "Description:" 4
Set of characters to remove from generated path names.
@@ -191,8 +191,6 @@ Controls the behavior when downloading files that have been
downloaded before, i.e. a file with the same filename already
exists or its ID is in a \f[I]download archive\f[].
-__ \f[I]extractor.*.archive\f[]
-
* \f[I]true\f[]: Skip downloads
@@ -211,8 +209,8 @@ after \f[I]N\f[] consecutive skips
after \f[I]N\f[] consecutive skips
-* \f[I]"enumerate"\f[]: Append a numeric suffix to the end of the
-original filename (\f[I]file.ext.1\f[], \f[I]file.ext.2\f[], etc)
+* \f[I]"enumerate"\f[]: Add an enumeration index to the beginning of the
+filename extension (\f[I]file.1.ext\f[], \f[I]file.2.ext\f[], etc.)
.SS extractor.*.sleep
.IP "Type:" 6
@@ -280,7 +278,7 @@ be \f[I]strings\f[], will be used as cookie-names and -values.
\f[I]bool\f[]
.IP "Default:" 9
-\f[I]false\f[]
+\f[I]true\f[]
.IP "Description:" 4
If \f[I]extractor.*.cookies\f[] specifies a cookies.txt file, update its
@@ -623,20 +621,22 @@ Use with caution.
.SS extractor.deviantart.include
.IP "Type:" 6
-\f[I]list\f[] of \f[I]strings\f[] or \f[I]string\f[]
+\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
-\f[I]["gallery"]\f[]
+\f[I]"gallery"\f[]
+
+.IP "Example:" 4
+"favorite,journal,scraps" or ["favorite", "journal", "scraps"]
.IP "Description:" 4
-Selects the subcategories to include when processing a user profile.
+A (comma-separated) list of subcategories to include
+when processing a user profile.
-Possible values are \f[I]"gallery"\f[], \f[I]"scraps"\f[], \f[I]"journal"\f[],
-\f[I]"favorite"\f[].
+Possible values are
+\f[I]"gallery"\f[], \f[I]"scraps"\f[], \f[I]"journal"\f[], \f[I]"favorite"\f[].
-It is also possible to use a string with the initial character of
-each subcategory, i.e. \f[I]"gsj"\f[] for
-\f[I]["gallery", "scraps", "journal"]\f[]
+You can use \f[I]"all"\f[] instead of listing all values separately.
.SS extractor.deviantart.journals
.IP "Type:" 6
@@ -869,6 +869,16 @@ Controls whether to choose the GIF or MP4 version of an animation.
Include *Story Highlights* when downloading a user profile.
(requires authentication)
+.SS extractor.instagram.videos
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download video files.
+
.SS extractor.kissmanga.captcha
.IP "Type:" 6
\f[I]string\f[]
@@ -884,6 +894,25 @@ Controls how to handle redirects to CAPTCHA pages.
* \f[I]"wait\f[]: Ask the user to solve the CAPTCHA and wait.
+.SS extractor.newgrounds.include
+.IP "Type:" 6
+\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]"art"\f[]
+
+.IP "Example:" 4
+"movies,audio" or ["movies", "audio"]
+
+.IP "Description:" 4
+A (comma-separated) list of subcategories to include
+when processing a user profile.
+
+Possible values are
+\f[I]"art"\f[], \f[I]"audio"\f[], \f[I]"movies"\f[].
+
+You can use \f[I]"all"\f[] instead of listing all values separately.
+
.SS extractor.oauth.browser
.IP "Type:" 6
\f[I]bool\f[]
@@ -975,7 +1004,7 @@ the \f[I]recursive\f[] extractor.
\f[I]integer\f[]
.IP "Default:" 9
-\f[I]500\f[]
+\f[I]0\f[]
.IP "Description:" 4
The value of the \f[I]limit\f[] parameter when loading
@@ -1197,6 +1226,16 @@ video extraction and download
* \f[I]false\f[]: Skip video Tweets
+.SS extractor.vsco.videos
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download video files.
+
.SS extractor.wallhaven.api-key
.IP "Type:" 6
\f[I]string\f[]
@@ -1299,8 +1338,8 @@ alongside the actual output files.
.IP "Default:" 9
\f[I]null\f[]
-.IP "Examples:" 4
-\f[I]"32000"\f[], \f[I]"500k"\f[], \f[I]"2.5M"\f[]
+.IP "Example:" 4
+"32000", "500k", "2.5M"
.IP "Description:" 4
Maximum download rate in bytes per second.
@@ -1571,10 +1610,10 @@ or to let it run asynchronously.
\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
.IP "Example:" 4
-
-* "convert {} {}.png && rm {}"
-
-* ["echo", "{user[account]}", "{id}"]
+"convert {} {}.png && rm {}",
+.br
+["echo", "{user[account]}", "{id}"]
+.br
.IP "Description:" 4
The command to run.
@@ -1620,8 +1659,8 @@ Select how to write metadata.
* \f[I]"tags"\f[]: \f[I]tags\f[] separated by newlines
-* \f[I]"custom"\f[]: result of applying \f[I]metadata.format\f[] to a file's
-metadata dictionary
+* \f[I]"custom"\f[]: result of applying \f[I]metadata.content-format\f[]
+to a file's metadata dictionary
.SS metadata.extension
.IP "Type:" 6
@@ -1631,9 +1670,26 @@ metadata dictionary
\f[I]"json"\f[] or \f[I]"txt"\f[]
.IP "Description:" 4
-Filename extension for metadata files.
+Filename extension for metadata files that will be appended to the
+original file names.
-.SS metadata.format
+.SS metadata.extension-format
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Example:" 4
+"{extension}.json",
+.br
+"json"
+.br
+
+.IP "Description:" 4
+Custom format string to build filename extensions for metadata
+files with, which will replace the original filename extensions.
+
+Note: \f[I]metadata.extension\f[] is ignored if this option is set.
+
+.SS metadata.content-format
.IP "Type:" 6
\f[I]string\f[]
@@ -1641,7 +1697,7 @@ Filename extension for metadata files.
"tags:\\n\\n{tags:J\\n}\\n"
.IP "Description:" 4
-Custom format string to build content of metadata files.
+Custom format string to build the content of metadata files with.
Note: Only applies for \f[I]"mode": "custom"\f[].
@@ -1845,6 +1901,17 @@ this cache.
(See \f[I]SSLContext.set_ciphers() <https://docs.python.org/3/library/ssl.html#ssl.SSLContext.set_ciphers>\f[]
for details)
+.SS pyopenssl
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Use \f[I]pyOpenSSL <https://www.pyopenssl.org/en/stable/>\f[]-backed
+SSL-support.
+
.SH API TOKENS & IDS
.SS extractor.deviantart.client-id & .client-secret
.IP "Type:" 6
@@ -1965,19 +2032,19 @@ and put them in your configuration file
.IP "Type:" 6
\f[I]string\f[] or \f[I]integer\f[]
-.IP "Examples:" 4
-
-* \f[I]"2019-01-01T00:00:00"\f[]
-
-* \f[I]"2019"\f[] with \f[I]"%Y"\f[] as \f[I]date-format\f[]
-
-* \f[I]1546297200\f[]
+.IP "Example:" 4
+"2019-01-01T00:00:00",
+.br
+"2019" with "%Y" as \f[I]date-format\f[],
+.br
+1546297200
+.br
.IP "Description:" 4
A \f[I]Date\f[] value represents a specific point in time.
-* If given as \f[I]string\f[], it is parsed according to date-format_.
+* If given as \f[I]string\f[], it is parsed according to \f[I]date-format\f[].
* If given as \f[I]integer\f[], it is interpreted as UTC timestamp.
@@ -1985,15 +2052,15 @@ A \f[I]Date\f[] value represents a specific point in time.
.IP "Type:" 6
\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
-.IP "Examples:" 4
-
-* \f[I]"file.ext"\f[]
-
-* \f[I]"~/path/to/file.ext"\f[]
-
-* \f[I]"$HOME/path/to/file.ext"\f[]
-
-* \f[I]["$HOME", "path", "to", "file.ext"]\f[]
+.IP "Example:" 4
+"file.ext",
+.br
+"~/path/to/file.ext",
+.br
+"$HOME/path/to/file.ext",
+.br
+["$HOME", "path", "to", "file.ext"]
+.br
.IP "Description:" 4
A \f[I]Path\f[] is a \f[I]string\f[] representing the location of a file
@@ -2015,7 +2082,7 @@ The path \f[I]C:\\path\\to\\file.ext\f[] has therefore to be written as
\f[I]object\f[]
-.IP "Examples:" 4
+.IP "Example:" 4
.. code::
{
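
Note: the new "include" options documented above accept either a comma-separated string or a list of strings. A minimal sketch using this release's reworked config API (the option values here are illustrative):

    from gallery_dl import config

    # equivalent ways to select subcategories of a user profile
    config.set(("extractor", "deviantart"), "include", "favorite,journal,scraps")
    config.set(("extractor", "newgrounds"), "include", ["art", "audio"])
    config.set(("extractor", "deviantart"), "include", "all")  # shorthand for everything
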
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index ab46b5c..a8700a6 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.11.1
+Version: 1.12.1
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -95,8 +95,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.6/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.6/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.bin>`__
These executables include a Python 3.7 interpreter
and all required Python packages.
@@ -132,14 +132,14 @@ Description: ==========
.. code:: bash
- $ gallery-dl http://danbooru.donmai.us/posts?tags=bonocho
+ $ gallery-dl "https://danbooru.donmai.us/posts?tags=bonocho"
Get the direct URL of an image from a site that requires authentication:
.. code:: bash
- $ gallery-dl -g -u <username> -p <password> http://seiga.nicovideo.jp/seiga/im3211703
+ $ gallery-dl -g -u "<username>" -p "<password>" "https://seiga.nicovideo.jp/seiga/im3211703"
| Search a remote resource for URLs and download images from them:
@@ -147,7 +147,7 @@ Description: ==========
.. code:: bash
- $ gallery-dl r:https://pastebin.com/raw/FLwrCYsT
+ $ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
Configuration
@@ -241,8 +241,8 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.6.zip
- .. _dev: https://github.com/mikf/gallery-dl/archive/master.zip
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.1.tar.gz
+ .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
.. _PyPI: https://pypi.org/
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 16db33a..513b6c7 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -115,6 +115,7 @@ gallery_dl/extractor/pornhub.py
gallery_dl/extractor/pururin.py
gallery_dl/extractor/reactor.py
gallery_dl/extractor/readcomiconline.py
+gallery_dl/extractor/realbooru.py
gallery_dl/extractor/recursive.py
gallery_dl/extractor/reddit.py
gallery_dl/extractor/rule34.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 9665823..ffaed3d 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -93,7 +93,8 @@ def parse_inputfile(file, log):
log.warning("input file: unable to parse '%s': %s", value, exc)
continue
- conf.append((key.strip().split("."), value))
+ key = key.strip().split(".")
+ conf.append((key[:-1], key[-1], value))
else:
# url
@@ -122,11 +123,11 @@ def main():
if args.yamlfiles:
config.load(args.yamlfiles, strict=True, fmt="yaml")
if args.postprocessors:
- config.set(("postprocessors",), args.postprocessors)
+ config.set((), "postprocessors", args.postprocessors)
if args.abort:
- config.set(("skip",), "abort:" + str(args.abort))
- for key, value in args.options:
- config.set(key, value)
+ config.set((), "skip", "abort:" + str(args.abort))
+ for opts in args.options:
+ config.set(*opts)
# stream logging handler
output.configure_logging_handler(
@@ -140,7 +141,7 @@ def main():
# loglevels
if args.loglevel >= logging.ERROR:
- config.set(("output", "mode"), "null")
+ config.set(("output",), "mode", "null")
elif args.loglevel <= logging.DEBUG:
import platform
import subprocess
@@ -230,7 +231,7 @@ def main():
ulog.propagate = False
job.Job.ulog = ulog
- pformat = config.get(("output", "progress"), True)
+ pformat = config.get(("output",), "progress", True)
if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:
urls = progress(urls, pformat)
@@ -239,8 +240,8 @@ def main():
try:
log.debug("Starting %s for '%s'", jobtype.__name__, url)
if isinstance(url, util.ExtendedUrl):
- for key, value in url.gconfig:
- config.set(key, value)
+ for opts in url.gconfig:
+ config.set(*opts)
with config.apply(url.lconfig):
retval |= jobtype(url.value).run()
else:
diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py
index 3ceef75..1824195 100644
--- a/gallery_dl/cache.py
+++ b/gallery_dl/cache.py
@@ -188,7 +188,7 @@ def clear():
def _path():
- path = config.get(("cache", "file"), -1)
+ path = config.get(("cache",), "file", -1)
if path != -1:
return util.expand_path(path)
diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py
index b9bf32d..6e23c83 100644
--- a/gallery_dl/cloudflare.py
+++ b/gallery_dl/cloudflare.py
@@ -35,10 +35,11 @@ def solve_challenge(session, response, kwargs):
cf_kwargs = {}
headers = cf_kwargs["headers"] = collections.OrderedDict()
- params = cf_kwargs["params"] = collections.OrderedDict()
+ params = cf_kwargs["data"] = collections.OrderedDict()
page = response.text
- params["s"] = text.extract(page, 'name="s" value="', '"')[0]
+ url = root + text.extract(page, 'action="', '"')[0]
+ params["r"] = text.extract(page, 'name="r" value="', '"')[0]
params["jschl_vc"] = text.extract(page, 'name="jschl_vc" value="', '"')[0]
params["pass"] = text.extract(page, 'name="pass" value="', '"')[0]
params["jschl_answer"] = solve_js_challenge(page, parsed.netloc)
@@ -46,12 +47,14 @@ def solve_challenge(session, response, kwargs):
time.sleep(4)
- url = root + "/cdn-cgi/l/chk_jschl"
cf_kwargs["allow_redirects"] = False
- cf_response = session.request("GET", url, **cf_kwargs)
+ cf_response = session.request("POST", url, **cf_kwargs)
- location = cf_response.headers.get("Location")
- if not location:
+ cookies = {
+ cookie.name: cookie.value
+ for cookie in cf_response.cookies
+ }
+ if not cookies:
import logging
log = logging.getLogger("cloudflare")
rtype = "CAPTCHA" if is_captcha(cf_response) else "Unexpected"
@@ -60,18 +63,9 @@ def solve_challenge(session, response, kwargs):
log.debug("Content:\n%s", cf_response.text)
raise exception.StopExtraction()
- if location[0] == "/":
- location = root + location
- else:
- location = re.sub(r"(https?):/(?!/)", r"\1://", location)
-
- for cookie in cf_response.cookies:
- if cookie.name == "cf_clearance":
- return location, cookie.domain, {
- cookie.name: cookie.value,
- "__cfduid" : response.cookies.get("__cfduid", ""),
- }
- return location, "", {}
+ domain = next(iter(cf_response.cookies)).domain
+ cookies["__cfduid"] = response.cookies.get("__cfduid", "")
+ return cf_response, domain, cookies
def solve_js_challenge(page, netloc):
@@ -110,8 +104,7 @@ def solve_js_challenge(page, netloc):
solution += len(netloc)
if ".toFixed(" in expr:
# trim solution to 10 decimal places
- # and strip trailing zeros
- solution = "{:.10f}".format(solution).rstrip("0")
+ solution = "{:.10f}".format(solution)
return solution
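
Note: the '.toFixed(' emulation above no longer strips trailing zeros, because JavaScript's Number.prototype.toFixed keeps them. For example:

    # JavaScript: (3.14).toFixed(10) == "3.1400000000"
    "{:.10f}".format(3.14)  # -> '3.1400000000', trailing zeros preserved
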
diff --git a/gallery_dl/config.py b/gallery_dl/config.py
index da52f1e..785ffc3 100644
--- a/gallery_dl/config.py
+++ b/gallery_dl/config.py
@@ -57,7 +57,7 @@ def load(files=None, strict=False, fmt="json"):
confdict = parsefunc(file)
except OSError as exc:
if strict:
- log.error("%s", exc)
+ log.error(exc)
sys.exit(1)
except Exception as exc:
log.warning("Could not parse '%s': %s", path, exc)
@@ -75,62 +75,57 @@ def clear():
_config.clear()
-def get(keys, default=None, conf=_config):
+def get(path, key, default=None, *, conf=_config):
"""Get the value of property 'key' or a default value"""
try:
- for k in keys:
- conf = conf[k]
- return conf
- except (KeyError, AttributeError):
+ for p in path:
+ conf = conf[p]
+ return conf[key]
+ except Exception:
return default
-def interpolate(keys, default=None, conf=_config):
+def interpolate(path, key, default=None, *, conf=_config):
"""Interpolate the value of 'key'"""
+ if key in conf:
+ return conf[key]
try:
- lkey = keys[-1]
- if lkey in conf:
- return conf[lkey]
- for k in keys:
- if lkey in conf:
- default = conf[lkey]
- conf = conf[k]
- return conf
- except (KeyError, AttributeError):
- return default
+ for p in path:
+ conf = conf[p]
+ if key in conf:
+ default = conf[key]
+ except Exception:
+ pass
+ return default
-def set(keys, value, conf=_config):
+def set(path, key, value, *, conf=_config):
"""Set the value of property 'key' for this session"""
- for k in keys[:-1]:
+ for p in path:
try:
- conf = conf[k]
+ conf = conf[p]
except KeyError:
- temp = {}
- conf[k] = temp
- conf = temp
- conf[keys[-1]] = value
+ conf[p] = conf = {}
+ conf[key] = value
-def setdefault(keys, value, conf=_config):
+def setdefault(path, key, value, *, conf=_config):
"""Set the value of property 'key' if it doesn't exist"""
- for k in keys[:-1]:
+ for p in path:
try:
- conf = conf[k]
+ conf = conf[p]
except KeyError:
- temp = {}
- conf[k] = temp
- conf = temp
- return conf.setdefault(keys[-1], value)
+ conf[p] = conf = {}
+ return conf.setdefault(key, value)
-def unset(keys, conf=_config):
+def unset(path, key, *, conf=_config):
"""Unset the value of property 'key'"""
try:
- for k in keys[:-1]:
- conf = conf[k]
- del conf[keys[-1]]
- except (KeyError, AttributeError):
+ for p in path:
+ conf = conf[p]
+ del conf[key]
+ except Exception:
pass
@@ -143,13 +138,13 @@ class apply():
self.kvlist = kvlist
def __enter__(self):
- for key, value in self.kvlist:
- self.original.append((key, get(key, self._sentinel)))
- set(key, value)
+ for path, key, value in self.kvlist:
+ self.original.append((path, key, get(path, key, self._sentinel)))
+ set(path, key, value)
def __exit__(self, etype, value, traceback):
- for key, value in self.original:
+ for path, key, value in self.original:
if value is self._sentinel:
- unset(key)
+ unset(path, key)
else:
- set(key, value)
+ set(path, key, value)
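
Note: every config helper now takes the option path and the key as separate arguments instead of one combined key tuple. A quick sketch of the new call convention (the 'pixiv'/'ugoira' names are only examples):

    from gallery_dl import config

    # old: config.set(("extractor", "pixiv", "ugoira"), False)
    config.set(("extractor", "pixiv"), "ugoira", False)
    config.get(("extractor", "pixiv"), "ugoira", True)   # -> False
    config.unset(("extractor", "pixiv"), "ugoira")

    # config.apply() likewise expects (path, key, value) triples
    with config.apply([(("output",), "mode", "null")]):
        pass  # temporary override, restored on exit
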
diff --git a/gallery_dl/downloader/common.py b/gallery_dl/downloader/common.py
index 6e5cd4c..596c956 100644
--- a/gallery_dl/downloader/common.py
+++ b/gallery_dl/downloader/common.py
@@ -30,7 +30,7 @@ class DownloaderBase():
def config(self, key, default=None):
"""Interpolate downloader config value for 'key'"""
- return config.interpolate(("downloader", self.scheme, key), default)
+ return config.interpolate(("downloader", self.scheme), key, default)
def download(self, url, pathfmt):
"""Write data from 'url' into the file specified by 'pathfmt'"""
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 1c78cfb..fab96ba 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -15,10 +15,11 @@ from requests.exceptions import RequestException, ConnectionError, Timeout
from .common import DownloaderBase
from .. import text
+from ssl import SSLError
try:
- from OpenSSL.SSL import Error as SSLError
+ from OpenSSL.SSL import Error as OpenSSLError
except ImportError:
- from ssl import SSLError
+ OpenSSLError = SSLError
class HttpDownloader(DownloaderBase):
@@ -39,11 +40,13 @@ class HttpDownloader(DownloaderBase):
self.retries = float("inf")
if self.rate:
rate = text.parse_bytes(self.rate)
- if not rate:
+ if rate:
+ if rate < self.chunk_size:
+ self.chunk_size = rate
+ self.rate = rate
+ self.receive = self._receive_rate
+ else:
self.log.warning("Invalid rate limit (%r)", self.rate)
- elif rate < self.chunk_size:
- self.chunk_size = rate
- self.rate = rate
def download(self, url, pathfmt):
try:
@@ -77,12 +80,15 @@ class HttpDownloader(DownloaderBase):
time.sleep(min(2 ** (tries-1), 1800))
tries += 1
+ headers = {}
# check for .part file
filesize = pathfmt.part_size()
if filesize:
- headers = {"Range": "bytes={}-".format(filesize)}
- else:
- headers = None
+ headers["Range"] = "bytes={}-".format(filesize)
+ # file-specific headers
+ extra = pathfmt.kwdict.get("_http_headers")
+ if extra:
+ headers.update(extra)
# connect to (remote) source
try:
@@ -93,7 +99,7 @@ class HttpDownloader(DownloaderBase):
msg = str(exc)
continue
except Exception as exc:
- self.log.warning("%s", exc)
+ self.log.warning(exc)
return False
# check response
@@ -110,7 +116,7 @@ class HttpDownloader(DownloaderBase):
msg = "'{} {}' for '{}'".format(code, response.reason, url)
if code == 429 or 500 <= code < 600: # Server Error
continue
- self.log.warning("%s", msg)
+ self.log.warning(msg)
return False
size = text.parse_int(size)
@@ -140,7 +146,7 @@ class HttpDownloader(DownloaderBase):
# download content
try:
self.receive(response, file)
- except (RequestException, SSLError) as exc:
+ except (RequestException, SSLError, OpenSSLError) as exc:
msg = str(exc)
print()
continue
@@ -166,20 +172,26 @@ class HttpDownloader(DownloaderBase):
return True
def receive(self, response, file):
- if self.rate:
- total = 0 # total amount of bytes received
- start = time.time() # start time
+ for data in response.iter_content(self.chunk_size):
+ file.write(data)
+
+ def _receive_rate(self, response, file):
+ t1 = time.time()
+ rt = self.rate
for data in response.iter_content(self.chunk_size):
file.write(data)
- if self.rate:
- total += len(data)
- expected = total / self.rate # expected elapsed time
- delta = time.time() - start # actual elapsed time since start
- if delta < expected:
- # sleep if less time passed than expected
- time.sleep(expected - delta)
+ t2 = time.time() # current time
+ actual = t2 - t1 # actual elapsed time
+ expected = len(data) / rt # expected elapsed time
+
+ if actual < expected:
+ # sleep if less time elapsed than expected
+ time.sleep(expected - actual)
+ t1 = time.time()
+ else:
+ t1 = t2
def get_extension(self, response):
mtype = response.headers.get("Content-Type", "image/jpeg")
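
Note: rate limiting now lives in a dedicated _receive_rate method selected once in __init__, so the hot loop no longer re-checks self.rate for every chunk. The core throttling idea, as a standalone sketch:

    import time

    def throttled_write(chunks, file, rate):
        """Write byte chunks to 'file' at no more than 'rate' bytes per second."""
        t1 = time.time()
        for data in chunks:
            file.write(data)
            expected = len(data) / rate    # how long this chunk should have taken
            actual = time.time() - t1      # how long it actually took
            if actual < expected:
                time.sleep(expected - actual)  # stall to honor the limit
            t1 = time.time()
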
diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py
index 33e7929..c34cfec 100644
--- a/gallery_dl/extractor/2chan.py
+++ b/gallery_dl/extractor/2chan.py
@@ -68,6 +68,8 @@ class _2chanThreadExtractor(Extractor):
def parse(self, post):
"""Build post-object by extracting data from an HTML post"""
data = self._extract_post(post)
+ if data["name"]:
+ data["name"] = data["name"].strip()
if '<a href="/' in post:
self._extract_image(post, data)
data["tim"], _, data["extension"] = data["filename"].partition(".")
@@ -78,10 +80,10 @@ class _2chanThreadExtractor(Extractor):
@staticmethod
def _extract_post(post):
return text.extract_all(post, (
- ("no" , 'name="', '"'),
- ("post", '<b>', '</b>'),
- ("name", '<b>', ' </b>'),
- ("now" , '</font> ', ' '),
+ ("post", 'class="csb">' , '<'),
+ ("name", 'class="cnm">' , '<'),
+ ("now" , 'class="cnw">' , '<'),
+ ("no" , 'class="cno">No.', '<'),
(None , '<blockquote', ''),
("com" , '>', '</blockquote>'),
))[0]
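
Note: the parser now anchors on 2chan's stable class attributes instead of tag boundaries. For reference, gallery_dl's text.extract_all applies each (key, begin, end) rule in sequence and returns a (values, position) tuple, hence the trailing [0]. A small illustration, assuming the text module's documented behavior:

    from gallery_dl import text

    data, pos = text.extract_all(
        '<span class="csb">12345</span><span class="cnm">Anonymous</span>',
        (("post", 'class="csb">', '<'),
         ("name", 'class="cnm">', '<')),
    )
    # data == {"post": "12345", "name": "Anonymous"}
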
diff --git a/gallery_dl/extractor/3dbooru.py b/gallery_dl/extractor/3dbooru.py
index febbb51..ac96211 100644
--- a/gallery_dl/extractor/3dbooru.py
+++ b/gallery_dl/extractor/3dbooru.py
@@ -67,7 +67,7 @@ class _3dbooruPopularExtractor(booru.MoebooruPopularMixin, _3dbooruExtractor):
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
r"(?:\?(?P<query>[^#]*))?")
test = ("http://behoimi.org/post/popular_by_month?month=2&year=2013", {
- "url": "f5a26c624da9a3d1dbc610e4a614bc57df6251c5",
+ "url": "8b1a5c5b7a10f8f5d3d6124d1aabfee0277078cb",
"count": 20,
})
diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py
index 36a0573..980dc20 100644
--- a/gallery_dl/extractor/4chan.py
+++ b/gallery_dl/extractor/4chan.py
@@ -59,3 +59,30 @@ class _4chanThreadExtractor(Extractor):
url = "https://i.4cdn.org/{}/{}{}".format(
post["board"], post["tim"], post["ext"])
yield Message.Url, url, post
+
+
+class _4chanBoardExtractor(Extractor):
+ """Extractor for 4chan boards"""
+ category = "4chan"
+ subcategory = "board"
+ pattern = r"(?:https?://)?boards\.4chan(?:nel)?\.org/([^/?&#]+)/\d*$"
+ test = ("https://boards.4channel.org/po/", {
+ "pattern": _4chanThreadExtractor.pattern,
+ "count": ">= 100",
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board = match.group(1)
+
+ def items(self):
+ url = "https://a.4cdn.org/{}/threads.json".format(self.board)
+ threads = self.request(url).json()
+
+ for page in threads:
+ for thread in page["threads"]:
+ url = "https://boards.4chan.org/{}/thread/{}/".format(
+ self.board, thread["no"])
+ thread["page"] = page["page"]
+ thread["_extractor"] = _4chanThreadExtractor
+ yield Message.Queue, url, thread
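
Note: the new board extractor walks 4chan's public thread catalog. A self-contained sketch of the same traversal, assuming the third-party 'requests' package:

    import requests

    board = "po"  # any board name
    pages = requests.get("https://a.4cdn.org/{}/threads.json".format(board)).json()
    for page in pages:
        for thread in page["threads"]:
            print("https://boards.4chan.org/{}/thread/{}/".format(
                board, thread["no"]))
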
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index b8f74d1..9ff3746 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -86,6 +86,7 @@ modules = [
"pururin",
"reactor",
"readcomiconline",
+ "realbooru",
"reddit",
"rule34",
"safebooru",
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index c701927..1126615 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -72,6 +72,11 @@ class BehanceGalleryExtractor(BehanceExtractor):
"url": "0258fe194fe7d828d6f2c7f6086a9a0a4140db1d",
"keyword": {"owners": ["Alex Strohl"]},
}),
+ # 'media_collection' modules
+ ("https://www.behance.net/gallery/88276087/Audi-R8-RWD", {
+ "count": 20,
+ "url": "6bebff0d37f85349f9ad28bd8b76fd66627c1e2f",
+ }),
)
def __init__(self, match):
@@ -112,20 +117,28 @@ class BehanceGalleryExtractor(BehanceExtractor):
@staticmethod
def get_images(data):
"""Extract image results from an API response"""
- results = []
+ result = []
+ append = result.append
for module in data["modules"]:
+ mtype = module["type"]
- if module["type"] == "image":
+ if mtype == "image":
url = module["sizes"]["original"]
- results.append((url, module))
+ append((url, module))
+
+ elif mtype == "media_collection":
+ for component in module["components"]:
+ url = component["sizes"]["source"]
+ append((url, module))
- elif module["type"] == "embed":
+ elif mtype == "embed":
embed = module.get("original_embed") or module.get("embed")
- url = "ytdl:" + text.extract(embed, 'src="', '"')[0]
- results.append((url, module))
+ if embed:
+ url = "ytdl:" + text.extract(embed, 'src="', '"')[0]
+ append((url, module))
- return results
+ return result
class BehanceUserExtractor(BehanceExtractor):
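
Note: besides the new 'media_collection' branch, get_images now binds result.append to a local name, a common CPython micro-optimization that saves one attribute lookup per iteration. The pattern in isolation:

    result = []
    append = result.append       # look the method up once...
    for item in ("a", "b", "c"):
        append(item)             # ...instead of once per iteration
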
diff --git a/gallery_dl/extractor/bobx.py b/gallery_dl/extractor/bobx.py
index dba5fe7..94a2840 100644
--- a/gallery_dl/extractor/bobx.py
+++ b/gallery_dl/extractor/bobx.py
@@ -10,18 +10,38 @@
from .common import Extractor, Message
from .. import text
+from ..cache import memcache
+import random
+import time
class BobxExtractor(Extractor):
"""Base class for bobx extractors"""
category = "bobx"
root = "http://www.bobx.com"
+ cookiedomain = ".bobx.com"
per_page = 80
def __init__(self, match):
Extractor.__init__(self, match)
self.path = match.group(1)
+ def login(self):
+ if not self._check_cookies(("BobXUser",)):
+ self._update_cookies(self._login_impl())
+
+ @memcache()
+ def _login_impl(self):
+ """Generate a randomized 'BobXUser' cookie"""
+ rand = random.randrange
+ tnow = time.time() - rand(60, 3600)
+
+ return {"BobXUser": "{}.{}.{}.{}.{}.{}".format(
+ int(tnow),
+ rand(128, 192), rand(0, 256), rand(0, 256), rand(0, 256),
+ tnow + 622080000, # timestamp in 7200 days
+ )}
+
class BobxGalleryExtractor(BobxExtractor):
"""Extractor for individual image galleries on bobx.com"""
@@ -46,6 +66,8 @@ class BobxGalleryExtractor(BobxExtractor):
)
def items(self):
+ self.login()
+
num = 0
while True:
url = "{}/{}-{}-10-8.html".format(self.root, self.path, num)
@@ -99,6 +121,7 @@ class BobxIdolExtractor(BobxExtractor):
})
def items(self):
+ self.login()
url = "{}/{}/".format(self.root, self.path)
data = {"_extractor": BobxGalleryExtractor}
page = self.request(url).text
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 0d258eb..a1a4890 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -69,7 +69,7 @@ class Extractor():
def config(self, key, default=None):
return config.interpolate(
- ("extractor", self.category, self.subcategory, key), default)
+ ("extractor", self.category, self.subcategory), key, default)
def request(self, url, *, method="GET", session=None, retries=None,
encoding=None, fatal=True, notfound=None, **kwargs):
@@ -101,18 +101,14 @@ class Extractor():
raise exception.NotFoundError(notfound)
if cloudflare.is_challenge(response):
self.log.info("Solving Cloudflare challenge")
- url, domain, cookies = cloudflare.solve_challenge(
+ response, domain, cookies = cloudflare.solve_challenge(
session, response, kwargs)
+ if response.status_code >= 400:
+ continue
cloudflare.cookies.update(self.category, (domain, cookies))
- continue
+ return response
if cloudflare.is_captcha(response):
- try:
- import OpenSSL # noqa
- except ImportError:
- msg = " - Install 'pyOpenSSL' and try again"
- else:
- msg = ""
- self.log.warning("Cloudflare CAPTCHA" + msg)
+ self.log.warning("Cloudflare CAPTCHA")
msg = "'{} {}' for '{}'".format(code, response.reason, url)
if code < 500 and code != 429 and code != 430:
@@ -200,7 +196,7 @@ class Extractor():
def _store_cookies(self):
"""Store the session's cookiejar in a cookies.txt file"""
- if self._cookiefile and self.config("cookies-update", False):
+ if self._cookiefile and self.config("cookies-update", True):
cookiejar = http.cookiejar.MozillaCookieJar()
for cookie in self._cookiejar:
cookiejar.set_cookie(cookie)
@@ -233,12 +229,14 @@ class Extractor():
"""Check if all 'cookienames' are in the session's cookiejar"""
if domain is None:
domain = self.cookiedomain
- try:
- for name in cookienames:
- self._cookiejar._find(name, domain)
- except KeyError:
- return False
- return True
+
+ names = set(cookienames)
+ for cookie in self._cookiejar:
+ if cookie.domain == domain:
+ names.discard(cookie.name)
+ if not names:
+ return True
+ return False
def _get_date_min_max(self, dmin=None, dmax=None):
"""Retrieve and parse 'date-min' and 'date-max' config values"""
@@ -254,6 +252,26 @@ class Extractor():
fmt = self.config("date-format", "%Y-%m-%dT%H:%M:%S")
return get("date-min", dmin), get("date-max", dmax)
+ def _dispatch_extractors(self, extractor_data, default=()):
+ """ """
+ extractors = {
+ data[0].subcategory: data
+ for data in extractor_data
+ }
+
+ include = self.config("include", default) or ()
+ if include == "all":
+ include = extractors
+ elif isinstance(include, str):
+ include = include.split(",")
+
+ result = [(Message.Version, 1)]
+ for category in include:
+ if category in extractors:
+ extr, url = extractors[category]
+ result.append((Message.Queue, url, {"_extractor": extr}))
+ return iter(result)
+
@classmethod
def _get_tests(cls):
"""Yield an extractor's test cases as (URL, RESULTS) tuples"""
@@ -284,7 +302,7 @@ class GalleryExtractor(Extractor):
def items(self):
self.login()
- page = self.request(self.gallery_url).text
+ page = self.request(self.gallery_url, notfound=self.subcategory).text
data = self.metadata(page)
imgs = self.images(page)
@@ -402,16 +420,13 @@ class SharedConfigMixin():
def config(self, key, default=None, *, sentinel=object()):
value = Extractor.config(self, key, sentinel)
- if value is sentinel:
- cat, self.category = self.category, self.basecategory
- value = Extractor.config(self, key, default)
- self.category = cat
- return value
+ return value if value is not sentinel else config.interpolate(
+ ("extractor", self.basecategory, self.subcategory), key, default)
def generate_extractors(extractor_data, symtable, classes):
"""Dynamically generate Extractor classes"""
- extractors = config.get(("extractor", classes[0].basecategory))
+ extractors = config.get(("extractor",), classes[0].basecategory)
ckey = extractor_data.get("_ckey")
prev = None
@@ -456,10 +471,21 @@ def generate_extractors(extractor_data, symtable, classes):
http.cookiejar.MozillaCookieJar.magic_re = re.compile(
"#( Netscape)? HTTP Cookie File", re.IGNORECASE)
-# Replace default cipher list of urllib3 to avoid Cloudflare CAPTCHAs
-ciphers = config.get(("ciphers",), True)
+
+# Undo automatic pyOpenSSL injection by requests
+pyopenssl = config.get((), "pyopenssl", False)
+if not pyopenssl:
+ try:
+ from requests.packages.urllib3.contrib import pyopenssl # noqa
+ pyopenssl.extract_from_urllib3()
+ except ImportError:
+ pass
+del pyopenssl
+
+
+# Replace urllib3's default cipher list to avoid Cloudflare CAPTCHAs
+ciphers = config.get((), "ciphers", True)
if ciphers:
- logging.getLogger("gallery-dl").debug("Updating urllib3 ciphers")
if ciphers is True:
ciphers = (
@@ -489,3 +515,4 @@ if ciphers:
from requests.packages.urllib3.util import ssl_ # noqa
ssl_.DEFAULT_CIPHERS = ciphers
del ssl_
+del ciphers
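
Note: the two module-level blocks above make the pyOpenSSL injection and the cipher override configurable through top-level options, both read with an empty path. A sketch of toggling them, with names taken from the code above:

    from gallery_dl import config

    config.set((), "pyopenssl", True)   # opt back in to requests' pyOpenSSL backend
    config.set((), "ciphers", False)    # keep urllib3's default cipher list

Both values are read once when gallery_dl.extractor.common is first imported, so in practice they belong in a configuration file.
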
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index eeee74a..604966f 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -29,7 +29,7 @@ BASE_PATTERN = (
class DeviantartExtractor(Extractor):
"""Base class for deviantart extractors using the OAuth API"""
category = "deviantart"
- directory_fmt = ("{category}", "{author[username]!l}")
+ directory_fmt = ("{category}", "{username}")
filename_fmt = "{category}_{index}_{title}.{extension}"
root = "https://www.deviantart.com"
@@ -47,6 +47,12 @@ class DeviantartExtractor(Extractor):
if self.quality:
self.quality = "q_{}".format(self.quality)
+ if self.original != "image":
+ self._update_content = self._update_content_default
+ else:
+ self._update_content = self._update_content_image
+ self.original = True
+
self.commit_journal = {
"html": self._commit_journal_html,
"text": self._commit_journal_text,
@@ -62,6 +68,7 @@ class DeviantartExtractor(Extractor):
self.group = not profile
if self.group:
self.subcategory = "group-" + self.subcategory
+ self.user = self.user.lower()
else:
self.user = profile["user"]["username"]
@@ -95,8 +102,7 @@ class DeviantartExtractor(Extractor):
yield self.commit(deviation, content)
elif deviation["is_downloadable"]:
- content = {}
- self._update_content(deviation, content)
+ content = self.api.deviation_download(deviation["deviationid"])
yield self.commit(deviation, content)
if "videos" in deviation:
@@ -127,8 +133,14 @@ class DeviantartExtractor(Extractor):
deviation["url"].rpartition("-")[2])
except KeyError:
deviation["index"] = 0
+
if self.user:
deviation["username"] = self.user
+ deviation["_username"] = self.user.lower()
+ else:
+ deviation["username"] = deviation["author"]["username"]
+ deviation["_username"] = deviation["username"].lower()
+
deviation["da_category"] = deviation["category"]
deviation["published_time"] = text.parse_int(
deviation["published_time"])
@@ -238,81 +250,51 @@ class DeviantartExtractor(Extractor):
url = "{}/{}/{}/0/".format(self.root, self.user, category)
return [(url + folder["name"], folder) for folder in folders]
- def _update_content(self, deviation, content):
- try:
- data = self.api.deviation_extended_fetch(
- deviation["index"],
- deviation["author"]["username"],
- "journal" if "excerpt" in deviation else "art",
- )
- download = data["deviation"]["extended"]["download"]
- download["src"] = download["url"]
- except Exception as e:
- self.log.warning(
- "Unable to fetch original download URL for ID %s ('%s: %s')",
- deviation["index"], e.__class__.__name__, e,
- )
- self.log.debug("Server response: %s", data)
- else:
- if self.original == "image":
- url = data["src"].partition("?")[0]
- mtype = mimetypes.guess_type(url, False)[0]
- if not mtype or not mtype.startswith("image/"):
- return
- del download["url"]
- content.update(download)
+ def _update_content_default(self, deviation, content):
+ content.update(self.api.deviation_download(deviation["deviationid"]))
+ def _update_content_image(self, deviation, content):
+ data = self.api.deviation_download(deviation["deviationid"])
+ url = data["src"].partition("?")[0]
+ mtype = mimetypes.guess_type(url, False)[0]
+ if mtype and mtype.startswith("image/"):
+ content.update(data)
-class DeviantartUserExtractor(Extractor):
+
+class DeviantartUserExtractor(DeviantartExtractor):
"""Extractor for an artist's user profile"""
- category = "deviantart"
subcategory = "user"
pattern = BASE_PATTERN + r"/?$"
test = (
("https://www.deviantart.com/shimoda7", {
- "options": (("include", "gsjf"),),
- "pattern": r"/shimoda7/(gallery(/scraps)?|posts|favourites)",
+ "pattern": r"/shimoda7/gallery$",
+ }),
+ ("https://www.deviantart.com/shimoda7", {
+ "options": (("include", "all"),),
+ "pattern": r"/shimoda7/(gallery(/scraps)?|posts|favourites)$",
"count": 4,
}),
("https://shimoda7.deviantart.com/"),
)
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.user = match.group(1) or match.group(2)
-
- incl = self.config("include") or "g"
- if isinstance(incl, list):
- incl = "".join(item[0] for item in incl if item)
- self.include = incl.lower()
-
def items(self):
- base = "https://www.deviantart.com/{}/".format(self.user)
- incl = self.include
- data = {}
-
- if "g" in incl:
- data["_extractor"] = DeviantartGalleryExtractor
- yield Message.Queue, base + "gallery", data
- if "s" in incl:
- data["_extractor"] = DeviantartScrapsExtractor
- yield Message.Queue, base + "gallery/scraps", data
- if "j" in incl:
- data["_extractor"] = DeviantartJournalExtractor
- yield Message.Queue, base + "posts", data
- if "f" in incl:
- data["_extractor"] = DeviantartFavoriteExtractor
- yield Message.Queue, base + "favourites", data
+ base = "{}/{}/".format(self.root, self.user)
+ return self._dispatch_extractors((
+ (DeviantartGalleryExtractor , base + "gallery"),
+ (DeviantartScrapsExtractor , base + "gallery/scraps"),
+ (DeviantartJournalExtractor , base + "posts"),
+ (DeviantartFavoriteExtractor, base + "favourites"),
+ ), ("gallery",))
class DeviantartGalleryExtractor(DeviantartExtractor):
"""Extractor for all deviations from an artist's gallery"""
subcategory = "gallery"
- archive_fmt = "g_{username}_{index}.{extension}"
+ archive_fmt = "g_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$"
test = (
("https://www.deviantart.com/shimoda7/gallery/", {
- "pattern": r"https://(www.deviantart.com/download/\d+/"
+ "pattern": r"https://(api-da\.wixmp\.com/_api/download/file"
r"|images-wixmp-[^.]+.wixmp.com/f/.+/.+.jpg\?token=.+)",
"count": ">= 30",
"keyword": {
@@ -398,7 +380,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
class DeviantartFolderExtractor(DeviantartExtractor):
"""Extractor for deviations inside an artist's gallery folder"""
subcategory = "folder"
- directory_fmt = ("{category}", "{folder[owner]}", "{folder[title]}")
+ directory_fmt = ("{category}", "{username}", "{folder[title]}")
archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
pattern = BASE_PATTERN + r"/gallery/(\d+)/([^/?&#]+)"
test = (
@@ -418,14 +400,19 @@ class DeviantartFolderExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
- self.fname = match.group(4)
- self.folder = {"owner": self.user, "index": match.group(3)}
+ self.folder = None
+ self.folder_id = match.group(3)
+ self.folder_name = match.group(4)
def deviations(self):
folders = self.api.gallery_folders(self.user)
- folder = self._find_folder(folders, self.fname)
- self.folder["title"] = folder["name"]
- self.folder["uuid"] = folder["folderid"]
+ folder = self._find_folder(folders, self.folder_name)
+ self.folder = {
+ "title": folder["name"],
+ "uuid" : folder["folderid"],
+ "index": self.folder_id,
+ "owner": self.user,
+ }
return self.api.gallery(self.user, folder["folderid"], self.offset)
def prepare(self, deviation):
@@ -440,7 +427,8 @@ class DeviantartStashExtractor(DeviantartExtractor):
pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)"
test = (
("https://sta.sh/022c83odnaxc", {
- "pattern": r"https://sta.sh/download/7549925030122512/.+\?token=",
+ "pattern": r"https://api-da\.wixmp\.com/_api/download/file",
+ "content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f",
"count": 1,
}),
# multiple stash items
@@ -450,7 +438,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
}),
# downloadable, but no "content" field (#307)
("https://sta.sh/024t4coz16mi", {
- "pattern": r"https://sta.sh/download/7800709982190282/.+\?token=",
+ "pattern": r"https://api-da\.wixmp\.com/_api/download/file",
"count": 1,
}),
("https://sta.sh/abcdefghijkl", {
@@ -468,41 +456,25 @@ class DeviantartStashExtractor(DeviantartExtractor):
def deviations(self):
url = "https://sta.sh/" + self.stash_id
page = self.request(url).text
- deviation_id, pos = text.extract(page, '//deviation/', '"')
+ deviation_id = text.extract(page, '//deviation/', '"')[0]
if deviation_id:
- deviation = self.api.deviation(deviation_id)
- pos = page.find("dev-page-download", pos)
- if pos >= 0:
- deviation["_download"] = {
- "width" : text.parse_int(text.extract(
- page, 'data-download_width="' , '"', pos)[0]),
- "height": text.parse_int(text.extract(
- page, 'data-download_height="', '"', pos)[0]),
- "src" : text.unescape(text.extract(
- page, 'data-download_url="' , '"', pos)[0]),
- }
- return (deviation,)
+ return (self.api.deviation(deviation_id),)
+
else:
data = {"_extractor": DeviantartStashExtractor}
- page = text.extract(
- page, 'id="stash-body"', 'class="footer"', pos)[0]
+ page = text.extract(page, 'id="stash-body"', 'class="footer"')[0]
return [
(url, data)
for url in text.extract_iter(page, '<a href="', '"')
]
- def _update_content(self, deviation, content):
- if "_download" in deviation:
- content.update(deviation["_download"])
- del deviation["_download"]
-
class DeviantartFavoriteExtractor(DeviantartExtractor):
"""Extractor for an artist's favorites"""
subcategory = "favorite"
directory_fmt = ("{category}", "{username}", "Favourites")
- archive_fmt = "f_{username}_{index}.{extension}"
+ archive_fmt = "f_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/favourites/?(?:\?catpath=/)?$"
test = (
("https://www.deviantart.com/h3813067/favourites/", {
@@ -530,8 +502,8 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
class DeviantartCollectionExtractor(DeviantartExtractor):
"""Extractor for a single favorite collection"""
subcategory = "collection"
- directory_fmt = ("{category}", "{collection[owner]}",
- "Favourites", "{collection[title]}")
+ directory_fmt = ("{category}", "{username}", "Favourites",
+ "{collection[title]}")
archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
pattern = BASE_PATTERN + r"/favourites/(\d+)/([^/?&#]+)"
test = (
@@ -546,14 +518,19 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
- _, _, cid, self.cname = match.groups()
- self.collection = {"owner": self.user, "index": cid}
+ self.collection = None
+ self.collection_id = match.group(3)
+ self.collection_name = match.group(4)
def deviations(self):
folders = self.api.collections_folders(self.user)
- folder = self._find_folder(folders, self.cname)
- self.collection["title"] = folder["name"]
- self.collection["uuid"] = folder["folderid"]
+ folder = self._find_folder(folders, self.collection_name)
+ self.collection = {
+ "title": folder["name"],
+ "uuid" : folder["folderid"],
+ "index": self.collection_id,
+ "owner": self.user,
+ }
return self.api.collections(self.user, folder["folderid"], self.offset)
def prepare(self, deviation):
@@ -565,7 +542,7 @@ class DeviantartJournalExtractor(DeviantartExtractor):
"""Extractor for an artist's journals"""
subcategory = "journal"
directory_fmt = ("{category}", "{username}", "Journal")
- archive_fmt = "j_{username}_{index}.{extension}"
+ archive_fmt = "j_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$"
test = (
("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
@@ -635,8 +612,18 @@ class DeviantartPopularExtractor(DeviantartExtractor):
class DeviantartExtractorV2(DeviantartExtractor):
"""Base class for deviantart extractors using the NAPI"""
+ cookiedomain = ".deviantart.com"
+ cookienames = ("auth", "auth_secure", "userinfo")
+ _warning = True
def items(self):
+ if self.original and not self._check_cookies(self.cookienames):
+ self.original = False
+ if self._warning:
+ DeviantartExtractorV2._warning = False
+ self.log.warning("No session cookies set: "
+ "Disabling original file downloads.")
+
yield Message.Version, 1
for deviation in self.deviations():
data = self.api.deviation_extended_fetch(
@@ -646,10 +633,14 @@ class DeviantartExtractorV2(DeviantartExtractor):
)
if "deviation" not in data:
- self.log.warning("Skipping ID %s", deviation["deviationId"])
+ self.log.warning("Unable to fetch deviation ID %s",
+ deviation["deviationId"])
self.log.debug("Server response: %s", data)
continue
+
deviation = self._extract(data)
+ if not deviation:
+ continue
yield Message.Directory, deviation
yield Message.Url, deviation["target"]["src"], deviation
@@ -662,13 +653,14 @@ class DeviantartExtractorV2(DeviantartExtractor):
def _extract(self, data):
deviation = data["deviation"]
extended = deviation["extended"]
- files = deviation["files"]
+ media = deviation["media"]
del deviation["extended"]
- del deviation["files"]
+ del deviation["media"]
# prepare deviation metadata
deviation["description"] = extended.get("description", "")
- deviation["username"] = self.user.lower()
+ deviation["username"] = deviation["author"]["username"]
+ deviation["_username"] = deviation["username"].lower()
deviation["stats"] = extended["stats"]
deviation["stats"]["comments"] = data["comments"]["total"]
deviation["index"] = deviation["deviationId"]
@@ -682,53 +674,69 @@ class DeviantartExtractorV2(DeviantartExtractor):
)
# extract download target
- target = files[-1]
+ target = media["types"][-1]
+ src = token = None
- if "textContent" in deviation and self.commit_journal:
+ if "textContent" in deviation:
+ if not self.commit_journal:
+ return None
journal = deviation["textContent"]
journal["html"] = journal["html"]["markup"]
- target["src"] = self.commit_journal(deviation, journal)[1]
- elif target["type"] == "gif":
- pass
- elif target["type"] == "video":
- # select largest video
- target = max(
- files, key=lambda x: text.parse_int(x.get("quality", "")[:-1]))
- elif target["type"] == "flash":
- if target["src"].startswith("https://sandbox.deviantart.com"):
- # extract SWF file from "sandbox"
- target["src"] = text.extract(
- self.request(target["src"]).text,
- 'id="sandboxembed" src="', '"',
- )[0]
- elif "download" in extended:
+ src = self.commit_journal(deviation, journal)[1]
+
+ elif target["t"] == "gif":
+ src = target["b"]
+ token = media["token"][0]
+
+ elif "download" in extended and self.original:
target = extended["download"]
- target["src"] = target["url"]
+ src = target["url"]
del target["url"]
- elif target["src"].startswith("https://images-wixmp-"):
- if deviation["index"] <= 790677560:
- # https://github.com/r888888888/danbooru/issues/4069
- target["src"] = re.sub(
- r"(/f/[^/]+/[^/]+)/v\d+/.*",
- r"/intermediary\1", target["src"])
- if self.quality:
- target["src"] = re.sub(
- r"q_\d+", self.quality, target["src"])
+
+ elif target["t"] == "video":
+ # select largest video
+ target = max(media["types"],
+ key=lambda x: text.parse_int(x.get("q", "")[:-1]))
+ src = target["s"]
+
+ elif target["t"] == "flash":
+ src = target["s"]
+ if src.startswith("https://sandbox.deviantart.com"):
+ # extract SWF file from "sandbox"
+ src = text.extract(
+ self.request(src).text, 'id="sandboxembed" src="', '"')[0]
+
+ else:
+ src = media["baseUri"]
+ if "token" in media:
+ token = media["token"][0]
+
+ if "c" in target:
+ src += "/" + target["c"].replace(
+ "<prettyName>", media["prettyName"])
+ if src.startswith("https://images-wixmp-"):
+ if deviation["index"] <= 790677560:
+ # https://github.com/r888888888/danbooru/issues/4069
+ src = re.sub(
+ r"(/f/[^/]+/[^/]+)/v\d+/.*", r"/intermediary\1", src)
+ if self.quality:
+ src = re.sub(r"q_\d+", self.quality, src)
# filename and extension metadata
alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
sub = re.compile(r"\W").sub
- deviation["filename"] = target["filename"] = "".join((
+ deviation["filename"] = "".join((
sub("_", deviation["title"].lower()), "_by_",
sub("_", deviation["author"]["username"].lower()), "-d",
util.bencode(deviation["index"], alphabet),
))
if "extension" not in deviation:
- deviation["extension"] = target["extension"] = (
- text.ext_from_url(target["src"])
- )
- deviation["target"] = target
+ deviation["extension"] = text.ext_from_url(src)
+ if token:
+ src = src + "?token=" + token
+ target["src"] = src
+ deviation["target"] = target
return deviation
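
A minimal standalone sketch of how the reworked _extract() builds a download URL from the new NAPI "media" layout: baseUri plus the selected type's "c" template, with <prettyName> substituted and the access token appended. The sample media dict is hypothetical, modeled on the fields read above:

    # hypothetical NAPI "media" object, shaped like what _extract() reads
    media = {
        "baseUri": "https://images-wixmp-1234.wixmp.com/f/abcd/example.jpg",
        "prettyName": "example_by_artist-dabcdef",
        "token": ["TOKEN123"],
        "types": [
            {"t": "preview",  "c": "/v1/fill/w_300,h_250,q_70/<prettyName>.jpg"},
            {"t": "fullview", "c": "/v1/fill/w_1280,h_960,q_80/<prettyName>.jpg"},
        ],
    }

    target = media["types"][-1]      # last entry is the largest rendition
    src = media["baseUri"]
    if "c" in target:                # expand the crop/quality template
        src += "/" + target["c"].replace("<prettyName>", media["prettyName"])
    if "token" in media:             # wixmp URLs require an access token
        src += "?token=" + media["token"][0]
    print(src)
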
@@ -740,19 +748,21 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2):
test = (
(("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
"options": (("original", 0),),
- "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
+ # "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}),
("https://www.deviantart.com/zzz/art/zzz-1234567890", {
"count": 0,
}),
(("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), {
- "pattern": (r"https://www.deviantart.com/download/261986576"
- r"/[\w-]+\.jpg\?token=\w+&ts=\d+"),
+ # "pattern": (r"https://www.deviantart.com/download/261986576"
+ # r"/[\w-]+\.jpg\?token=\w+&ts=\d+"),
+ "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
+ r"/intermediary/f/[^/]+/[^.]+\.jpg")
}),
# wixmp URL rewrite
(("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
"pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
- r"/intermediary/f/[^/]+/[^.]+\.jpg$")
+ r"/intermediary/f/[^/]+/[^.]+\.jpg")
}),
# wixmp URL rewrite v2 (#369)
(("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), {
@@ -774,20 +784,21 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2):
("https://www.deviantart.com/chi-u/art/-VIDEO-Brushes-330774593", {
"url": "3b6e6e761d2d393fa61a4dc3ed6e7db51b14d07b",
"keyword": {
+ "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5",
+ "extension": "mp4",
"target": {
- "duration": 306,
- "extension": "mp4",
- "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5",
- "filesize": 9963639,
- "quality": "1080p",
+ "d": 306,
+ "f": 9963639,
+ "q": "1080p",
+ "t": "video",
"src": str,
- "type": "video",
},
}
}),
# archive
("https://www.deviantart.com/itsvenue/art/-brush-pngs-14-763300948", {
- "pattern": r"https://.+deviantart.com/download/763300948/.*\.rar",
+ # "pattern": r"https://.+deviantart.com/download/763300948/.*rar",
+ "pattern": r"https://images-wixmp-\w+\.wixmp\.com/i/.*\.png"
}),
# swf
("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", {
@@ -830,7 +841,7 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2):
"""Extractor for an artist's scraps"""
subcategory = "scraps"
directory_fmt = ("{category}", "{username}", "Scraps")
- archive_fmt = "s_{username}_{index}.{extension}"
+ archive_fmt = "s_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
test = (
("https://www.deviantart.com/shimoda7/gallery/scraps", {
@@ -841,14 +852,6 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2):
)
def deviations(self):
- # copy self.session
- session = self.session.__class__()
- for attr in session.__attrs__:
- setattr(session, attr, getattr(self.session, attr, None))
-
- # reset cookies in the original session object
- self.session.cookies = session.cookies.__class__()
-
url = self.root + "/_napi/da-user-profile/api/gallery/contents"
params = {
"username" : self.user,
@@ -861,8 +864,7 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2):
}
while True:
- data = self.request(
- url, session=session, params=params, headers=headers).json()
+ data = self.request(url, params=params, headers=headers).json()
for obj in data["results"]:
yield obj["deviation"]
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
index 77a19f6..80db096 100644
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -15,25 +15,30 @@ from .. import text
class DirectlinkExtractor(Extractor):
"""Extractor for direct links to images and other media files"""
category = "directlink"
- filename_fmt = "{domain}/{path}"
- archive_fmt = "{domain}/{path}"
+ filename_fmt = "{domain}/{path}/{filename}.{extension}"
+ archive_fmt = filename_fmt
pattern = (r"(?i)https?://(?P<domain>[^/?&#]+)/(?P<path>[^?&#]+\."
r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
r"(?:\?(?P<query>[^/?#]*))?(?:#(?P<fragment>.*))?$")
test = (
(("https://en.wikipedia.org/static/images/project-logos/enwiki.png"), {
"url": "18c5d00077332e98e53be9fed2ee4be66154b88d",
- "keyword": "e81b9fe3022e971365dd859f38e4ef717a6c69ed",
+ "keyword": "105770a3f4393618ab7b811b731b22663b5d3794",
+ }),
+ # empty path
+ (("https://example.org/file.webm"), {
+ "url": "2d807ed7059d1b532f1bb71dc24b510b80ff943f",
+ "keyword": "29dad729c40fb09349f83edafa498dba1297464a",
}),
# more complex example
- ("https://example.org/path/file.webm?que=1&ry=2#fragment", {
- "url": "fd4aec8a32842343394e6078a06c3e6b647bf671",
- "keyword": "ff75764b1ae66615b723a6357b8193fa2de84678",
+ ("https://example.org/path/to/file.webm?que=1&ry=2#fragment", {
+ "url": "114b8f1415cc224b0f26488ccd4c2e7ce9136622",
+ "keyword": "06014abd503e3b2b58aa286f9bdcefdd2ae336c0",
}),
# percent-encoded characters
("https://example.org/%27%3C%23/%23%3E%27.jpg?key=%3C%26%3E", {
"url": "2627e8140727fdf743f86fe18f69f99a052c9718",
- "keyword": "4d19dc12e41ffcb4cbec2013e335cf482377c35e",
+ "keyword": "831790fddda081bdddd14f96985ab02dc5b5341f",
}),
# upper case file extension (#296)
("https://post-phinf.pstatic.net/MjAxOTA1MjlfMTQ4/MDAxNTU5MTI2NjcyNTkw"
@@ -46,11 +51,14 @@ class DirectlinkExtractor(Extractor):
self.data = match.groupdict()
def items(self):
- text.nameext_from_url(self.url, self.data)
- for key, value in self.data.items():
+ data = self.data
+ for key, value in data.items():
if value:
- self.data[key] = text.unquote(value)
+ data[key] = text.unquote(value)
+ data["path"], _, name = data["path"].rpartition("/")
+ data["filename"], _, ext = name.rpartition(".")
+ data["extension"] = ext.lower()
yield Message.Version, 1
- yield Message.Directory, self.data
- yield Message.Url, self.url, self.data
+ yield Message.Directory, data
+ yield Message.Url, self.url, data
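
Since items() no longer calls text.nameext_from_url(), the directory path, filename, and extension now come from two rpartition() calls; a self-contained illustration (a URL with no directory component, like the new "empty path" test above, simply yields an empty path):

    path = "path/to/file.WEBM"

    # "path/to/file.WEBM" -> directory path, filename, lowercased extension
    path, _, name = path.rpartition("/")
    filename, _, ext = name.rpartition(".")
    extension = ext.lower()

    print(path, filename, extension)    # path/to file webm
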
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index b71fc4d..bd34bdb 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -280,18 +280,22 @@ class FlickrAPI(oauth.OAuth1API):
API_KEY = "ac4fd7aa98585b9eee1ba761c209de68"
API_SECRET = "3adb0f568dc68393"
FORMATS = [
- ("o", "Original" , None),
- ("k", "Large 2048" , 2048),
- ("h", "Large 1600" , 1600),
- ("l", "Large" , 1024),
- ("c", "Medium 800" , 800),
- ("z", "Medium 640" , 640),
- ("m", "Medium" , 500),
- ("n", "Small 320" , 320),
- ("s", "Small" , 240),
- ("q", "Large Square", 150),
- ("t", "Thumbnail" , 100),
- ("s", "Square" , 75),
+ ("o" , "Original" , None),
+ ("6k", "X-Large 6K" , 6144),
+ ("5k", "X-Large 5K" , 5120),
+ ("4k", "X-Large 4K" , 4096),
+ ("3k", "X-Large 3K" , 3072),
+ ("k" , "Large 2048" , 2048),
+ ("h" , "Large 1600" , 1600),
+ ("l" , "Large" , 1024),
+ ("c" , "Medium 800" , 800),
+ ("z" , "Medium 640" , 640),
+ ("m" , "Medium" , 500),
+ ("n" , "Small 320" , 320),
+ ("s" , "Small" , 240),
+ ("q" , "Large Square", 150),
+ ("t" , "Thumbnail" , 100),
+ ("s" , "Square" , 75),
]
VIDEO_FORMATS = {
"orig" : 9,
@@ -325,7 +329,7 @@ class FlickrAPI(oauth.OAuth1API):
if not fmt[2] or fmt[2] <= self.maxsize]
else:
self.formats = self.FORMATS
- self.formats = self.formats[:4]
+ self.formats = self.formats[:8]
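
FlickrAPI now considers the first 8 formats (up from 4) as download candidates, so the new 6K-3K sizes are actually reachable; a standalone sketch of the selection, using a shortened copy of the table above:

    FORMATS = [
        ("o", "Original", None),    ("6k", "X-Large 6K", 6144),
        ("5k", "X-Large 5K", 5120), ("4k", "X-Large 4K", 4096),
        ("3k", "X-Large 3K", 3072), ("k", "Large 2048", 2048),
        ("h", "Large 1600", 1600),  ("l", "Large", 1024),
    ]

    def select_formats(maxsize=None):
        formats = FORMATS
        if maxsize:
            # drop size-limited formats above maxsize; None means Original
            formats = [f for f in formats if not f[2] or f[2] <= maxsize]
        return formats[:8]

    print([f[0] for f in select_formats(4096)])
    # ['o', '4k', '3k', 'k', 'h', 'l']
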
def favorites_getList(self, user_id):
"""Returns a list of the user's favorite photos."""
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 645b53a..428f3c3 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -20,7 +20,6 @@ class FoolfuukaThreadExtractor(SharedConfigMixin, Extractor):
subcategory = "thread"
directory_fmt = ("{category}", "{board[shortname]}",
"{thread_num}{title:? - //}")
- filename_fmt = "{media[media]}"
archive_fmt = "{board[shortname]}_{num}_{timestamp}"
pattern_fmt = r"/([^/]+)/thread/(\d+)"
external = "default"
@@ -50,7 +49,8 @@ class FoolfuukaThreadExtractor(SharedConfigMixin, Extractor):
if url.startswith("/"):
url = self.root + url
- post["extension"] = url.rpartition(".")[2]
+ post["filename"], _, post["extension"] = \
+ media["media"].rpartition(".")
yield Message.Url, url, post
def posts(self):
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index fc7dbf9..1f8c567 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -82,13 +82,18 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
})
def images(self, page):
+ data = None
+
if self.decode == "base64":
- base64_data = text.extract(page, 'atob("', '"')[0].encode()
- data = base64.b64decode(base64_data).decode()
+ base64_data = text.extract(page, 'atob("', '"')[0]
+ if base64_data:
+ data = base64.b64decode(base64_data.encode()).decode()
elif self.decode == "double":
pos = page.find("[{")
- data = text.extract(page, " = ", ";", pos)[0]
- else:
+ if pos >= 0:
+ data = text.extract(page, " = ", ";", pos)[0]
+
+ if not data:
data = text.extract(page, "var pages = ", ";")[0]
return json.loads(data)
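
images() now treats both decode modes as best-effort and falls back to the plain "var pages" variable when they produce nothing; a runnable sketch of the base64 branch, using a made-up page excerpt:

    import base64
    import json

    # made-up page excerpt carrying a base64-encoded page list
    page = 'atob("W3sidXJsIjogImh0dHBzOi8vZXhhbXBsZS5vcmcvMS5qcGcifV0=");'

    start = page.find('atob("') + len('atob("')
    base64_data = page[start:page.find('"', start)]
    data = base64.b64decode(base64_data.encode()).decode()
    print(json.loads(data))    # [{'url': 'https://example.org/1.jpg'}]
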
@@ -138,8 +143,8 @@ EXTRACTORS = {
("https://jaiminisbox.com/reader/read/uratarou/en/0/1/", {
"keyword": "6009af77cc9c05528ab1fdda47b1ad9d4811c673",
}),
- ("https://jaiminisbox.com/reader/read/dr-stone/en/0/16/", {
- "keyword": "8607375c24b1d0db7f52d059ef5baff793aa458e",
+ ("https://jaiminisbox.com/reader/read/red-storm/en/0/336/", {
+ "keyword": "53c6dddf3e5a61b6002a886ccd7e3354e973299a",
}),
),
"test-manga":
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 4ec7f00..0c05a97 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -52,7 +52,7 @@ class GelbooruExtractor(booru.XmlParserMixin,
page = self.request(self.post_url.format(post_id)).text
data = text.extract_all(page, (
(None , '<meta name="keywords"', ''),
- ("tags" , ' imageboard, ', '"'),
+ ("tags" , ' imageboard- ', '"'),
("id" , '<li>Id: ', '<'),
("created_at", '<li>Posted: ', '<'),
("width" , '<li>Size: ', 'x'),
diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py
index 43479c6..181db9a 100644
--- a/gallery_dl/extractor/hbrowse.py
+++ b/gallery_dl/extractor/hbrowse.py
@@ -50,7 +50,7 @@ class HbrowseChapterExtractor(HbrowseBase, ChapterExtractor):
pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/(\d+)/c(\d+))"
test = ("https://www.hbrowse.com/10363/c00000", {
"url": "6feefbc9f4b98e20d8425ddffa9dd111791dc3e6",
- "keyword": "274996f6c809e5250b6ff3abbc5147e29f89d9a5",
+ "keyword": "6c1136522a25de013a6579ffa34dadc1eb0d4d1b",
"content": "44578ebbe176c2c27434966aef22945787e2781e",
})
@@ -78,7 +78,7 @@ class HbrowseMangaExtractor(HbrowseBase, MangaExtractor):
pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/\d+)/?$"
test = ("https://www.hbrowse.com/10363", {
"url": "b89682bfb86c11d2af0dc47463804ec3ac4aadd6",
- "keyword": "4b15fda1858a69de1fbf5afddfe47dd893397312",
+ "keyword": "08f5935a4411d2c19ac1786bd4ca552c3785fcae",
})
def chapters(self, page):
diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py
index 7e0b63c..84ad3af 100644
--- a/gallery_dl/extractor/hentaifox.py
+++ b/gallery_dl/extractor/hentaifox.py
@@ -23,8 +23,8 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com(/gallery/(\d+))"
test = ("https://hentaifox.com/gallery/56622/", {
"pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg",
+ "keyword": "b7ff141331d0c7fc711ab28d45dfbb013a83d8e9",
"count": 24,
- "keyword": "903ebe227d85e484460382fc6cbab42be7a244d5",
})
def __init__(self, match):
@@ -37,19 +37,43 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
return {
"gallery_id": text.parse_int(self.gallery_id),
"title" : text.unescape(extr("<h1>", "</h1>")),
- "parody" : split(extr(">Parodies:" , "</a></span>"))[::2],
- "characters": split(extr(">Characters:", "</a></span>"))[::2],
- "tags" : split(extr(">Tags:" , "</a></span>"))[::2],
- "artist" : split(extr(">Artists:" , "</a></span>"))[::2],
- "group" : split(extr(">Groups:" , "</a></span>"))[::2],
- "type" : text.remove_html(extr(">Category:", "</a></span>")),
+ "parody" : split(extr(">Parodies:" , "</ul>"))[::2],
+ "characters": split(extr(">Characters:", "</ul>"))[::2],
+ "tags" : split(extr(">Tags:" , "</ul>"))[::2],
+ "artist" : split(extr(">Artists:" , "</ul>"))[::2],
+ "group" : split(extr(">Groups:" , "</ul>"))[::2],
+ "type" : text.remove_html(extr(">Category:", "<span")),
"language" : "English",
"lang" : "en",
}
def images(self, page):
+ pos = page.find('id="load_all"')
+ if pos >= 0:
+ extr = text.extract
+ load_id = extr(page, 'id="load_id" value="', '"', pos)[0]
+ load_dir = extr(page, 'id="load_dir" value="', '"', pos)[0]
+ load_pages = extr(page, 'id="load_pages" value="', '"', pos)[0]
+
+ url = self.root + "/includes/thumbs_loader.php"
+ data = {
+ "u_id" : self.gallery_id,
+ "g_id" : load_id,
+ "img_dir" : load_dir,
+ "visible_pages": "0",
+ "total_pages" : load_pages,
+ "type" : "2",
+ }
+ headers = {
+ "Origin": self.root,
+ "Referer": self.gallery_url,
+ "X-Requested-With": "XMLHttpRequest",
+ }
+ page = self.request(
+ url, method="POST", headers=headers, data=data).text
+
return [
- (text.urljoin(self.root, url.replace("t.", ".")), None)
+ (url.replace("t.", "."), None)
for url in text.extract_iter(page, 'data-src="', '"')
]
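
When a gallery page only lazy-loads its thumbnails (the id="load_all" case), the new images() requests the rest from /includes/thumbs_loader.php; a hedged requests sketch of that call, where the numeric values are placeholders for what the hidden load_id/load_dir/load_pages inputs would contain:

    import requests

    root = "https://hentaifox.com"
    data = {
        "u_id": "56622",        # gallery ID from the URL
        "g_id": "12345",        # placeholder for id="load_id"
        "img_dir": "678",       # placeholder for id="load_dir"
        "visible_pages": "0",
        "total_pages": "24",    # placeholder for id="load_pages"
        "type": "2",
    }
    headers = {
        "Origin": root,
        "Referer": root + "/gallery/56622/",
        "X-Requested-With": "XMLHttpRequest",
    }
    page = requests.post(root + "/includes/thumbs_loader.php",
                         headers=headers, data=data).text

    # thumbnail URLs become full-size URLs by dropping the "t" suffix
    urls = [chunk.split('"', 1)[0].replace("t.", ".")
            for chunk in page.split('data-src="')[1:]]
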
@@ -64,15 +88,13 @@ class HentaifoxSearchExtractor(HentaifoxBase, Extractor):
("https://hentaifox.com/character/reimu-hakurei/"),
("https://hentaifox.com/artist/distance/"),
("https://hentaifox.com/search/touhou/"),
- ("https://hentaifox.com/tag/full-colour/", {
+ ("https://hentaifox.com/tag/heterochromia/", {
"pattern": HentaifoxGalleryExtractor.pattern,
- "count": ">= 40",
+ "count": ">= 60",
"keyword": {
- "url": str,
+ "url" : str,
"gallery_id": int,
- "thumbnail": r"re:https://i\d*.hentaifox.com/\d+/\d+/thumb\.",
- "title": str,
- "tags": list,
+ "title" : str,
},
}),
)
@@ -87,31 +109,26 @@ class HentaifoxSearchExtractor(HentaifoxBase, Extractor):
yield Message.Queue, gallery["url"], gallery
def galleries(self):
- url = "{}/{}/".format(self.root, self.path)
+ num = 1
while True:
+ url = "{}{}/pag/{}/".format(self.root, self.path, num)
page = self.request(url).text
- info, gpos = text.extract(
- page, 'class="galleries_overview">', 'class="clear">')
- for ginfo in text.extract_iter(info, '<div class="item', '</a>'):
- tags , pos = text.extract(ginfo, '', '"')
- url , pos = text.extract(ginfo, 'href="', '"', pos)
- title, pos = text.extract(ginfo, 'alt="', '"', pos)
- thumb, pos = text.extract(ginfo, 'src="', '"', pos)
+ for info in text.extract_iter(
+ page, 'class="g_title"><a href="', '</a>'):
+ url, _, title = info.partition('">')
yield {
- "url": text.urljoin(self.root, url),
+ "url" : text.urljoin(self.root, url),
"gallery_id": text.parse_int(
url.strip("/").rpartition("/")[2]),
- "thumbnail": text.urljoin(self.root, thumb),
- "title": text.unescape(title),
- "tags": tags.split(),
+ "title" : text.unescape(title),
"_extractor": HentaifoxGalleryExtractor,
}
- pos = page.find('class="current"', gpos)
- url = text.extract(page, 'href="', '"', pos)[0]
+ pos = page.find(">Next<")
+ url = text.rextract(page, "href=", ">", pos)[0]
if pos == -1 or "/pag" not in url:
return
- url = text.urljoin(self.root, url)
+ num += 1
diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py
index 9e2ee9f..193cadf 100644
--- a/gallery_dl/extractor/hentainexus.py
+++ b/gallery_dl/extractor/hentainexus.py
@@ -22,7 +22,7 @@ class HentainexusGalleryExtractor(GalleryExtractor):
test = (
("https://hentainexus.com/view/5688", {
"url": "746d0043e20030f1171aae5ea113176607302517",
- "keyword": "9512cf5f258130e5f75de9954d7a13217c2405e7",
+ "keyword": "c1b7091e2bc2f733f6401711e072ad11cf93dd69",
}),
("https://hentainexus.com/read/5688"),
)
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index 152b631..d6eea7f 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -33,13 +33,13 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
r"(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)")
test = (
("https://www.imagefap.com/pictures/7102714", {
- "url": "268995eac5d01ddecd0fe58cfa9828390dc85a84",
- "keyword": "b5bd65ab2ff574ed1639db9a43c7b1b8583c85ef",
+ "pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg",
+ "keyword": "2ba96e84c2952c4750e9fa94a3f2b1f965cec2f3",
"content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab",
}),
("https://www.imagefap.com/gallery/5486966", {
- "url": "14906b4f0b8053d1d69bc730a325acb793cbc898",
- "keyword": "ab90972f3527a2011478fabc621a2c99a541f752",
+ "pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg",
+ "keyword": "3e24eace5b09639b881ebd393165862feb46adde",
}),
("https://www.imagefap.com/gallery.php?gid=7102714"),
)
@@ -89,9 +89,10 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
if not imgurl:
return
num += 1
- _, imgid, name = imgurl.rsplit("/", 2)
- data = {"image_id": text.parse_int(imgid), "num": num}
- yield imgurl, text.nameext_from_url(name, data)
+ data = text.nameext_from_url(imgurl)
+ data["num"] = num
+ data["image_id"] = text.parse_int(data["filename"])
+ yield imgurl, data
params["idx"] += 24
@@ -100,8 +101,8 @@ class ImagefapImageExtractor(ImagefapExtractor):
subcategory = "image"
pattern = r"(?:https?://)?(?:www\.)?imagefap\.com/photo/(\d+)"
test = ("https://www.imagefap.com/photo/1369341772/", {
- "url": "b31ee405b61ff0450020a1bf11c0581ca9adb471",
- "keyword": "eadaa8f8012298384996efd21cf1f9e9e0dddb9b",
+ "pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg",
+ "keyword": "8894e45f7262020d8d66ce59917315def1fc475b",
})
def __init__(self, match):
@@ -109,27 +110,32 @@ class ImagefapImageExtractor(ImagefapExtractor):
self.image_id = match.group(1)
def items(self):
- data = self.get_job_metadata()
+ url, data = self.get_image()
yield Message.Version, 1
yield Message.Directory, data
- yield Message.Url, data["url"], data
+ yield Message.Url, url, data
- def get_job_metadata(self):
- """Collect metadata for extractor-job"""
+ def get_image(self):
url = "{}/photo/{}/".format(self.root, self.image_id)
page = self.request(url).text
- info = json.loads(text.extract(
- page, '<script type="application/ld+json">', '</script>')[0])
- parts = info["contentUrl"].rsplit("/", 3)
- return text.nameext_from_url(parts[3], {
- "url": info["contentUrl"],
+
+ info, pos = text.extract(
+ page, '<script type="application/ld+json">', '</script>')
+ image_id, pos = text.extract(
+ page, 'id="imageid_input" value="', '"', pos)
+ gallery_id, pos = text.extract(
+ page, 'id="galleryid_input" value="', '"', pos)
+ info = json.loads(info)
+ url = info["contentUrl"]
+
+ return url, text.nameext_from_url(url, {
"title": text.unescape(info["name"]),
"uploader": info["author"],
"date": info["datePublished"],
"width": text.parse_int(info["width"]),
"height": text.parse_int(info["height"]),
- "gallery_id": text.parse_int(parts[1]),
- "image_id": text.parse_int(parts[2]),
+ "gallery_id": text.parse_int(gallery_id),
+ "image_id": text.parse_int(image_id),
})
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 954c1f0..4015bfd 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -154,10 +154,10 @@ class ImagetwistImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imagetwist.com"""
category = "imagetwist"
pattern = r"(?:https?://)?((?:www\.)?imagetwist\.com/([a-z0-9]{12}))"
- test = ("https://imagetwist.com/4e46hv31tu0q/test.jpg", {
- "url": "c999dc1a5dec0525ac9eb8c092f173dfe6dba0b0",
- "keyword": "a9f2e01757ec96d4ee4752cbd8446ede80f7935e",
- "content": "96b1fd099b06faad5879fce23a7e4eb8290d8810",
+ test = ("https://imagetwist.com/f1i2s4vhvbrq/test.png", {
+ "url": "8d5e168c0bee30211f821c6f3b2116e419d42671",
+ "keyword": "d1060a4c2e3b73b83044e20681712c0ffdd6cfef",
+ "content": "0c8768055e4e20e7c7259608b67799171b691140",
})
https = True
params = None
@@ -199,9 +199,9 @@ class PixhostImageExtractor(ImagehostImageExtractor):
category = "pixhost"
pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)"
r"/show/\d+/(\d+)_[^/?&#]+)")
- test = ("https://pixhost.to/show/224/96246707_test-.png", {
- "url": "8f3d41fdd2dbec4c844e5ee45bf49961fbd79c67",
- "keyword": "ecefe2d5814286f9d1dff3d88d9bdc78dd456c5d",
+ test = ("http://pixhost.to/show/190/130327671_test-.png", {
+ "url": "4e5470dcf6513944773044d40d883221bbc46cff",
+ "keyword": "3bad6d59db42a5ebbd7842c2307e1c3ebd35e6b0",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
})
https = True
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
index fb321d0..15152b7 100644
--- a/gallery_dl/extractor/imgbb.py
+++ b/gallery_dl/extractor/imgbb.py
@@ -39,7 +39,7 @@ class ImgbbExtractor(Extractor):
for img in self.images(page):
image = {
"id" : img["url_viewer"].rpartition("/")[2],
- "user" : img["user"]["username"],
+ "user" : img["user"]["username"] if "user" in img else "",
"title" : text.unescape(img["title"]),
"url" : img["image"]["url"],
"extension": img["image"]["extension"],
@@ -79,8 +79,15 @@ class ImgbbExtractor(Extractor):
return self.session.cookies
def _pagination(self, page, endpoint, params):
- params["page"] = 2
data = None
+ seek, pos = text.extract(page, 'data-seek="', '"')
+ tokn, pos = text.extract(page, 'PF.obj.config.auth_token="', '"', pos)
+ params["action"] = "list"
+ params["list"] = "images"
+ params["sort"] = self.sort
+ params["seek"] = seek
+ params["page"] = 2
+ params["auth_token"] = tokn
while True:
for img in text.extract_iter(page, "data-object='", "'"):
@@ -90,6 +97,8 @@ class ImgbbExtractor(Extractor):
return
params["seek"] = data["seekEnd"]
params["page"] += 1
+ elif not seek or 'class="pagination-next"' not in page:
+ return
data = self.request(endpoint, method="POST", data=params).json()
page = data["html"]
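
Hoisting the seek/auth_token extraction into _pagination() lets album and user listings share one JSON paging setup; a hedged sketch of building the first follow-up request for an album (the sort value is an assumed default; gallery-dl takes it from its config):

    import requests

    page = requests.get("https://ibb.co/album/kYKpwF").text

    # seek cursor and auth token are embedded in the first HTML page
    seek = page.partition('data-seek="')[2].partition('"')[0]
    token = page.partition('PF.obj.config.auth_token="')[2].partition('"')[0]

    params = {
        "action": "list", "list": "images",
        "sort": "date_desc",            # assumed default sort value
        "seek": seek, "page": 2, "auth_token": token,
        "from": "album", "albumid": "kYKpwF",
        "params_hidden[list]": "images",
        "params_hidden[from]": "album",
        "params_hidden[albumid]": "kYKpwF",
    }
    data = requests.post("https://ibb.co/json", data=params).json()
    # data["html"] holds the next page of embedded image objects
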
@@ -110,6 +119,11 @@ class ImgbbAlbumExtractor(ImgbbExtractor):
"url": "e2e387b8fdb3690bd75d804d0af2833112e385cd",
"keyword": "a307fc9d2085bdc0eb7c538c8d866c59198d460c",
}),
+ # no user data (#471)
+ ("https://ibb.co/album/kYKpwF", {
+ "url": "ac0abcfcb89f4df6adc2f7e4ff872f3b03ef1bc7",
+ "keyword": {"user": ""},
+ }),
# deleted
("https://ibb.co/album/fDArrF", {
"exception": exception.NotFoundError,
@@ -133,21 +147,13 @@ class ImgbbAlbumExtractor(ImgbbExtractor):
return {
"album_id" : self.album_id,
"album_name": text.unescape(album),
- "user" : user.lower(),
+ "user" : user.lower() if user else "",
}
def images(self, page):
- seek, pos = text.extract(page, 'data-seek="', '"')
- tokn, pos = text.extract(page, 'PF.obj.config.auth_token="', '"', pos)
-
return self._pagination(page, "https://ibb.co/json", {
- "action" : "list",
- "list" : "images",
"from" : "album",
- "sort" : self.sort,
"albumid" : self.album_id,
- "seek" : seek,
- "auth_token": tokn,
"params_hidden[list]" : "images",
"params_hidden[from]" : "album",
"params_hidden[albumid]": self.album_id,
@@ -173,18 +179,10 @@ class ImgbbUserExtractor(ImgbbExtractor):
return {"user": self.user}
def images(self, page):
- seek, pos = text.extract(page, 'data-seek="', '"')
- tokn, pos = text.extract(page, 'PF.obj.config.auth_token="', '"', pos)
- user, pos = text.extract(page, '.obj.resource={"id":"', '"', pos)
-
+ user = text.extract(page, '.obj.resource={"id":"', '"')[0]
return self._pagination(page, self.page_url + "json", {
- "action" : "list",
- "list" : "images",
"from" : "user",
- "sort" : self.sort,
- "seek" : seek,
"userid" : user,
- "auth_token": tokn,
"params_hidden[userid]": user,
"params_hidden[from]" : "user",
})
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index b1be995..ce3e1ce 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -116,8 +116,8 @@ class ImgurImageExtractor(ImgurExtractor):
image = self.api.image(self.key)
if not image["title"]:
page = self.request(self.root + "/" + self.key, fatal=False).text
- title = text.extract(page, "<title>", "<")[0]
- image["title"] = (title or "").rpartition(" - ")[0].strip()
+ title = text.extract(page, "<title>", "<")[0] or ""
+ image["title"] = text.unescape(title.rpartition(" - ")[0].strip())
url = self._prepare(image)
yield Message.Version, 1
yield Message.Directory, image
@@ -280,6 +280,20 @@ class ImgurFavoriteExtractor(ImgurExtractor):
return self._items_queue(self.api.account_favorites(self.key))
+class ImgurSubredditExtractor(ImgurExtractor):
+    """Extractor for a subreddit's imgur links"""
+ subcategory = "subreddit"
+ pattern = BASE_PATTERN + r"/r/([^/?&#]+)"
+ test = ("https://imgur.com/r/pics", {
+ "range": "1-100",
+ "count": 100,
+ "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+",
+ })
+
+ def items(self):
+ return self._items_queue(self.api.gallery_subreddit(self.key))
+
+
class ImgurAPI():
def __init__(self, extractor):
@@ -297,6 +311,10 @@ class ImgurAPI():
endpoint = "account/{}/submissions".format(account)
return self._pagination(endpoint)
+ def gallery_subreddit(self, subreddit):
+ endpoint = "gallery/r/{}".format(subreddit)
+ return self._pagination(endpoint)
+
def album(self, album_hash):
return self._call("album/" + album_hash)
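
The new subreddit extractor maps straight onto the gallery/r/{subreddit} API endpoint; a hedged sketch of calling it directly, where the trailing page index follows the _pagination scheme used by the other endpoints and the client ID is a placeholder (gallery-dl ships its own):

    import requests

    def gallery_subreddit(subreddit, client_id, page=0):
        """Fetch one page of an imgur subreddit gallery."""
        url = "https://api.imgur.com/3/gallery/r/{}/{}".format(subreddit, page)
        headers = {"Authorization": "Client-ID " + client_id}
        return requests.get(url, headers=headers).json()["data"]

    for item in gallery_subreddit("pics", "YOUR_CLIENT_ID"):
        print(item["link"])
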
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index a14225f..05adac1 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -31,6 +31,7 @@ class InstagramExtractor(Extractor):
self.login()
yield Message.Version, 1
+ videos = self.config("videos", True)
metadata = self.get_metadata()
for data in self.instagrams():
data.update(metadata)
@@ -41,7 +42,11 @@ class InstagramExtractor(Extractor):
data['_extractor'] = InstagramStoriesExtractor
yield Message.Queue, url, data
else:
- url = data['video_url'] or data['display_url']
+ url = data.get('video_url')
+ if not url:
+ url = data['display_url']
+ elif not videos:
+ continue
yield Message.Url, url, text.nameext_from_url(url, data)
def login(self):
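
The change above implements a new "videos" option: video entries are skipped entirely when it is disabled, while image entries always fall back to display_url. A condensed, runnable restatement:

    def select_url(data, videos=True):
        """Return the media URL to download, or None to skip the entry."""
        url = data.get("video_url")
        if not url:
            return data["display_url"]    # plain image
        if not videos:
            return None                   # video, but videos are disabled
        return url

    print(select_url({"display_url": "img.jpg"}))                     # img.jpg
    print(select_url({"display_url": "p.jpg", "video_url": "v.mp4"},
                     videos=False))                                   # None
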
@@ -109,8 +114,14 @@ class InstagramExtractor(Extractor):
return data
def _extract_postpage(self, url):
- data = self.request(url + "?__a=1").json()
- media = data['graphql']['shortcode_media']
+ try:
+ with self.request(url + '?__a=1', fatal=False) as response:
+ media = response.json()['graphql']['shortcode_media']
+ except (KeyError, ValueError) as exc:
+ self.log.warning("Unable to fetch data from '%s': %s: %s",
+ url, exc.__class__.__name__, exc)
+ self.log.debug("Server response: %s", response.text)
+ return ()
common = {
'date': text.parse_timestamp(media['taken_at_timestamp']),
@@ -199,10 +210,10 @@ class InstagramExtractor(Extractor):
'expires': text.parse_timestamp(media['expiring_at_timestamp']),
'media_id': media['id'],
'typename': media['__typename'],
+ 'display_url': media['display_url'],
}
if media['__typename'] == 'GraphStoryImage':
media_data.update({
- 'display_url': media['display_url'],
'height': text.parse_int(media['dimensions']['height']),
'width': text.parse_int(media['dimensions']['width']),
})
@@ -210,7 +221,7 @@ class InstagramExtractor(Extractor):
vr = media['video_resources'][0]
media_data.update({
'duration': text.parse_float(media['video_duration']),
- 'display_url': vr['src'],
+ 'video_url': vr['src'],
'height': text.parse_int(vr['config_height']),
'width': text.parse_int(vr['config_width']),
})
@@ -292,7 +303,7 @@ class InstagramImageExtractor(InstagramExtractor):
# GraphImage
("https://www.instagram.com/p/BqvsDleB3lV/", {
"pattern": r"https://[^/]+\.(cdninstagram\.com|fbcdn\.net)"
- r"/vp/[0-9a-f]+/[0-9A-F]+/t51.2885-15/e35"
+ r"/v(p/[0-9a-f]+/[0-9A-F]+)?/t51.2885-15/e35"
r"/44877605_725955034447492_3123079845831750529_n.jpg",
"keyword": {
"date": "type:datetime",
diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py
index bb89f93..7151de0 100644
--- a/gallery_dl/extractor/kissmanga.py
+++ b/gallery_dl/extractor/kissmanga.py
@@ -19,9 +19,9 @@ import re
class RedirectMixin():
"""Detect and handle redirects to CAPTCHA pages"""
- def request(self, url):
+ def request(self, url, **kwargs):
while True:
- response = Extractor.request(self, url)
+ response = Extractor.request(self, url, **kwargs)
if not response.history or "/AreYouHuman" not in response.url:
return response
if self.config("captcha", "stop") == "wait":
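
Adding **kwargs keeps the CAPTCHA-detecting override signature-compatible with Extractor.request(), so callers can still pass through options such as method= or headers=; a minimal illustration of the forwarding pattern:

    class Base:
        def request(self, url, **kwargs):
            print("GET", url, kwargs)

    class RedirectMixin(Base):
        def request(self, url, **kwargs):
            # forward every keyword argument to the parent implementation
            return super().request(url, **kwargs)

    RedirectMixin().request("https://example.org/", headers={"Referer": "/"})
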
diff --git a/gallery_dl/extractor/livedoor.py b/gallery_dl/extractor/livedoor.py
index e922f61..e47b7db 100644
--- a/gallery_dl/extractor/livedoor.py
+++ b/gallery_dl/extractor/livedoor.py
@@ -64,7 +64,7 @@ class LivedoorExtractor(Extractor):
if not src:
continue
if "://livedoor.blogimg.jp/" in src:
- url = src.replace("-s.", ".")
+ url = src.replace("http:", "https:", 1).replace("-s.", ".")
else:
url = text.urljoin(self.root, src)
name, _, ext = url.rpartition("/")[2].rpartition(".")
@@ -131,16 +131,16 @@ class LivedoorPostExtractor(LivedoorExtractor):
pattern = r"(?:https?://)?blog\.livedoor\.jp/(\w+)/archives/(\d+)"
test = (
("http://blog.livedoor.jp/zatsu_ke/archives/51493859.html", {
- "url": "8826fe623f19dc868e7538e8519bf8491e92a0a2",
- "keyword": "83993111d5d0c08d021196802dd36b73f04c7057",
+ "url": "9ca3bbba62722c8155be79ad7fc47be409e4a7a2",
+ "keyword": "1f5b558492e0734f638b760f70bfc0b65c5a97b9",
}),
("http://blog.livedoor.jp/amaumauma/archives/7835811.html", {
- "url": "fc1d6a9557245b5a27d3a10bf0fa9922ef377215",
- "keyword": "fd700760c98897c3125328e157972f905fd34aaa",
+ "url": "204bbd6a9db4969c50e0923855aeede04f2e4a62",
+ "keyword": "05821c7141360e6057ef2d382b046f28326a799d",
}),
("http://blog.livedoor.jp/uotapo/archives/1050616939.html", {
- "url": "3f3581807ec4776e6a67ed7985a22494d4bc4904",
- "keyword": "9e319413a42e08d32f0dcbe8aa3b452ad41aa906",
+ "url": "4b5ab144b7309eb870d9c08f8853d1abee9946d2",
+ "keyword": "84fbf6e4eef16675013d6333039a7cfcb22c2d50",
}),
)
diff --git a/gallery_dl/extractor/mangoxo.py b/gallery_dl/extractor/mangoxo.py
index 4ad8da2..114a48e 100644
--- a/gallery_dl/extractor/mangoxo.py
+++ b/gallery_dl/extractor/mangoxo.py
@@ -72,8 +72,8 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
"url": "ad921fe62663b06e7d73997f7d00646cab7bdd0d",
"keyword": {
"channel": {
- "id": "QeYKRkO0",
- "name": "美女图社",
+ "id": "Jpw9ywQ4",
+ "name": "绘画艺术赏析",
"cover": str,
},
"album": {
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 28a2c2d..a325264 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -31,8 +31,8 @@ class MastodonExtractor(Extractor):
if value is not sentinel:
return value
return config.interpolate(
- ("extractor", "mastodon", self.instance, self.subcategory, key),
- default,
+ ("extractor", "mastodon", self.instance, self.subcategory),
+ key, default,
)
def items(self):
@@ -145,10 +145,10 @@ def generate_extractors():
"""Dynamically generate Extractor classes for Mastodon instances"""
symtable = globals()
- extractors = config.get(("extractor", "mastodon"))
+ extractors = config.get(("extractor",), "mastodon")
if extractors:
EXTRACTORS.update(extractors)
- config.set(("extractor", "mastodon"), EXTRACTORS)
+ config.set(("extractor",), "mastodon", EXTRACTORS)
for instance, info in EXTRACTORS.items():
diff --git a/gallery_dl/extractor/message.py b/gallery_dl/extractor/message.py
index 1831620..088fdd6 100644
--- a/gallery_dl/extractor/message.py
+++ b/gallery_dl/extractor/message.py
@@ -52,3 +52,4 @@ class Message():
# Cookies = 5
Queue = 6
Urllist = 7
+ Metadata = 8
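
The new Metadata identifier lets extractors emit auxiliary metadata without an associated download (the Patreon changes below use it for the creator's avatar); a hedged sketch of how a consumer might dispatch on it, using only the constants visible above:

    class Message():
        Queue = 6
        Urllist = 7
        Metadata = 8    # new in this release

    def dispatch(msg):
        """Route one extractor message by its leading identifier."""
        if msg[0] == Message.Metadata:
            return ("metadata", msg[1])
        return ("other", msg[1])

    print(dispatch((Message.Metadata, {"creator": "example"})))
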
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 1ca1073..5454e52 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -9,43 +9,107 @@
"""Extractors for https://www.newgrounds.com/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
+from ..cache import cache
import json
class NewgroundsExtractor(Extractor):
"""Base class for newgrounds extractors"""
category = "newgrounds"
- directory_fmt = ("{category}", "{user}")
+ directory_fmt = ("{category}", "{artist[:10]:J, }")
filename_fmt = "{category}_{index}_{title}.{extension}"
archive_fmt = "{index}"
+ root = "https://www.newgrounds.com"
+ cookiedomain = ".newgrounds.com"
+ cookienames = ("NG_GG_username", "vmk1du5I8m")
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
- self.root = "https://{}.newgrounds.com".format(self.user)
+ self.user_root = "https://{}.newgrounds.com".format(self.user)
def items(self):
- data = self.get_metadata()
+ self.login()
yield Message.Version, 1
- yield Message.Directory, data
- for page_url in self.get_page_urls():
- image = self.parse_page_data(page_url)
- image.update(data)
- url = image["url"]
- yield Message.Url, url, text.nameext_from_url(url, image)
+ for post_url in self.posts():
+ try:
+ file = self.extract_post(post_url)
+ url = file["url"]
+ # except Exception:
+ except OSError:
+ url = None
+ if not url:
+ self.log.warning("Unable to get download URL for %s", post_url)
+ continue
+ yield Message.Directory, file
+ yield Message.Url, url, text.nameext_from_url(url, file)
- def get_metadata(self):
- """Collect metadata for extractor-job"""
- return {"user": self.user}
-
- def get_page_urls(self):
+ def posts(self):
"""Return urls of all relevant image pages"""
+ return self._pagination(self.subcategory)
+
+ def login(self):
+ username, password = self._get_auth_info()
+ if username:
+ self._update_cookies(self._login_impl(username, password))
+
+ @cache(maxage=360*24*3600, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = self.root + "/passport/"
+ page = self.request(url).text
+ headers = {"Origin": self.root, "Referer": url}
+
+ url = text.urljoin(self.root, text.extract(page, 'action="', '"')[0])
+ data = {
+ "username": username,
+ "password": password,
+ "remember": "1",
+ "login" : "1",
+ }
+
+ response = self.request(url, method="POST", headers=headers, data=data)
+ if not response.history:
+ raise exception.AuthenticationError()
+
+ return {
+ cookie.name: cookie.value
+ for cookie in response.history[0].cookies
+ if cookie.expires and cookie.domain == self.cookiedomain
+ }
+
+ def extract_post(self, post_url):
+ page = self.request(post_url).text
+ extr = text.extract_from(page)
+
+ if "/art/view/" in post_url:
+ data = self._extract_image_data(extr, post_url)
+ elif "/audio/listen/" in post_url:
+ data = self._extract_audio_data(extr, post_url)
+ else:
+ data = self._extract_media_data(extr, post_url)
+
+ data["comment"] = text.unescape(text.remove_html(extr(
+ 'id="author_comments">', '</div>'), "", ""))
+ data["favorites"] = text.parse_int(extr(
+ 'id="faves_load">', '<').replace(",", ""))
+ data["score"] = text.parse_float(extr('id="score_number">', '<'))
+ data["tags"] = text.split_html(extr(
+ '<dd class="tags momag">', '</dd>'))
+ data["artist"] = [
+ text.extract(user, '//', '.')[0]
+ for user in text.extract_iter(page, '<div class="item-user">', '>')
+ ]
+
+ data["tags"].sort()
+ data["user"] = self.user or data["artist"][0]
+ return data
- def parse_page_data(self, page_url):
- """Collect url and metadata from an image page"""
- extr = text.extract_from(self.request(page_url).text)
+ @staticmethod
+ def _extract_image_data(extr, url):
full = text.extract_from(json.loads(extr('"full_image_text":', '});')))
data = {
"title" : text.unescape(extr('"og:title" content="', '"')),
@@ -53,53 +117,68 @@ class NewgroundsExtractor(Extractor):
"date" : text.parse_datetime(extr(
'itemprop="datePublished" content="', '"')),
"rating" : extr('class="rated-', '"'),
- "favorites" : text.parse_int(extr('id="faves_load">', '<')),
- "score" : text.parse_float(extr('id="score_number">', '<')),
- "tags" : text.split_html(extr(
- '<dd class="tags momag">', '</dd>')),
"url" : full('src="', '"'),
"width" : text.parse_int(full('width="', '"')),
"height" : text.parse_int(full('height="', '"')),
}
- data["tags"].sort()
data["index"] = text.parse_int(
data["url"].rpartition("/")[2].partition("_")[0])
return data
- def _pagination(self, url):
+ @staticmethod
+ def _extract_audio_data(extr, url):
+ return {
+ "title" : text.unescape(extr('"og:title" content="', '"')),
+ "description": text.unescape(extr(':description" content="', '"')),
+ "date" : text.parse_datetime(extr(
+ 'itemprop="datePublished" content="', '"')),
+ "url" : extr('{"url":"', '"').replace("\\/", "/"),
+ "index" : text.parse_int(url.split("/")[5]),
+ "rating" : "",
+ }
+
+ @staticmethod
+ def _extract_media_data(extr, url):
+ return {
+ "title" : text.unescape(extr('"og:title" content="', '"')),
+ "url" : extr('{"url":"', '"').replace("\\/", "/"),
+ "date" : text.parse_datetime(extr(
+ 'itemprop="datePublished" content="', '"')),
+ "description": text.unescape(extr(
+ 'itemprop="description" content="', '"')),
+ "rating" : extr('class="rated-', '"'),
+ "index" : text.parse_int(url.split("/")[5]),
+ }
+
+ def _pagination(self, kind):
+ root = self.user_root
headers = {
- "Referer": self.root,
- "X-Requested-With": "XMLHttpRequest",
"Accept": "application/json, text/javascript, */*; q=0.01",
+ "X-Requested-With": "XMLHttpRequest",
+ "Referer": root,
}
+ url = "{}/{}/page/1".format(root, kind)
while True:
- data = self.request(url, headers=headers).json()
+ with self.request(url, headers=headers, fatal=False) as response:
+ try:
+ data = response.json()
+ except ValueError:
+ return
+ if not data:
+ return
+ if "errors" in data:
+ msg = ", ".join(text.unescape(e) for e in data["errors"])
+ raise exception.StopExtraction(msg)
for year in data["sequence"]:
for item in data["years"][str(year)]["items"]:
page_url = text.extract(item, 'href="', '"')[0]
- yield text.urljoin(self.root, page_url)
+ yield text.urljoin(root, page_url)
if not data["more"]:
return
- url = text.urljoin(self.root, data["more"])
-
-
-class NewgroundsUserExtractor(NewgroundsExtractor):
- """Extractor for all images of a newgrounds user"""
- subcategory = "user"
- pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com(?:/art)?/?$"
- test = (
- ("https://blitzwuff.newgrounds.com/art", {
- "url": "24b19c4a135a09889fac7b46a74e427e4308d02b",
- "keyword": "62981f7bdd66e1f1c72ab1d9b932423c156bc9a1",
- }),
- ("https://blitzwuff.newgrounds.com/"),
- )
-
- def get_page_urls(self):
- return self._pagination(self.root + "/art/page/1")
+ url = text.urljoin(root, data["more"])
class NewgroundsImageExtractor(NewgroundsExtractor):
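
A condensed sketch of the new login flow: fetch the passport form, POST the credentials, treat a missing redirect as failure, and keep only the persistent cookies for the cached session. It mirrors _login_impl() above but uses plain requests and skips the cookie-domain check:

    import urllib.parse
    import requests

    def login(username, password):
        root = "https://www.newgrounds.com"
        session = requests.Session()
        page = session.get(root + "/passport/").text
        action = page.partition('action="')[2].partition('"')[0]

        response = session.post(
            urllib.parse.urljoin(root, action),
            headers={"Origin": root, "Referer": root + "/passport/"},
            data={"username": username, "password": password,
                  "remember": "1", "login": "1"},
        )
        if not response.history:    # no redirect means login was rejected
            raise RuntimeError("authentication failed")

        # keep only cookies with an expiration date (session-persistent)
        return {c.name: c.value
                for c in response.history[0].cookies if c.expires}
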
@@ -109,14 +188,28 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
r"(?:www\.)?newgrounds\.com/art/view/([^/?&#]+)/[^/?&#]+"
r"|art\.ngfiles\.com/images/\d+/\d+_([^_]+)_([^.]+))")
test = (
- ("https://www.newgrounds.com/art/view/blitzwuff/ffx", {
- "url": "e7778c4597a2fb74b46e5f04bb7fa1d80ca02818",
- "keyword": "cbe90f8f32da4341938f59b08d70f76137028a7e",
- "content": "cb067d6593598710292cdd340d350d14a26fe075",
+ ("https://www.newgrounds.com/art/view/tomfulp/ryu-is-hawt", {
+ "url": "57f182bcbbf2612690c3a54f16ffa1da5105245e",
+ "content": "8f395e08333eb2457ba8d8b715238f8910221365",
+ "keyword": {
+ "artist" : ["tomfulp"],
+ "comment" : "re:Consider this the bottom threshold for ",
+ "date" : "type:datetime",
+ "description": "re:Consider this the bottom threshold for ",
+ "favorites" : int,
+ "filename" : "94_tomfulp_ryu-is-hawt",
+ "height" : 476,
+ "index" : 94,
+ "rating" : "e",
+ "score" : float,
+ "tags" : ["ryu", "streetfighter"],
+ "title" : "Ryu is Hawt",
+ "user" : "tomfulp",
+ "width" : 447,
+ },
}),
- ("https://art.ngfiles.com/images/587000/587551_blitzwuff_ffx.png", {
- "url": "e7778c4597a2fb74b46e5f04bb7fa1d80ca02818",
- "keyword": "cbe90f8f32da4341938f59b08d70f76137028a7e",
+ ("https://art.ngfiles.com/images/0/94_tomfulp_ryu-is-hawt.gif", {
+ "url": "57f182bcbbf2612690c3a54f16ffa1da5105245e",
}),
)
@@ -124,30 +217,120 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
NewgroundsExtractor.__init__(self, match)
if match.group(2):
self.user = match.group(2)
- self.page_url = "https://www.newgrounds.com/art/view/{}/{}".format(
+ self.post_url = "https://www.newgrounds.com/art/view/{}/{}".format(
self.user, match.group(3))
else:
- self.page_url = match.group(0)
+ url = match.group(0)
+ if not url.startswith("http"):
+ url = "https://" + url
+ self.post_url = url
+
+ def posts(self):
+ return (self.post_url,)
+
+
+class NewgroundsMediaExtractor(NewgroundsExtractor):
+ """Extractor for a media file from newgrounds.com"""
+ subcategory = "media"
+ pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
+ r"(/(?:portal/view|audio/listen)/\d+)")
+ test = (
+ ("https://www.newgrounds.com/portal/view/589549", {
+ "url": "48d916d819c99139e6a3acbbf659a78a867d363e",
+ "content": "ceb865426727ec887177d99e0d20bb021e8606ae",
+ "keyword": {
+ "artist" : ["psychogoldfish", "tomfulp"],
+ "comment" : "re:People have been asking me how I like the ",
+ "date" : "type:datetime",
+ "description": "re:People have been asking how I like the ",
+ "favorites" : int,
+ "filename" : "527818_alternate_1896",
+ "index" : 589549,
+ "rating" : "t",
+ "score" : float,
+ "tags" : ["newgrounds", "psychogoldfish",
+ "rage", "redesign-2012"],
+ "title" : "Redesign Rage",
+ "user" : "psychogoldfish",
+ },
+ }),
+ ("https://www.newgrounds.com/audio/listen/609768", {
+ "url": "f4c5490ae559a3b05e46821bb7ee834f93a43c95",
+ "keyword": {
+ "artist" : ["zj", "tomfulp"],
+ "comment" : "re:RECORDED 12-09-2014\n\nFrom The ZJ \"Late ",
+ "date" : "type:datetime",
+ "description": "From The ZJ Report Show!",
+ "favorites" : int,
+ "index" : 609768,
+ "rating" : "",
+ "score" : float,
+ "tags" : ["fulp", "interview", "tom", "zj"],
+ "title" : "ZJ Interviews Tom Fulp!",
+ "user" : "zj",
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ NewgroundsExtractor.__init__(self, match)
+ self.user = ""
+ self.post_url = self.root + match.group(1)
- def get_page_urls(self):
- return (self.page_url,)
+ def posts(self):
+ return (self.post_url,)
-class NewgroundsVideoExtractor(NewgroundsExtractor):
- """Extractor for all videos of a newgrounds user"""
- subcategory = "video"
- filename_fmt = "{category}_{index}.{extension}"
+class NewgroundsArtExtractor(NewgroundsExtractor):
+ """Extractor for all images of a newgrounds user"""
+ subcategory = "art"
+ pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/art/?$"
+ test = ("https://tomfulp.newgrounds.com/art", {
+ "pattern": NewgroundsImageExtractor.pattern,
+ "count": ">= 3",
+ })
+
+
+class NewgroundsAudioExtractor(NewgroundsExtractor):
+ """Extractor for all audio submissions of a newgrounds user"""
+ subcategory = "audio"
+ pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/audio/?$"
+ test = ("https://tomfulp.newgrounds.com/audio", {
+ "pattern": r"https://audio.ngfiles.com/\d+/\d+_.+\.mp3",
+ "count": ">= 4",
+ })
+
+
+class NewgroundsMoviesExtractor(NewgroundsExtractor):
+ """Extractor for all movies of a newgrounds user"""
+ subcategory = "movies"
pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/movies/?$"
test = ("https://tomfulp.newgrounds.com/movies", {
- "pattern": r"ytdl:https?://www\.newgrounds\.com/portal/view/\d+",
- "count": ">= 32",
+ "pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+_.+",
+ "range": "1-10",
+ "count": 10,
})
- def get_page_urls(self):
- return self._pagination(self.root + "/movies/page/1")
- def parse_page_data(self, page_url):
- return {
- "url" : "ytdl:" + page_url,
- "index": text.parse_int(page_url.rpartition("/")[2]),
- }
+class NewgroundsUserExtractor(NewgroundsExtractor):
+ """Extractor for a newgrounds user profile"""
+ subcategory = "user"
+ pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/?$"
+ test = (
+ ("https://tomfulp.newgrounds.com", {
+ "pattern": "https://tomfulp.newgrounds.com/art$",
+ }),
+ ("https://tomfulp.newgrounds.com", {
+ "options": (("include", "all"),),
+ "pattern": "https://tomfulp.newgrounds.com/(art|audio|movies)$",
+ "count": 3,
+ }),
+ )
+
+ def items(self):
+ base = self.user_root + "/"
+ return self._dispatch_extractors((
+ (NewgroundsArtExtractor , base + "art"),
+ (NewgroundsAudioExtractor , base + "audio"),
+ (NewgroundsMoviesExtractor, base + "movies"),
+ ), ("art",))
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 0bd858f..aae17a3 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -69,8 +69,8 @@ class NijieExtractor(AsynchronousMixin, Extractor):
"description": text.unescape(extr(
'"description": "', '"').replace("&amp;", "&")),
"date" : text.parse_datetime(extr(
- '"datePublished": "', '"')[:-4] + "+0900",
- "%a %d %b %Y %I:%M:%S %p%z"),
+ '"datePublished": "', '"') + "+0900",
+ "%a %b %d %H:%M:%S %Y%z"),
"artist_id" : text.parse_int(extr(
'"sameAs": "https://nijie.info/members.php?id=', '"')),
"artist_name": keywords[1],
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 912447b..74835bf 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -15,11 +15,14 @@ from ..cache import cache
import os
import urllib.parse
+REDIRECT_URI_LOCALHOST = "http://localhost:6414/"
+REDIRECT_URI_HTTPS = "https://mikf.github.io/gallery-dl/oauth-redirect.html"
+
class OAuthBase(Extractor):
"""Base class for OAuth Helpers"""
category = "oauth"
- redirect_uri = "http://localhost:6414/"
+ redirect_uri = REDIRECT_URI_LOCALHOST
def __init__(self, match):
Extractor.__init__(self, match)
@@ -27,7 +30,7 @@ class OAuthBase(Extractor):
def oauth_config(self, key, default=None):
return config.interpolate(
- ("extractor", self.subcategory, key), default)
+ ("extractor", self.subcategory), key, default)
def recv(self):
"""Open local HTTP server and recv callback parameters"""
@@ -163,7 +166,7 @@ class OAuthBase(Extractor):
class OAuthDeviantart(OAuthBase):
subcategory = "deviantart"
pattern = "oauth:deviantart$"
- redirect_uri = "https://mikf.github.io/gallery-dl/oauth-redirect.html"
+ redirect_uri = REDIRECT_URI_HTTPS
def items(self):
yield Message.Version, 1
@@ -182,6 +185,7 @@ class OAuthDeviantart(OAuthBase):
class OAuthFlickr(OAuthBase):
subcategory = "flickr"
pattern = "oauth:flickr$"
+ redirect_uri = REDIRECT_URI_HTTPS
def __init__(self, match):
OAuthBase.__init__(self, match)
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 9b13391..1e52559 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.patreon.com/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
from ..cache import memcache
import collections
import json
@@ -33,13 +33,15 @@ class PatreonExtractor(Extractor):
PatreonExtractor._warning = False
for post in self.posts():
- yield Message.Directory, post
-
ids = set()
post["num"] = 0
content = post.get("content")
postfile = post.get("post_file")
+ yield Message.Directory, post
+ yield Message.Metadata, text.nameext_from_url(
+ post["creator"].get("image_url", ""), post)
+
for image in post["images"]:
url = image.get("download_url")
if not url:
@@ -97,8 +99,10 @@ class PatreonExtractor(Extractor):
attr["attachments"] = self._files(post, included, "attachments")
attr["date"] = text.parse_datetime(
attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
- attr["creator"] = self._user(
- post["relationships"]["user"]["links"]["related"])
+ user = post["relationships"]["user"]
+ attr["creator"] = (
+ self._user(user["links"]["related"]) or
+ included["user"][user["data"]["id"]])
return attr
@staticmethod
@@ -123,7 +127,10 @@ class PatreonExtractor(Extractor):
@memcache(keyarg=1)
def _user(self, url):
"""Fetch user information"""
- user = self.request(url).json()["data"]
+ response = self.request(url, fatal=False)
+ if response.status_code >= 400:
+ return None
+ user = response.json()["data"]
attr = user["attributes"]
attr["id"] = user["id"]
attr["date"] = text.parse_datetime(
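
_user() can now return None on an HTTP error, so the creator lookup falls back to the user record bundled in the listing's "included" data; a runnable restatement of that fallback:

    def creator_for(post, included, fetch_user):
        """Resolve a post's creator, preferring the API lookup."""
        user = post["relationships"]["user"]
        return (fetch_user(user["links"]["related"])
                or included["user"][user["data"]["id"]])

    included = {"user": {"42": {"full_name": "Example Creator"}}}
    post = {"relationships": {"user": {
        "links": {"related": "https://www.patreon.com/api/user/42"},
        "data": {"id": "42"},
    }}}
    print(creator_for(post, included, lambda url: None))
    # API lookup failed -> {'full_name': 'Example Creator'}
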
@@ -168,23 +175,28 @@ class PatreonCreatorExtractor(PatreonExtractor):
pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
r"/(?!(?:home|join|posts|login|signup)(?:$|[/?&#]))"
r"([^/?&#]+)/?")
- test = ("https://www.patreon.com/koveliana", {
- "range": "1-25",
- "count": ">= 25",
- "keyword": {
- "attachments": list,
- "comment_count": int,
- "content": str,
- "creator": dict,
- "date": "type:datetime",
- "id": int,
- "images": list,
- "like_count": int,
- "post_type": str,
- "published_at": str,
- "title": str,
- },
- })
+ test = (
+ ("https://www.patreon.com/koveliana", {
+ "range": "1-25",
+ "count": ">= 25",
+ "keyword": {
+ "attachments" : list,
+ "comment_count": int,
+ "content" : str,
+ "creator" : dict,
+ "date" : "type:datetime",
+ "id" : int,
+ "images" : list,
+ "like_count" : int,
+ "post_type" : str,
+ "published_at" : str,
+ "title" : str,
+ },
+ }),
+ ("https://www.patreon.com/kovelianot", {
+ "exception": exception.NotFoundError,
+ }),
+ )
def __init__(self, match):
PatreonExtractor.__init__(self, match)
@@ -192,9 +204,12 @@ class PatreonCreatorExtractor(PatreonExtractor):
def posts(self):
url = "{}/{}".format(self.root, self.creator)
- page = self.request(url).text
+ page = self.request(url, notfound="creator").text
campaign_id = text.extract(page, "/campaign/", "/")[0]
+ if not campaign_id:
+ raise exception.NotFoundError("creator")
+
url = self._build_url("posts", (
"&sort=-published_at"
"&filter[is_draft]=false"
@@ -221,19 +236,26 @@ class PatreonUserExtractor(PatreonExtractor):
class PatreonPostExtractor(PatreonExtractor):
"""Extractor for media from a single post"""
subcategory = "post"
- pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
- r"/posts/[^/?&#]*?(\d+)")
- test = ("https://www.patreon.com/posts/precious-metal-23563293", {
- "count": 4,
- })
+ pattern = r"(?:https?://)?(?:www\.)?patreon\.com/posts/([^/?&#]+)"
+ test = (
+ ("https://www.patreon.com/posts/precious-metal-23563293", {
+ "count": 4,
+ }),
+ ("https://www.patreon.com/posts/er1-28201153", {
+ "count": 1,
+ }),
+ ("https://www.patreon.com/posts/not-found-123", {
+ "exception": exception.NotFoundError,
+ }),
+ )
def __init__(self, match):
PatreonExtractor.__init__(self, match)
- self.post_id = match.group(1)
+ self.slug = match.group(1)
def posts(self):
- url = "{}/posts/{}".format(self.root, self.post_id)
- page = self.request(url).text
+ url = "{}/posts/{}".format(self.root, self.slug)
+ page = self.request(url, notfound="post").text
data = text.extract(page, "window.patreon.bootstrap,", "\n});")[0]
post = json.loads(data + "}")["post"]
diff --git a/gallery_dl/extractor/photobucket.py b/gallery_dl/extractor/photobucket.py
index 8456f97..a6456da 100644
--- a/gallery_dl/extractor/photobucket.py
+++ b/gallery_dl/extractor/photobucket.py
@@ -117,7 +117,7 @@ class PhotobucketImageExtractor(Extractor):
(("https://s271.photobucket.com/user/lakerfanryan"
"/media/Untitled-3-1.jpg.html"), {
"url": "3b647deeaffc184cc48c89945f67574559c9051f",
- "keyword": "a2de4e60d584912537b8025c01bdd1d20bdea735",
+ "keyword": "69732741b2b351db7ecaa77ace2fdb39f08ca5a3",
}),
(("https://s271.photobucket.com/user/lakerfanryan"
"/media/IsotopeswBros.jpg.html?sort=3&o=2"), {
diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py
index 2775dac..5f50245 100644
--- a/gallery_dl/extractor/piczel.py
+++ b/gallery_dl/extractor/piczel.py
@@ -65,7 +65,7 @@ class PiczelUserExtractor(PiczelExtractor):
subcategory = "user"
pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?&#]+)/?$"
test = ("https://piczel.tv/gallery/Maximumwarp", {
- "count": ">= 50",
+ "count": ">= 45",
})
def __init__(self, match):
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index e36a82b..bcdd082 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -119,8 +119,9 @@ class PinterestRelatedPinExtractor(PinterestPinExtractor):
directory_fmt = ("{category}", "related {original_pin[id]}")
pattern = BASE_PATTERN + r"/pin/([^/?#&]+).*#related$"
test = ("https://www.pinterest.com/pin/858146903966145189/#related", {
- "range": "31-50",
- "count": 20,
+ "range": "31-70",
+ "count": 40,
+ "archive": False,
})
def metadata(self):
@@ -138,8 +139,9 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor):
"{board[name]}", "related")
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+).*#related$"
test = ("https://www.pinterest.com/g1952849/test-/#related", {
- "range": "31-50",
- "count": 20,
+ "range": "31-70",
+ "count": 40,
+ "archive": False,
})
def pins(self):
@@ -241,7 +243,7 @@ class PinterestAPI():
if response.status_code == 404 or response.history:
resource = self.extractor.subcategory.rpartition("-")[2]
raise exception.NotFoundError(resource)
- self.extractor.log.debug("%s", response.text)
+ self.extractor.log.debug("Server response: %s", response.text)
raise exception.StopExtraction("API request failed")
def _pagination(self, resource, options):
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index d32f245..7901149 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -318,16 +318,22 @@ class PixivSearchExtractor(PixivExtractor):
archive_fmt = "s_{search[word]}_{id}{num}.{extension}"
directory_fmt = ("{category}", "search", "{search[word]}")
pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
- r"/search\.php\?([^#]+)")
+ r"/(?:(?:en/)?tags/([^/?&#]+)(?:/[^/?&#]+)?/?"
+ r"|search\.php)(?:\?([^#]+))?")
test = (
+ ("https://www.pixiv.net/en/tags/Original", {
+ "range": "1-10",
+ "count": 10,
+ }),
+ ("https://www.pixiv.net/en/tags/foo/artworks?order=date&s_mode=s_tag"),
("https://www.pixiv.net/search.php?s_mode=s_tag&word=Original"),
("https://touch.pixiv.net/search.php?word=Original"),
)
def __init__(self, match):
PixivExtractor.__init__(self, match)
- self.query = match.group(1)
- self.word = self.sort = self.target = None
+ self.word, self.query = match.groups()
+ self.sort = self.target = None
def works(self):
return self.api.search_illust(self.word, self.sort, self.target)
@@ -335,9 +341,12 @@ class PixivSearchExtractor(PixivExtractor):
def get_metadata(self, user=None):
query = text.parse_query(self.query)
- if "word" not in query:
- raise exception.StopExtraction("Missing search term")
- self.word = query["word"]
+ if self.word:
+ self.word = text.unquote(self.word)
+ else:
+ if "word" not in query:
+ raise exception.StopExtraction("Missing search term")
+ self.word = query["word"]
sort = query.get("order", "date_d")
sort_map = {
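
The widened pattern accepts both the new /tags/ URLs and the legacy search.php form, capturing the tag in group 1 or the raw query string in group 2; a quick runnable check with the pattern copied from above:

    import re

    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
               r"/(?:(?:en/)?tags/([^/?&#]+)(?:/[^/?&#]+)?/?"
               r"|search\.php)(?:\?([^#]+))?")

    print(re.match(pattern, "https://www.pixiv.net/en/tags/Original").groups())
    # ('Original', None)
    print(re.match(pattern,
                   "https://www.pixiv.net/search.php?word=Original").groups())
    # (None, 'word=Original')
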
diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py
index 2bb66ac..6862559 100644
--- a/gallery_dl/extractor/plurk.py
+++ b/gallery_dl/extractor/plurk.py
@@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text, extractor, exception
import datetime
+import time
import json
import re
@@ -47,13 +48,22 @@ class PlurkExtractor(Extractor):
"""Return an iterable with a 'plurk's comments"""
url = "https://www.plurk.com/Responses/get"
data = {"plurk_id": plurk["id"], "count": "200"}
+ headers = {
+ "Origin": self.root,
+ "Referer": self.root,
+ "X-Requested-With": "XMLHttpRequest",
+ }
while True:
- info = self.request(url, method="POST", data=data).json()
+ info = self.request(
+ url, method="POST", headers=headers, data=data).json()
yield from info["responses"]
if not info["has_newer"]:
return
- data["from_response_id"] = info["responses"][-1]["id"]
+ elif info["has_newer"] < 200:
+ del data["count"]
+ time.sleep(1)
+ data["from_response_id"] = info["responses"][-1]["id"] + 1
@staticmethod
def _load(data):
@@ -81,9 +91,9 @@ class PlurkTimelineExtractor(PlurkExtractor):
user_id, pos = text.extract(page, '"user_id":', ',')
plurks = self._load(text.extract(page, "_PLURKS = ", ";\n", pos)[0])
- url = "https://www.plurk.com/TimeLine/getPlurks"
- data = {"user_id": user_id.strip()}
headers = {"Referer": url, "X-Requested-With": "XMLHttpRequest"}
+ data = {"user_id": user_id.strip()}
+ url = "https://www.plurk.com/TimeLine/getPlurks"
while plurks:
yield from plurks
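
The reworked Responses/get loop above sends the XHR headers Plurk expects and pages by response id instead of re-requesting full batches. A condensed sketch of the paging logic, using dict.pop() instead of del so a second short page cannot raise KeyError (a small deviation from the hunk) and a generic post_json() helper standing in for self.request(...).json():

    import time

    def comments(post_json, plurk_id):
        """Yield all comments of a plurk (sketch of the new loop)."""
        url = "https://www.plurk.com/Responses/get"
        data = {"plurk_id": plurk_id, "count": "200"}
        while True:
            info = post_json(url, data)
            yield from info["responses"]
            if not info["has_newer"]:
                return
            if info["has_newer"] < 200:
                data.pop("count", None)   # fetch the remainder in one request
                time.sleep(1)             # brief pause before polling again
            # +1 so the last yielded comment is not requested twice
            data["from_response_id"] = info["responses"][-1]["id"] + 1
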
diff --git a/gallery_dl/extractor/realbooru.py b/gallery_dl/extractor/realbooru.py
new file mode 100644
index 0000000..6d89151
--- /dev/null
+++ b/gallery_dl/extractor/realbooru.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://realbooru.com/"""
+
+from . import booru
+
+
+class RealbooruExtractor(booru.XmlParserMixin,
+ booru.GelbooruPageMixin,
+ booru.BooruExtractor):
+ """Base class for realbooru extractors"""
+ category = "realbooru"
+ api_url = "https://realbooru.com/index.php"
+ post_url = "https://realbooru.com/index.php?page=post&s=view&id={}"
+ pool_url = "https://realbooru.com/index.php?page=pool&s=show&id={}"
+
+ def __init__(self, match):
+ super().__init__(match)
+ self.params.update({"page": "dapi", "s": "post", "q": "index"})
+
+
+class RealbooruTagExtractor(booru.TagMixin, RealbooruExtractor):
+ """Extractor for images from realbooru.com based on search-tags"""
+ pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
+ r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
+ test = ("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
+ "count": 64,
+ })
+
+
+class RealbooruPoolExtractor(booru.GelbooruPoolMixin, RealbooruExtractor):
+ """Extractor for image-pools from realbooru.com"""
+ pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
+ r"\?page=pool&s=show&id=(?P<pool>\d+)")
+ test = ("https://realbooru.com/index.php?page=pool&s=show&id=1", {
+ "count": 3,
+ })
+
+
+class RealbooruPostExtractor(booru.PostMixin, RealbooruExtractor):
+ """Extractor for single images from realbooru.com"""
+ pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
+ r"\?page=post&s=view&id=(?P<post>\d+)")
+ test = ("https://realbooru.com/index.php?page=post&s=view&id=668483", {
+ "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
+ "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
+ "options": (("tags", True),),
+ "keyword": {
+ "tags_general" : str,
+ "tags_metadata": "tagme",
+ "tags_model" : "jennifer_lawrence",
+ },
+ })
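
realbooru.com is a Gelbooru-style site, so the new module is almost pure mixin composition: booru.XmlParserMixin, booru.GelbooruPageMixin and the shared booru.BooruExtractor only need the category, the three URL templates and the dapi query parameters. The URL patterns can be exercised on their own; the regex below is copied from the tag extractor:

    import re

    TAG_PATTERN = re.compile(
        r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
        r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")

    m = TAG_PATTERN.match(
        "https://realbooru.com/index.php?page=post&s=list&tags=wine")
    print(m.group("tags"))  # -> wine
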
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index ecce003..656148e 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -60,14 +60,16 @@ class RedditExtractor(Extractor):
def _urls(self, submissions):
for submission, comments in submissions:
- self._visited.add(submission["id"])
- if not submission["is_self"]:
- yield submission["url"], submission
+ if submission:
+ self._visited.add(submission["id"])
- for url in text.extract_iter(
- submission["selftext_html"] or "", ' href="', '"'):
- yield url, submission
+ if not submission["is_self"]:
+ yield submission["url"], submission
+
+ for url in text.extract_iter(
+ submission["selftext_html"] or "", ' href="', '"'):
+ yield url, submission
if comments:
for comment in comments:
@@ -130,15 +132,14 @@ class RedditSubmissionExtractor(RedditExtractor):
r")/([a-z0-9]+)")
test = (
("https://www.reddit.com/r/lavaporn/comments/8cqhub/", {
- "pattern": r"https://",
- "count": 3,
- }),
- # ignore submission comments (#429)
- ("https://www.reddit.com/r/lavaporn/comments/8cqhub/", {
- "options": (("comments", 0),),
"pattern": r"https://c2.staticflickr.com/8/7272/\w+_k.jpg",
"count": 1,
}),
+ ("https://www.reddit.com/r/lavaporn/comments/8cqhub/", {
+ "options": (("comments", 500),),
+ "pattern": r"https://",
+ "count": 3,
+ }),
("https://old.reddit.com/r/lavaporn/comments/2a00np/"),
("https://np.reddit.com/r/lavaporn/comments/2a00np/"),
("https://m.reddit.com/r/lavaporn/comments/2a00np/"),
@@ -186,7 +187,7 @@ class RedditAPI():
def __init__(self, extractor):
self.extractor = extractor
- self.comments = text.parse_int(extractor.config("comments", 500))
+ self.comments = text.parse_int(extractor.config("comments", 0))
self.morecomments = extractor.config("morecomments", False)
self.refresh_token = extractor.config("refresh-token")
self.log = extractor.log
@@ -298,17 +299,24 @@ class RedditAPI():
while True:
data = self._call(endpoint, params)["data"]
- for submission in data["children"]:
- submission = submission["data"]
- if (date_min <= submission["created_utc"] <= date_max and
- id_min <= self._decode(submission["id"]) <= id_max):
- if submission["num_comments"] and self.comments:
- try:
- yield self.submission(submission["id"])
- except exception.AuthorizationError:
- pass
- else:
- yield submission, None
+ for child in data["children"]:
+ kind = child["kind"]
+ post = child["data"]
+
+ if (date_min <= post["created_utc"] <= date_max and
+ id_min <= self._decode(post["id"]) <= id_max):
+
+ if kind == "t3":
+ if post["num_comments"] and self.comments:
+ try:
+ yield self.submission(post["id"])
+ except exception.AuthorizationError:
+ pass
+ else:
+ yield post, None
+
+ elif kind == "t1" and self.comments:
+ yield None, (post,)
if not data["after"]:
return
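
Two related Reddit changes land here: listings now distinguish t3 (submission) from t1 (comment) children, yielding (None, (comment,)) pairs that _urls() tolerates through its new `if submission:` guard, and the 'comments' option defaults to 0, so plain subreddit and user listings no longer fetch whole comment trees. The old behaviour is one option away, e.g. `gallery-dl -o comments=500 <url>`. A minimal consumer of the new pair shape, assuming the same (submission, comments) convention as the hunk:

    def urls(submissions):
        # Pairs may now be (submission, None), (submission, comments),
        # or (None, (comment,)) for comments reached directly (kind "t1").
        for submission, comments in submissions:
            if submission and not submission["is_self"]:
                yield submission["url"], submission
            if comments:
                for comment in comments:
                    ...  # extract links from each comment body
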
diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py
index 736173f..bde0b5d 100644
--- a/gallery_dl/extractor/senmanga.py
+++ b/gallery_dl/extractor/senmanga.py
@@ -24,7 +24,7 @@ class SenmangaChapterExtractor(Extractor):
("http://raw.senmanga.com/Bokura-wa-Minna-Kawaisou/37A/1", {
"url": "5f95140ff511d8497e2ec08fa7267c6bb231faec",
"keyword": "705d941a150765edb33cd2707074bd703a93788c",
- "content": "0e37b1995708ffc175f2e175d91a518e6948c379",
+ "content": "556a16d5ca3441d7a5807b6b5ac06ec458a3e4ba",
}),
("http://raw.senmanga.com/Love-Lab/2016-03/1", {
"url": "8347b9f00c14b864dd3c19a1f5ae52adb2ef00de",
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index c4597af..2c9746e 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -21,7 +21,6 @@ class SexcomExtractor(Extractor):
root = "https://www.sex.com"
def items(self):
- self.session.headers["Referer"] = self.root
yield Message.Version, 1
yield Message.Directory, self.metadata()
for pin in map(self._parse_pin, self.pins()):
@@ -59,6 +58,7 @@ class SexcomExtractor(Extractor):
extr = text.extract_from(response.text)
data = {}
+ data["_http_headers"] = {"Referer": url}
data["thumbnail"] = extr('itemprop="thumbnail" content="', '"')
data["type"] = extr('<h1>' , '<').rstrip(" -").strip().lower()
data["title"] = text.unescape(extr('itemprop="name">' , '<'))
@@ -123,10 +123,12 @@ class SexcomPinExtractor(SexcomExtractor):
# gif
("https://www.sex.com/pin/11465040-big-titted-hentai-gif/", {
"url": "98a82c5ae7a65c8228e1405ac740f80d4d556de1",
+ "content": "a54b37eb39d565094c54ad7d21244fe8f978fb14",
}),
# video
- ("https://www.sex.com/pin/55748381/", {
- "pattern": "https://www.sex.com/video/stream/776238/hd",
+ ("https://www.sex.com/pin/55748341/", {
+ "pattern": "https://www.sex.com/video/stream/776229/hd",
+ "content": "e1a5834869163e2c4d1ca2677f5b7b367cf8cfff",
}),
# pornhub embed
("https://www.sex.com/pin/55847384-very-nicely-animated/", {
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index be29dcf..0c13825 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -154,13 +154,14 @@ class SmugmugPathExtractor(SmugmugExtractor):
"pattern": "smugmug:album:6VRT8G$",
}),
# custom domain
- ("smugmug:www.creativedogportraits.com/PortfolioGallery/", {
- "pattern": "smugmug:album:txWXzs$",
+ ("smugmug:www.sitkapics.com/TREES-and-TRAILS/", {
+ "pattern": "smugmug:album:ct8Nds$",
}),
- ("smugmug:www.creativedogportraits.com/", {
- "pattern": "smugmug:album:txWXzs$",
+ ("smugmug:www.sitkapics.com/", {
+ "pattern": r"smugmug:album:\w{6}$",
+ "count": ">= 14",
}),
- ("smugmug:https://www.creativedogportraits.com/"),
+ ("smugmug:https://www.sitkapics.com/"),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 998eed4..1d37419 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -156,8 +156,8 @@ class TumblrExtractor(Extractor):
invalid = types - POST_TYPES
if invalid:
types = types & POST_TYPES
- self.log.warning('invalid post types: "%s"',
- '", "'.join(sorted(invalid)))
+ self.log.warning("Invalid post types: '%s'",
+ "', '".join(sorted(invalid)))
return types
@staticmethod
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index dfafc1f..8ef966f 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -11,13 +11,14 @@
from .common import Extractor, Message
from .. import text, exception
from ..cache import cache, memcache
+import json
import re
class TwitterExtractor(Extractor):
"""Base class for twitter extractors"""
category = "twitter"
- directory_fmt = ("{category}", "{user}")
+ directory_fmt = ("{category}", "{user[name]}")
filename_fmt = "{tweet_id}_{num}.{extension}"
archive_fmt = "{tweet_id}_{retweet_id}_{num}"
root = "https://twitter.com"
@@ -26,6 +27,7 @@ class TwitterExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
+ self._user_dict = None
self.logged_in = False
self.retweets = self.config("retweets", True)
self.content = self.config("content", False)
@@ -37,23 +39,18 @@ class TwitterExtractor(Extractor):
def items(self):
self.login()
+ metadata = self.metadata()
yield Message.Version, 1
- yield Message.Directory, self.metadata()
for tweet in self.tweets():
data = self._data_from_tweet(tweet)
-
- if not self.retweets and data["retweet_id"]:
+ if not data or not self.retweets and data["retweet_id"]:
continue
-
- images = text.extract_iter(
- tweet, 'data-image-url="', '"')
- for data["num"], url in enumerate(images, 1):
- text.nameext_from_url(url, data)
- urls = [url + size for size in self.sizes]
- yield Message.Urllist, urls, data
+ data.update(metadata)
if self.videos and "-videoContainer" in tweet:
+ yield Message.Directory, data
+
if self.videos == "ytdl":
data["extension"] = None
url = "ytdl:{}/{}/status/{}".format(
@@ -70,9 +67,19 @@ class TwitterExtractor(Extractor):
data["num"] = 1
yield Message.Url, url, data
+ elif "data-image-url=" in tweet:
+ yield Message.Directory, data
+
+ images = text.extract_iter(
+ tweet, 'data-image-url="', '"')
+ for data["num"], url in enumerate(images, 1):
+ text.nameext_from_url(url, data)
+ urls = [url + size for size in self.sizes]
+ yield Message.Urllist, urls, data
+
def metadata(self):
"""Return general metadata"""
- return {"user": self.user}
+ return {}
def tweets(self):
"""Yield HTML content of all relevant tweets"""
@@ -113,11 +120,33 @@ class TwitterExtractor(Extractor):
"tweet_id" : text.parse_int(extr('data-tweet-id="' , '"')),
"retweet_id": text.parse_int(extr('data-retweet-id="', '"')),
"retweeter" : extr('data-retweeter="' , '"'),
- "user" : extr('data-screen-name="', '"'),
- "username" : extr('data-name="' , '"'),
- "user_id" : text.parse_int(extr('data-user-id="' , '"')),
- "date" : text.parse_timestamp(extr('data-time="', '"')),
+ "author" : {
+ "name" : extr('data-screen-name="', '"'),
+ "nick" : text.unescape(extr('data-name="' , '"')),
+ "id" : text.parse_int(extr('data-user-id="' , '"')),
+ },
}
+
+ if not self._user_dict:
+ if data["retweet_id"]:
+ for user in json.loads(text.unescape(extr(
+ 'data-reply-to-users-json="', '"'))):
+ if user["screen_name"] == data["retweeter"]:
+ break
+ else:
+ self.log.warning("Unable to extract user info")
+ return None
+ self._user_dict = {
+ "name": user["screen_name"],
+ "nick": text.unescape(user["name"]),
+ "id" : text.parse_int(user["id_str"]),
+ }
+ else:
+ self._user_dict = data["author"]
+
+ data["user"] = self._user_dict
+ data["date"] = text.parse_timestamp(extr('data-time="', '"'))
+
if self.content:
content = extr('<div class="js-tweet-text-container">', '\n</div>')
if '<img class="Emoji ' in content:
@@ -125,6 +154,7 @@ class TwitterExtractor(Extractor):
content = text.unescape(text.remove_html(content, "", ""))
cl, _, cr = content.rpartition("pic.twitter.com/")
data["content"] = cl if cl and len(cr) < 16 else content
+
return data
def _video_from_tweet(self, tweet_id):
@@ -185,7 +215,7 @@ class TwitterExtractor(Extractor):
if "min_position" in data:
position = data["min_position"]
- if position == max_position:
+ if position == max_position or position is None:
return
else:
position = text.parse_int(text.extract(
@@ -204,7 +234,7 @@ class TwitterTimelineExtractor(TwitterExtractor):
("https://twitter.com/supernaturepics", {
"range": "1-40",
"url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",
- "keyword": "7210d679606240405e0cf62cbc67596e81a7a250",
+ "keyword": "37f4d35affd733d458d3b235b4a55f619a86f794",
}),
("https://mobile.twitter.com/supernaturepics?p=i"),
)
@@ -262,13 +292,13 @@ class TwitterTweetExtractor(TwitterExtractor):
test = (
("https://twitter.com/supernaturepics/status/604341487988576256", {
"url": "0e801d2f98142dd87c3630ded9e4be4a4d63b580",
- "keyword": "1b8afb93cc04a9f44d89173f8facc61c3a6caf91",
+ "keyword": "3fa3623e8d9a204597238e2f1f6433da19c63b4a",
"content": "ab05e1d8d21f8d43496df284d31e8b362cd3bcab",
}),
# 4 images
("https://twitter.com/perrypumas/status/894001459754180609", {
"url": "c8a262a9698cb733fb27870f5a8f75faf77d79f6",
- "keyword": "43d98ab448193f0d4f30aa571a4b6bda9b6a5692",
+ "keyword": "49165725116ac52193a3861e8f5534e47a706b62",
}),
# video
("https://twitter.com/perrypumas/status/1065692031626829824", {
@@ -278,7 +308,7 @@ class TwitterTweetExtractor(TwitterExtractor):
# content with emoji, newlines, hashtags (#338)
("https://twitter.com/yumi_san0112/status/1151144618936823808", {
"options": (("content", True),),
- "keyword": "b133464b73aec33871521ab021a3166204194285",
+ "keyword": "0b7a3d05607b480c1412dfd85f8606478313e7bf",
}),
# Reply to another tweet (#403)
("https://twitter.com/tyson_hesse/status/1103767554424598528", {
@@ -295,9 +325,6 @@ class TwitterTweetExtractor(TwitterExtractor):
TwitterExtractor.__init__(self, match)
self.tweet_id = match.group(2)
- def metadata(self):
- return {"user": self.user, "tweet_id": self.tweet_id}
-
def tweets(self):
url = "{}/i/web/status/{}".format(self.root, self.tweet_id)
cookies = {"app_shell_visited": "1"}
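
The Twitter metadata overhaul replaces the flat user/username/user_id fields with nested dicts and caches the timeline owner in _user_dict, recovering it from the data-reply-to-users-json attribute when the first tweet is a retweet. Custom format strings need a matching update; hypothetical examples:

    old_fmt = "{category}_{user}_{tweet_id}_{num}.{extension}"        # pre-1.12
    new_fmt = "{category}_{user[name]}_{tweet_id}_{num}.{extension}"  # 1.12+
    # likewise: {username} -> {user[nick]}, {user_id} -> {user[id]}
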
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 6cc5911..a24d3fe 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -29,10 +29,25 @@ class VscoExtractor(Extractor):
self.user = match.group(1).lower()
def items(self):
+ videos = self.config("videos", True)
yield Message.Version, 1
yield Message.Directory, {"user": self.user}
for img in self.images():
- url = "https://" + (img.get("video_url") or img["responsive_url"])
+
+ if img["is_video"]:
+ if not videos:
+ continue
+ url = "https://" + img["video_url"]
+ else:
+ base = img["responsive_url"].partition("/")[2]
+ cdn, _, path = base.partition("/")
+ if cdn.startswith("aws"):
+ url = "https://image-{}.vsco.co/{}".format(cdn, path)
+ elif cdn.isdecimal():
+ url = "https://image.vsco.co/" + base
+ else:
+ url = "https://" + img["responsive_url"]
+
data = text.nameext_from_url(url, {
"id" : img["_id"],
"user" : self.user,
@@ -66,10 +81,20 @@ class VscoExtractor(Extractor):
while True:
data = self.request(url, params=params, headers=headers).json()
- if not data.get(key):
+ medias = data.get(key)
+ if not medias:
return
- yield from data[key]
- params["page"] += 1
+
+ if "cursor" in params:
+ for media in medias:
+ yield media[media["type"]]
+ cursor = data.get("next_cursor")
+ if not cursor:
+ return
+ params["cursor"] = cursor
+ else:
+ yield from medias
+ params["page"] += 1
@staticmethod
def _transform_media(media):
@@ -89,9 +114,9 @@ class VscoUserExtractor(VscoExtractor):
pattern = BASE_PATTERN + r"(?:/images(?:/\d+)?)?/?(?:$|[?#])"
test = (
("https://vsco.co/missuri/images/1", {
+ "pattern": r"https://image(-aws.+)?\.vsco\.co/[0-9a-f/]+/vsco\w+",
"range": "1-80",
"count": 80,
- "pattern": r"https://im\.vsco\.co/[^/]+/[0-9a-f/]+/vsco\w+\.\w+",
}),
("https://vsco.co/missuri"),
)
@@ -102,12 +127,19 @@ class VscoUserExtractor(VscoExtractor):
tkn = data["users"]["currentUser"]["tkn"]
sid = str(data["sites"]["siteByUsername"][self.user]["site"]["id"])
+ site = data["medias"]["bySiteId"][sid]
+
+ url = "{}/api/3.0/medias/profile".format(self.root)
+ params = {
+ "site_id" : sid,
+ "limit" : "14",
+ "show_only": "0",
+ "cursor" : site["nextCursor"],
+ }
- url = "{}/api/2.0/medias".format(self.root)
- params = {"page": 2, "size": "30", "site_id": sid}
return self._pagination(url, params, tkn, "media", (
- data["medias"]["byId"][mid]["media"]
- for mid in data["medias"]["bySiteId"][sid]["medias"]["1"]
+ data["medias"]["byId"][media[media["type"]]]["media"]
+ for media in site["medias"]
))
@@ -118,9 +150,9 @@ class VscoCollectionExtractor(VscoExtractor):
archive_fmt = "c_{user}_{id}"
pattern = BASE_PATTERN + r"/collection/"
test = ("https://vsco.co/vsco/collection/1", {
+ "pattern": r"https://image(-aws.+)?\.vsco\.co/[0-9a-f/]+/vsco\w+\.\w+",
"range": "1-80",
"count": 80,
- "pattern": r"https://im\.vsco\.co/[^/]+/[0-9a-f/]+/vsco\w+\.\w+",
})
def images(self):
@@ -136,7 +168,7 @@ class VscoCollectionExtractor(VscoExtractor):
return self._pagination(url, params, tkn, "medias", (
data["medias"]["byId"][mid]["media"]
for mid in data
- ["collections"]["byCollectionId"][cid]["collection"]["1"]
+ ["collections"]["byCollectionId"][cid]["byPage"]["1"]["collection"]
))
@@ -146,7 +178,7 @@ class VscoImageExtractor(VscoExtractor):
pattern = BASE_PATTERN + r"/media/([0-9a-fA-F]+)"
test = (
("https://vsco.co/erenyildiz/media/5d34b93ef632433030707ce2", {
- "url": "faa214d10f859f374ad91da3f7547d2439f5af08",
+ "url": "a45f9712325b42742324b330c348b72477996031",
"content": "1394d070828d82078035f19a92f404557b56b83f",
"keyword": {
"id" : "5d34b93ef632433030707ce2",
diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py
index 09a166c..737c253 100644
--- a/gallery_dl/extractor/wallhaven.py
+++ b/gallery_dl/extractor/wallhaven.py
@@ -33,8 +33,8 @@ class WallhavenSearchExtractor(WallhavenExtractor):
("https://wallhaven.cc/search?q=touhou"),
(("https://wallhaven.cc/search?q=id%3A87"
"&categories=111&purity=100&sorting=date_added&order=asc&page=3"), {
- "count": 5,
- "url": "d477b68a534c3416d506ae1f159b25debab64678",
+ "pattern": r"https://w.wallhaven.cc/full/\w\w/wallhaven-\w+\.\w+",
+ "count": "<= 10",
}),
)
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 0f4ebd2..49fa082 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.weibo.com/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
import json
@@ -124,7 +124,7 @@ class WeiboStatusExtractor(WeiboExtractor):
}),
# unavailable video (#427)
("https://m.weibo.cn/status/4268682979207023", {
- "count": 0,
+ "exception": exception.NotFoundError,
}),
("https://m.weibo.cn/status/4339748116375525"),
("https://m.weibo.cn/5746766133/4339748116375525"),
@@ -136,7 +136,8 @@ class WeiboStatusExtractor(WeiboExtractor):
def statuses(self):
url = "{}/detail/{}".format(self.root, self.status_id)
- page = self.request(url).text
- data = json.loads(text.extract(
- page, " var $render_data = [", "][0] || {};")[0])
- return (data["status"],)
+ page = self.request(url, notfound="status").text
+ data = text.extract(page, "var $render_data = [", "][0] || {};")[0]
+ if not data:
+ raise exception.NotFoundError("status")
+ return (json.loads(data)["status"],)
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index 463733f..ac289df 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -93,7 +93,7 @@ class WikiartArtworksExtractor(WikiartExtractor):
directory_fmt = ("{category}", "Artworks by {group!c}", "{type}")
pattern = BASE_PATTERN + r"/paintings-by-([\w-]+)/([\w-]+)"
test = ("https://www.wikiart.org/en/paintings-by-media/grisaille", {
- "url": "f92d55669fa949491c26a5437527adb14b35b8cc",
+ "url": "228426a9d32b5bba9d659944c6b0ba73883af33f",
})
def __init__(self, match):
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index e253b7f..80a3614 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -6,86 +6,91 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://www.xvideos.com/"""
+"""Extractors for https://www.xvideos.com/"""
-from .common import Extractor, Message
-from .. import text, exception
+from .common import GalleryExtractor, Extractor, Message
+from .. import text
import json
-class XvideosExtractor(Extractor):
+class XvideosBase():
"""Base class for xvideos extractors"""
category = "xvideos"
root = "https://www.xvideos.com"
-class XvideosGalleryExtractor(XvideosExtractor):
- """Extractor for user profile galleries from xvideos.com"""
+class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
+ """Extractor for user profile galleries on xvideos.com"""
subcategory = "gallery"
- directory_fmt = ("{category}", "{user[name]}", "{title}")
- filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
- archive_fmt = "{gallery_id}_{num}"
+ directory_fmt = ("{category}", "{user[name]}",
+ "{gallery[id]} {gallery[title]}")
+ filename_fmt = "{category}_{gallery[id]}_{num:>03}.{extension}"
+ archive_fmt = "{gallery[id]}_{num}"
pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com"
- r"/profiles/([^/?&#]+)/photos/(\d+)")
+ r"/(?:profiles|amateur-channels|model-channels)"
+ r"/([^/?&#]+)/photos/(\d+)")
test = (
- (("https://www.xvideos.com/profiles"
- "/pervertedcouple/photos/751031/random_stuff"), {
+ ("https://www.xvideos.com/profiles/pervertedcouple/photos/751031", {
"url": "4f0d992e5dc39def2c3ac8e099d17bf09e76e3c7",
- "keyword": "65979d63a69576cf692b41d5fbbd995cc40a51b9",
- }),
- ("https://www.xvideos.com/profiles/pervertedcouple/photos/751032/", {
- "exception": exception.NotFoundError,
+ "keyword": {
+ "gallery": {
+ "id" : 751031,
+ "title": "Random Stuff",
+ "tags" : list,
+ },
+ "user": {
+ "id" : 20245371,
+ "name" : "pervertedcouple",
+ "display" : "Pervertedcouple",
+ "sex" : "Woman",
+ "description": str,
+ },
+ },
}),
+ ("https://www.xvideos.com/amateur-channels/pervertedcouple/photos/12"),
+ ("https://www.xvideos.com/model-channels/pervertedcouple/photos/12"),
)
def __init__(self, match):
- XvideosExtractor.__init__(self, match)
- self.user, self.gid = match.groups()
-
- def items(self):
- url = "{}/profiles/{}/photos/{}".format(self.root, self.user, self.gid)
- page = self.request(url, notfound=self.subcategory).text
- data = self.get_metadata(page)
- imgs = self.get_images(page)
- data["count"] = len(imgs)
- yield Message.Version, 1
- yield Message.Directory, data
- for url in imgs:
- data["num"] = text.parse_int(url.rsplit("_", 2)[1])
- data["extension"] = url.rpartition(".")[2]
- yield Message.Url, url, data
-
- def get_metadata(self, page):
- """Collect metadata for extractor-job"""
- data = text.extract_all(page, (
- ("userid" , '"id_user":', ','),
- ("display", '"display":"', '"'),
- ("title" , '"title":"', '"'),
- ("descr" , '<small class="mobile-hide">', '</small>'),
- ("tags" , '<em>Tagged:</em>', '<'),
- ))[0]
+ self.user, self.gallery_id = match.groups()
+ url = "{}/profiles/{}/photos/{}".format(
+ self.root, self.user, self.gallery_id)
+ GalleryExtractor.__init__(self, match, url)
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ user = {
+ "id" : text.parse_int(extr('"id_user":', ',')),
+ "display": extr('"display":"', '"'),
+ "sex" : extr('"sex":"', '"'),
+ "name" : self.user,
+ }
+ title = extr('"title":"', '"')
+ user["description"] = extr(
+ '<small class="mobile-hide">', '</small>').strip()
+ tags = extr('<em>Tagged:</em>', '<').strip()
return {
- "user": {
- "id": text.parse_int(data["userid"]),
- "name": self.user,
- "display": data["display"],
- "description": data["descr"].strip(),
+ "user": user,
+ "gallery": {
+ "id" : text.parse_int(self.gallery_id),
+ "title": text.unescape(title),
+ "tags" : text.unescape(tags).split(", ") if tags else [],
},
- "tags": text.unescape(data["tags"] or "").strip().split(", "),
- "title": text.unescape(data["title"]),
- "gallery_id": text.parse_int(self.gid),
}
@staticmethod
- def get_images(page):
+ def images(page):
"""Return a list of all image urls for this gallery"""
- return list(text.extract_iter(
- page, '<a class="embed-responsive-item" href="', '"'))
+ return [
+ (url, None)
+ for url in text.extract_iter(
+ page, '<a class="embed-responsive-item" href="', '"')
+ ]
-class XvideosUserExtractor(XvideosExtractor):
- """Extractor for user profiles from xvideos.com"""
+class XvideosUserExtractor(XvideosBase, Extractor):
+ """Extractor for user profiles on xvideos.com"""
subcategory = "user"
categorytransfer = True
pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com"
@@ -93,16 +98,13 @@ class XvideosUserExtractor(XvideosExtractor):
test = (
("https://www.xvideos.com/profiles/pervertedcouple", {
"url": "a413f3e60d6d3a2de79bd44fa3b7a9c03db4336e",
- "keyword": "a796760d34732adc7ec52a8feb057515209a2ca6",
- }),
- ("https://www.xvideos.com/profiles/niwehrwhernvh", {
- "exception": exception.NotFoundError,
+ "keyword": "335a3304941ff2e666c0201e9122819b61b34adb",
}),
("https://www.xvideos.com/profiles/pervertedcouple#_tabPhotos"),
)
def __init__(self, match):
- XvideosExtractor.__init__(self, match)
+ Extractor.__init__(self, match)
self.user = match.group(1)
def items(self):
@@ -118,17 +120,17 @@ class XvideosUserExtractor(XvideosExtractor):
galleries = [
{
- "gallery_id": text.parse_int(gid),
+ "id" : text.parse_int(gid),
"title": text.unescape(gdata["title"]),
"count": gdata["nb_pics"],
"_extractor": XvideosGalleryExtractor,
}
for gid, gdata in data["galleries"].items()
]
- galleries.sort(key=lambda x: x["gallery_id"])
+ galleries.sort(key=lambda x: x["id"])
yield Message.Version, 1
for gallery in galleries:
url = "https://www.xvideos.com/profiles/{}/photos/{}".format(
- self.user, gallery["gallery_id"])
+ self.user, gallery["id"])
yield Message.Queue, url, gallery
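
The xvideos rewrite ports galleries onto GalleryExtractor: images() now returns the (url, metadata) pairs the base class consumes (None meaning no per-image metadata), and metadata is nested into gallery and user dicts. Custom format strings need the same kind of migration as for Twitter above; hypothetical examples:

    old_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
    new_fmt = "{category}_{gallery[id]}_{num:>03}.{extension}"
    # likewise: {title} -> {gallery[title]}, {tags} -> {gallery[tags]}
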
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 9c76336..88b6a55 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -8,6 +8,7 @@
import sys
import time
+import errno
import logging
from . import extractor, downloader, postprocessor
from . import config, text, util, output, exception
@@ -97,6 +98,10 @@ class Job():
self.update_kwdict(kwds)
self.handle_urllist(urls, kwds)
+ elif msg[0] == Message.Metadata:
+ self.update_kwdict(msg[1])
+ self.handle_metadata(msg[1])
+
elif msg[0] == Message.Version:
if msg[1] != 1:
raise "unsupported message-version ({}, {})".format(
@@ -114,6 +119,9 @@ class Job():
def handle_directory(self, kwdict):
"""Handle Message.Directory"""
+ def handle_metadata(self, kwdict):
+ """Handle Message.Metadata"""
+
def handle_queue(self, url, kwdict):
"""Handle Message.Queue"""
@@ -161,11 +169,6 @@ class Job():
if self.ulog:
self.ulog.info(url)
- @staticmethod
- def _filter(kwdict):
- """Return a copy of 'kwdict' without "private" entries"""
- return {k: v for k, v in kwdict.items() if k[0] != "_"}
-
class DownloadJob(Job):
"""Download images into appropriate directory/filename locations"""
@@ -247,6 +250,16 @@ class DownloadJob(Job):
else:
self.pathfmt.set_directory(kwdict)
+ def handle_metadata(self, kwdict):
+ """Run postprocessors with metadata from 'kwdict'"""
+ postprocessors = self.postprocessors
+
+ if postprocessors:
+ pathfmt = self.pathfmt
+ pathfmt.set_filename(kwdict)
+ for pp in postprocessors:
+ pp.run_metadata(pathfmt)
+
def handle_queue(self, url, kwdict):
if "_extractor" in kwdict:
extr = kwdict["_extractor"].from_url(url)
@@ -280,7 +293,13 @@ class DownloadJob(Job):
scheme = url.partition(":")[0]
downloader = self.get_downloader(scheme)
if downloader:
- return downloader.download(url, self.pathfmt)
+ try:
+ return downloader.download(url, self.pathfmt)
+ except OSError as exc:
+ if exc.errno == errno.ENOSPC:
+ raise
+ self.log.warning("%s: %s", exc.__class__.__name__, exc)
+ return False
self._write_unsupported(url)
return False
@@ -291,14 +310,14 @@ class DownloadJob(Job):
except KeyError:
pass
- klass = downloader.find(scheme)
- if klass and config.get(("downloader", klass.scheme, "enabled"), True):
- instance = klass(self.extractor, self.out)
+ cls = downloader.find(scheme)
+ if cls and config.get(("downloader", cls.scheme), "enabled", True):
+ instance = cls(self.extractor, self.out)
else:
instance = None
self.log.error("'%s:' URLs are not supported/enabled", scheme)
- if klass and klass.scheme == "http":
+ if cls and cls.scheme == "http":
self.downloaders["http"] = self.downloaders["https"] = instance
else:
self.downloaders[scheme] = instance
@@ -477,7 +496,10 @@ class DataJob(Job):
Job.__init__(self, url, parent)
self.file = file
self.data = []
- self.ascii = config.get(("output", "ascii"), ensure_ascii)
+ self.ascii = config.get(("output",), "ascii", ensure_ascii)
+
+ private = config.get(("output",), "private")
+ self.filter = (lambda x: x) if private else util.filter_dict
def run(self):
# collect data
@@ -492,7 +514,7 @@ class DataJob(Job):
pass
# convert numbers to string
- if config.get(("output", "num-to-str"), False):
+ if config.get(("output",), "num-to-str", False):
for msg in self.data:
util.transform_dict(msg[-1], util.number_to_string)
@@ -501,16 +523,19 @@ class DataJob(Job):
return 0
def handle_url(self, url, kwdict):
- self.data.append((Message.Url, url, self._filter(kwdict)))
+ self.data.append((Message.Url, url, self.filter(kwdict)))
def handle_urllist(self, urls, kwdict):
- self.data.append((Message.Urllist, list(urls), self._filter(kwdict)))
+ self.data.append((Message.Urllist, list(urls), self.filter(kwdict)))
def handle_directory(self, kwdict):
- self.data.append((Message.Directory, self._filter(kwdict)))
+ self.data.append((Message.Directory, self.filter(kwdict)))
+
+ def handle_metadata(self, kwdict):
+ self.data.append((Message.Metadata, self.filter(kwdict)))
def handle_queue(self, url, kwdict):
- self.data.append((Message.Queue, url, self._filter(kwdict)))
+ self.data.append((Message.Queue, url, self.filter(kwdict)))
def handle_finalize(self):
self.file.close()
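
job.py wires up the new Message.Metadata type: DownloadJob feeds such messages to every postprocessor's run_metadata(), and DataJob records them like any other message. Downloads are also hardened: OSError is caught and logged, with only ENOSPC (disk full) re-raised, and DataJob gains an output.private option that keeps "_"-prefixed entries instead of filtering them through util.filter_dict(). Judging from the dispatch code, an extractor emits the new message as a plain (type, kwdict) pair; a sketch with hypothetical values:

    def items(self):
        yield Message.Version, 1
        yield Message.Directory, {"user": "someone"}
        # metadata-only update, routed to handle_metadata():
        yield Message.Metadata, {"post_id": 123, "content": "..."}
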
diff --git a/gallery_dl/oauth.py b/gallery_dl/oauth.py
index 3093a72..9ceefbf 100644
--- a/gallery_dl/oauth.py
+++ b/gallery_dl/oauth.py
@@ -115,14 +115,15 @@ class OAuth1API():
api_secret = extractor.config("api-secret", self.API_SECRET)
token = extractor.config("access-token")
token_secret = extractor.config("access-token-secret")
+ key_type = "default" if api_key == self.API_KEY else "custom"
if api_key and api_secret and token and token_secret:
- self.log.debug("Using OAuth1.0 authentication")
+ self.log.debug("Using %s OAuth1.0 authentication", key_type)
self.session = OAuth1Session(
api_key, api_secret, token, token_secret)
self.api_key = None
else:
- self.log.debug("Using api_key authentication")
+ self.log.debug("Using %s api_key authentication", key_type)
self.session = extractor.session
self.api_key = api_key
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 3118b83..34222a2 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -18,13 +18,13 @@ from . import job, version
class ConfigAction(argparse.Action):
"""Set argparse results as config values"""
def __call__(self, parser, namespace, values, option_string=None):
- namespace.options.append(((self.dest,), values))
+ namespace.options.append(((), self.dest, values))
class ConfigConstAction(argparse.Action):
"""Set argparse const values as config values"""
def __call__(self, parser, namespace, values, option_string=None):
- namespace.options.append(((self.dest,), self.const))
+ namespace.options.append(((), self.dest, self.const))
class AppendCommandAction(argparse.Action):
@@ -41,7 +41,7 @@ class DeprecatedConfigConstAction(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None):
print("warning: {} is deprecated. Use {} instead.".format(
"/".join(self.option_strings), self.choices), file=sys.stderr)
- namespace.options.append(((self.dest,), self.const))
+ namespace.options.append(((), self.dest, self.const))
class ParseAction(argparse.Action):
@@ -52,8 +52,8 @@ class ParseAction(argparse.Action):
value = json.loads(value)
except ValueError:
pass
- key = key.split(".")
- namespace.options.append((key, value))
+ key = key.split(".") # splitting an empty string becomes [""]
+ namespace.options.append((key[:-1], key[-1], value))
class Formatter(argparse.HelpFormatter):
@@ -224,12 +224,6 @@ def build_parser():
dest="verify", nargs=0, action=ConfigConstAction, const=False,
help="Disable HTTPS certificate validation",
)
- downloader.add_argument(
- "--abort-on-skip",
- action=DeprecatedConfigConstAction,
- dest="skip", nargs=0, const="abort", choices="-A/--abort",
- help=argparse.SUPPRESS,
- )
configuration = parser.add_argument_group("Configuration Options")
configuration.add_argument(
@@ -313,13 +307,6 @@ def build_parser():
help="Store downloaded files in a ZIP archive",
)
postprocessor.add_argument(
- "--exec",
- dest="postprocessors", metavar="CMD",
- action=AppendCommandAction, const={"name": "exec"},
- help=("Execute CMD for each downloaded file. "
- "Example: --exec 'magick convert {} {}.png && rm {}'"),
- )
- postprocessor.add_argument(
"--ugoira-conv",
dest="postprocessors", action="append_const", const={
"name" : "ugoira",
@@ -358,6 +345,20 @@ def build_parser():
action="append_const", const={"name": "mtime"},
help="Set file modification times according to 'date' metadata",
)
+ postprocessor.add_argument(
+ "--exec",
+ dest="postprocessors", metavar="CMD",
+ action=AppendCommandAction, const={"name": "exec"},
+ help=("Execute CMD for each downloaded file. "
+ "Example: --exec 'convert {} {}.png && rm {}'"),
+ )
+ postprocessor.add_argument(
+ "--exec-after",
+ dest="postprocessors", metavar="CMD",
+ action=AppendCommandAction, const={"name": "exec", "final": True},
+        help=("Execute CMD after all files have been downloaded successfully. "
+ "Example: --exec-after 'cd {} && convert * ../doc.pdf'"),
+ )
parser.add_argument(
"urls",
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index 87c5006..38e2f60 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -83,7 +83,7 @@ def initialize_logging(loglevel):
def setup_logging_handler(key, fmt=LOG_FORMAT, lvl=LOG_LEVEL):
"""Setup a new logging handler"""
- opts = config.interpolate(("output", key))
+ opts = config.interpolate(("output",), key)
if not opts:
return None
if not isinstance(opts, dict):
@@ -114,7 +114,7 @@ def setup_logging_handler(key, fmt=LOG_FORMAT, lvl=LOG_LEVEL):
def configure_logging_handler(key, handler):
"""Configure a logging handler"""
- opts = config.interpolate(("output", key))
+ opts = config.interpolate(("output",), key)
if not opts:
return
if isinstance(opts, str):
@@ -156,7 +156,7 @@ def select():
"color": ColorOutput,
"null": NullOutput,
}
- omode = config.get(("output", "mode"), "auto").lower()
+ omode = config.get(("output",), "mode", "auto").lower()
if omode in pdict:
return pdict[omode]()
elif omode == "auto":
@@ -192,7 +192,7 @@ class PipeOutput(NullOutput):
class TerminalOutput(NullOutput):
def __init__(self):
- self.short = config.get(("output", "shorten"), True)
+ self.short = config.get(("output",), "shorten", True)
if self.short:
self.width = shutil.get_terminal_size().columns - OFFSET
diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py
index 83b42eb..70b0dfb 100644
--- a/gallery_dl/postprocessor/common.py
+++ b/gallery_dl/postprocessor/common.py
@@ -27,6 +27,10 @@ class PostProcessor():
"""Execute the postprocessor for a file"""
@staticmethod
+ def run_metadata(pathfmt):
+        """Execute the postprocessor for a file's metadata"""
+
+ @staticmethod
def run_after(pathfmt):
"""Execute postprocessor after moving a file to its target location"""
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 467ef11..bc26484 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -18,29 +18,49 @@ class MetadataPP(PostProcessor):
PostProcessor.__init__(self)
mode = options.get("mode", "json")
- ext = "txt"
-
if mode == "custom":
self.write = self._write_custom
- self.formatter = util.Formatter(options.get("format"))
+ cfmt = options.get("content-format") or options.get("format")
+ self.contentfmt = util.Formatter(cfmt).format_map
+ ext = "txt"
elif mode == "tags":
self.write = self._write_tags
+ ext = "txt"
else:
self.write = self._write_json
self.indent = options.get("indent", 4)
self.ascii = options.get("ascii", False)
ext = "json"
- self.extension = options.get("extension", ext)
+ extfmt = options.get("extension-format")
+ if extfmt:
+ self.path = self._path_format
+ self.extfmt = util.Formatter(extfmt).format_map
+ else:
+ self.path = self._path_append
+ self.extension = options.get("extension", ext)
+
+ if options.get("bypost"):
+ self.run_metadata, self.run = self.run, self.run_metadata
def run(self, pathfmt):
- path = "{}.{}".format(pathfmt.realpath, self.extension)
- with open(path, "w", encoding="utf-8") as file:
+ with open(self.path(pathfmt), "w", encoding="utf-8") as file:
self.write(file, pathfmt.kwdict)
+ def _path_append(self, pathfmt):
+ return "{}.{}".format(pathfmt.realpath, self.extension)
+
+ def _path_format(self, pathfmt):
+ kwdict = pathfmt.kwdict
+ ext = kwdict["extension"]
+ kwdict["extension"] = pathfmt.extension
+ kwdict["extension"] = pathfmt.prefix + self.extfmt(kwdict)
+ path = pathfmt.realdirectory + pathfmt.build_filename()
+ kwdict["extension"] = ext
+ return path
+
def _write_custom(self, file, kwdict):
- output = self.formatter.format_map(kwdict)
- file.write(output)
+ file.write(self.contentfmt(kwdict))
def _write_tags(self, file, kwdict):
tags = kwdict.get("tags") or kwdict.get("tag_string")
@@ -58,7 +78,7 @@ class MetadataPP(PostProcessor):
file.write("\n")
def _write_json(self, file, kwdict):
- util.dump_json(kwdict, file, self.ascii, self.indent)
+ util.dump_json(util.filter_dict(kwdict), file, self.ascii, self.indent)
__postprocessor__ = MetadataPP
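
The metadata postprocessor grows three options here: 'content-format', a preferred alias of 'format' for custom mode; 'extension-format', a format string that replaces the whole extension instead of appending '.<ext>'; and 'bypost', which swaps run() and run_metadata() so output is written per Message.Metadata instead of per downloaded file. JSON output is also passed through util.filter_dict() now. A hypothetical options dict exercising the new keys (the extension-format value mirrors the tests below):

    options = {
        "name": "metadata",
        "mode": "custom",
        "content-format": "{title}\n",             # alias of "format"
        "extension-format": "{extension!u}-data",  # yields file.EXT-data
    }
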
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index fb51edf..48ae0be 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -78,6 +78,11 @@ def transform_dict(a, func):
a[key] = func(value)
+def filter_dict(a):
+ """Return a copy of 'a' without "private" entries"""
+ return {k: v for k, v in a.items() if k[0] != "_"}
+
+
def number_to_string(value, numbers=(int, float)):
"""Convert numbers (int, float) to string; Return everything else as is."""
return str(value) if value.__class__ in numbers else value
@@ -665,17 +670,17 @@ class PathFormat():
self.temppath = self.realpath = self.realpath[:-1]
return True
- def build_path(self):
- """Use filename metadata and directory to build a full path"""
-
- # Apply 'kwdict' to filename format string
+ def build_filename(self):
+ """Apply 'kwdict' to filename format string"""
try:
- self.filename = filename = self.clean_path(self.clean_segment(
+ return self.clean_path(self.clean_segment(
self.filename_formatter(self.kwdict)))
except Exception as exc:
raise exception.FilenameFormatError(exc)
- # Combine directory and filename to full paths
+ def build_path(self):
+ """Combine directory and filename to full paths"""
+ self.filename = filename = self.build_filename()
self.path = self.directory + filename
self.realpath = self.realdirectory + filename
if not self.temppath:
@@ -743,13 +748,13 @@ class DownloadArchive():
def __contains__(self, kwdict):
"""Return True if the item described by 'kwdict' exists in archive"""
- key = self.keygen(kwdict)
+ key = kwdict["_archive_key"] = self.keygen(kwdict)
self.cursor.execute(
"SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
return self.cursor.fetchone()
def add(self, kwdict):
"""Add item described by 'kwdict' to archive"""
- key = self.keygen(kwdict)
+ key = kwdict.get("_archive_key") or self.keygen(kwdict)
self.cursor.execute(
"INSERT OR IGNORE INTO archive VALUES (?)", (key,))
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 4d73139..2ac7ceb 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.11.1"
+__version__ = "1.12.1"
diff --git a/test/test_config.py b/test/test_config.py
index 8cdb3da..a9d3f54 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2015-2017 Mike Fährmann
+# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -16,65 +16,125 @@ import tempfile
class TestConfig(unittest.TestCase):
def setUp(self):
- fd, self._configfile = tempfile.mkstemp()
- with os.fdopen(fd, "w") as file:
- file.write('{"a": "1", "b": {"a": 2, "c": "text"}}')
- config.load((self._configfile,))
+ config.set(() , "a", 1)
+ config.set(("b",) , "a", 2)
+ config.set(("b", "b"), "a", 3)
+ config.set(("b",) , "c", "text")
+ config.set(("b", "b"), "c", [8, 9])
def tearDown(self):
config.clear()
- os.remove(self._configfile)
def test_get(self):
- self.assertEqual(config.get(["a"]), "1")
- self.assertEqual(config.get(["b", "c"]), "text")
- self.assertEqual(config.get(["d"]), None)
- self.assertEqual(config.get(["e", "f", "g"], 123), 123)
+ self.assertEqual(config.get(() , "a") , 1)
+ self.assertEqual(config.get(("b",) , "a") , 2)
+ self.assertEqual(config.get(("b", "b"), "a") , 3)
+
+ self.assertEqual(config.get(() , "c") , None)
+ self.assertEqual(config.get(("b",) , "c") , "text")
+ self.assertEqual(config.get(("b", "b"), "c") , [8, 9])
+
+ self.assertEqual(config.get(("a",) , "g") , None)
+ self.assertEqual(config.get(("a", "a"), "g") , None)
+ self.assertEqual(config.get(("e", "f"), "g") , None)
+ self.assertEqual(config.get(("e", "f"), "g", 4), 4)
def test_interpolate(self):
- self.assertEqual(config.interpolate(["a"]), "1")
- self.assertEqual(config.interpolate(["b", "a"]), "1")
- self.assertEqual(config.interpolate(["b", "c"], "2"), "text")
- self.assertEqual(config.interpolate(["b", "d"], "2"), "2")
- config.set(["d"], 123)
- self.assertEqual(config.interpolate(["b", "d"], "2"), 123)
- self.assertEqual(config.interpolate(["d", "d"], "2"), 123)
+ self.assertEqual(config.interpolate(() , "a"), 1)
+ self.assertEqual(config.interpolate(("b",) , "a"), 1)
+ self.assertEqual(config.interpolate(("b", "b"), "a"), 1)
+
+ self.assertEqual(config.interpolate(() , "c"), None)
+ self.assertEqual(config.interpolate(("b",) , "c"), "text")
+ self.assertEqual(config.interpolate(("b", "b"), "c"), [8, 9])
+
+ self.assertEqual(config.interpolate(("a",) , "g") , None)
+ self.assertEqual(config.interpolate(("a", "a"), "g") , None)
+ self.assertEqual(config.interpolate(("e", "f"), "g") , None)
+ self.assertEqual(config.interpolate(("e", "f"), "g", 4), 4)
+
+ self.assertEqual(config.interpolate(("b",), "d", 1) , 1)
+ self.assertEqual(config.interpolate(("d",), "d", 1) , 1)
+ config.set(() , "d", 2)
+ self.assertEqual(config.interpolate(("b",), "d", 1) , 2)
+ self.assertEqual(config.interpolate(("d",), "d", 1) , 2)
+ config.set(("b",), "d", 3)
+ self.assertEqual(config.interpolate(("b",), "d", 1) , 2)
+ self.assertEqual(config.interpolate(("d",), "d", 1) , 2)
def test_set(self):
- config.set(["b", "c"], [1, 2, 3])
- config.set(["e", "f", "g"], value=234)
- self.assertEqual(config.get(["b", "c"]), [1, 2, 3])
- self.assertEqual(config.get(["e", "f", "g"]), 234)
+ config.set(() , "c", [1, 2, 3])
+ config.set(("b",) , "c", [1, 2, 3])
+ config.set(("e", "f"), "g", value=234)
+ self.assertEqual(config.get(() , "c"), [1, 2, 3])
+ self.assertEqual(config.get(("b",) , "c"), [1, 2, 3])
+ self.assertEqual(config.get(("e", "f"), "g"), 234)
def test_setdefault(self):
- config.setdefault(["b", "c"], [1, 2, 3])
- config.setdefault(["e", "f", "g"], value=234)
- self.assertEqual(config.get(["b", "c"]), "text")
- self.assertEqual(config.get(["e", "f", "g"]), 234)
+ config.setdefault(() , "c", [1, 2, 3])
+ config.setdefault(("b",) , "c", [1, 2, 3])
+ config.setdefault(("e", "f"), "g", value=234)
+ self.assertEqual(config.get(() , "c"), [1, 2, 3])
+ self.assertEqual(config.get(("b",) , "c"), "text")
+ self.assertEqual(config.get(("e", "f"), "g"), 234)
def test_unset(self):
- config.unset(["a"])
- config.unset(["b", "c"])
- config.unset(["c", "d"])
- self.assertEqual(config.get(["a"]), None)
- self.assertEqual(config.get(["b", "a"]), 2)
- self.assertEqual(config.get(["b", "c"]), None)
+ config.unset(() , "a")
+ config.unset(("b",), "c")
+ config.unset(("a",), "d")
+ config.unset(("b",), "d")
+ config.unset(("c",), "d")
+ self.assertEqual(config.get(() , "a"), None)
+ self.assertEqual(config.get(("b",), "a"), 2)
+ self.assertEqual(config.get(("b",), "c"), None)
+ self.assertEqual(config.get(("a",), "d"), None)
+ self.assertEqual(config.get(("b",), "d"), None)
+ self.assertEqual(config.get(("c",), "d"), None)
def test_apply(self):
options = (
- (["b", "c"], [1, 2, 3]),
- (["e", "f", "g"], 234),
+ (("b",) , "c", [1, 2, 3]),
+ (("e", "f"), "g", 234),
)
- self.assertEqual(config.get(["b", "c"]), "text")
- self.assertEqual(config.get(["e", "f", "g"]), None)
+ self.assertEqual(config.get(("b",) , "c"), "text")
+ self.assertEqual(config.get(("e", "f"), "g"), None)
with config.apply(options):
- self.assertEqual(config.get(["b", "c"]), [1, 2, 3])
- self.assertEqual(config.get(["e", "f", "g"]), 234)
-
- self.assertEqual(config.get(["b", "c"]), "text")
- self.assertEqual(config.get(["e", "f", "g"]), None)
+ self.assertEqual(config.get(("b",) , "c"), [1, 2, 3])
+ self.assertEqual(config.get(("e", "f"), "g"), 234)
+
+ self.assertEqual(config.get(("b",) , "c"), "text")
+ self.assertEqual(config.get(("e", "f"), "g"), None)
+
+ def test_load(self):
+ with tempfile.TemporaryDirectory() as base:
+ path1 = os.path.join(base, "cfg1")
+ with open(path1, "w") as file:
+ file.write('{"a": 1, "b": {"a": 2, "c": "text"}}')
+
+ path2 = os.path.join(base, "cfg2")
+ with open(path2, "w") as file:
+ file.write('{"a": 7, "b": {"a": 8, "e": "foo"}}')
+
+ config.clear()
+ config.load((path1,))
+ self.assertEqual(config.get(() , "a"), 1)
+ self.assertEqual(config.get(("b",), "a"), 2)
+ self.assertEqual(config.get(("b",), "c"), "text")
+
+ config.load((path2,))
+ self.assertEqual(config.get(() , "a"), 7)
+ self.assertEqual(config.get(("b",), "a"), 8)
+ self.assertEqual(config.get(("b",), "c"), "text")
+ self.assertEqual(config.get(("b",), "e"), "foo")
+
+ config.clear()
+ config.load((path1, path2))
+ self.assertEqual(config.get(() , "a"), 7)
+ self.assertEqual(config.get(("b",), "a"), 8)
+ self.assertEqual(config.get(("b",), "c"), "text")
+ self.assertEqual(config.get(("b",), "e"), "foo")
if __name__ == '__main__':
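
These tests pin down the reworked config API: every accessor now takes a path tuple plus a separate key instead of a single key list. Note the interpolate() semantics asserted above: values set closer to the root win over deeper ones, which is why setting ('b',)/'d' after ()/'d' still yields the root value. In short:

    from gallery_dl import config  # as in these tests

    config.set(("extractor", "twitter"), "retweets", False)  # path, key, value
    config.get(("extractor", "twitter"), "retweets")         # -> False
    config.interpolate(("extractor", "twitter"), "timeout", 30)
    # falls back along the path; the root-most value takes precedence
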
diff --git a/test/test_cookies.py b/test/test_cookies.py
index a786df6..4f294bf 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2017 Mike Fährmann
+# Copyright 2017-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -18,8 +18,6 @@ from os.path import join
import gallery_dl.config as config
import gallery_dl.extractor as extractor
-CKEY = ("cookies",)
-
class TestCookiejar(unittest.TestCase):
@@ -45,7 +43,7 @@ class TestCookiejar(unittest.TestCase):
config.clear()
def test_cookiefile(self):
- config.set(CKEY, self.cookiefile)
+ config.set((), "cookies", self.cookiefile)
cookies = extractor.find("test:").session.cookies
self.assertEqual(len(cookies), 1)
@@ -63,7 +61,7 @@ class TestCookiejar(unittest.TestCase):
self._test_warning(join(self.path.name, "nothing"), FileNotFoundError)
def _test_warning(self, filename, exc):
- config.set(CKEY, filename)
+ config.set((), "cookies", filename)
log = logging.getLogger("test")
with mock.patch.object(log, "warning") as mock_warning:
cookies = extractor.find("test:").session.cookies
@@ -77,7 +75,7 @@ class TestCookiedict(unittest.TestCase):
def setUp(self):
self.cdict = {"NAME1": "VALUE1", "NAME2": "VALUE2"}
- config.set(CKEY, self.cdict)
+ config.set((), "cookies", self.cdict)
def tearDown(self):
config.clear()
@@ -112,7 +110,7 @@ class TestCookieLogin(unittest.TestCase):
}
for category, cookienames in extr_cookies.items():
cookies = {name: "value" for name in cookienames}
- config.set(CKEY, cookies)
+ config.set((), "cookies", cookies)
extr = _get_extractor(category)
with mock.patch.object(extr, "_login_impl") as mock_login:
extr.login()
diff --git a/test/test_downloader.py b/test/test_downloader.py
index 0f58d4e..a7c4ce6 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -100,7 +100,7 @@ class TestDownloaderBase(unittest.TestCase):
cls.extractor = extractor.find("test:")
cls.dir = tempfile.TemporaryDirectory()
cls.fnum = 0
- config.set(("base-directory",), cls.dir.name)
+ config.set((), "base-directory", cls.dir.name)
@classmethod
def tearDownClass(cls):
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 0ab89db..17f82c9 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -58,7 +58,7 @@ class BasePostprocessorTest(unittest.TestCase):
def setUpClass(cls):
cls.extractor = extractor.find("test:")
cls.dir = tempfile.TemporaryDirectory()
- config.set(("base-directory",), cls.dir.name)
+ config.set((), "base-directory", cls.dir.name)
@classmethod
def tearDownClass(cls):
@@ -151,8 +151,12 @@ class MetadataTest(BasePostprocessorTest):
"ascii" : True,
"indent" : 2,
"extension": "JSON",
+ }, {
+ "public" : "hello",
+ "_private" : "world",
})
+ self.assertEqual(pp.path , pp._path_append)
self.assertEqual(pp.write , pp._write_json)
self.assertEqual(pp.ascii , True)
self.assertEqual(pp.indent , 2)
@@ -167,7 +171,8 @@ class MetadataTest(BasePostprocessorTest):
self.assertEqual(self._output(m), """{
"category": "test",
"extension": "ext",
- "filename": "file"
+ "filename": "file",
+ "public": "hello"
}
""")
@@ -224,13 +229,41 @@ class MetadataTest(BasePostprocessorTest):
)
self.assertEqual(pp.write, pp._write_custom)
self.assertEqual(pp.extension, "txt")
- self.assertTrue(pp.formatter)
+ self.assertTrue(pp.contentfmt)
with patch("builtins.open", mock_open()) as m:
pp.prepare(self.pathfmt)
pp.run(self.pathfmt)
self.assertEqual(self._output(m), "bar\nNone\n")
+ def test_metadata_extfmt(self):
+ pp = self._create({
+ "extension" : "ignored",
+ "extension-format": "json",
+ })
+
+ self.assertEqual(pp.path, pp._path_format)
+
+ with patch("builtins.open", mock_open()) as m:
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+
+ path = self.pathfmt.realdirectory + "file.json"
+ m.assert_called_once_with(path, "w", encoding="utf-8")
+
+ def test_metadata_extfmt_2(self):
+ pp = self._create({
+ "extension-format": "{extension!u}-data:{category:Res/ES/}",
+ })
+
+ self.pathfmt.prefix = "2."
+ with patch("builtins.open", mock_open()) as m:
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+
+ path = self.pathfmt.realdirectory + "file.2.EXT-data:tESt"
+ m.assert_called_once_with(path, "w", encoding="utf-8")
+
@staticmethod
def _output(mock):
return "".join(
diff --git a/test/test_results.py b/test/test_results.py
index 6d628c3..869ff83 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -26,10 +26,9 @@ TRAVIS_SKIP = {
# temporary issues, etc.
BROKEN = {
- "hentaifox",
- "livedoor",
+ "erolord",
"mangapark",
- "yaplog",
+ "photobucket",
}
@@ -56,10 +55,11 @@ class TestExtractorResults(unittest.TestCase):
if result:
if "options" in result:
for key, value in result["options"]:
- config.set(key.split("."), value)
+ key = key.split(".")
+ config.set(key[:-1], key[-1], value)
if "range" in result:
- config.set(("image-range",), result["range"])
- config.set(("chapter-range",), result["range"])
+ config.set((), "image-range" , result["range"])
+ config.set((), "chapter-range", result["range"])
content = "content" in result
else:
content = False
@@ -79,14 +79,18 @@ class TestExtractorResults(unittest.TestCase):
pass
except exception.HttpError as exc:
exc = str(exc)
- if re.match(r"5\d\d: ", exc) or \
+ if re.match(r"'5\d\d ", exc) or \
re.search(r"\bRead timed out\b", exc):
self._skipped.append((url, exc))
self.skipTest(exc)
raise
- # test archive-id uniqueness
- self.assertEqual(len(set(tjob.archive_list)), len(tjob.archive_list))
+ if result.get("archive", True):
+ self.assertEqual(
+ len(set(tjob.archive_list)),
+ len(tjob.archive_list),
+ "archive-id uniqueness",
+ )
if tjob.queue:
# test '_extractor' entries
@@ -185,7 +189,7 @@ class ResultJob(job.DownloadJob):
self._update_url(url)
self._update_kwdict(kwdict)
self._update_archive(kwdict)
- self._update_content(url)
+ self._update_content(url, kwdict)
self.format_filename(kwdict)
def handle_directory(self, kwdict):
@@ -204,7 +208,7 @@ class ResultJob(job.DownloadJob):
def _update_kwdict(self, kwdict, to_list=True):
if to_list:
self.kwdict_list.append(kwdict.copy())
- kwdict = self._filter(kwdict)
+ kwdict = util.filter_dict(kwdict)
self.kwdict_hash.update(
json.dumps(kwdict, sort_keys=True, default=str).encode())
@@ -213,9 +217,10 @@ class ResultJob(job.DownloadJob):
self.archive_list.append(archive_id)
self.archive_hash.update(archive_id.encode())
- def _update_content(self, url):
+ def _update_content(self, url, kwdict):
if self.content:
scheme = url.partition(":")[0]
+ self.fileobj.kwdict = kwdict
self.get_downloader(scheme).download(url, self.fileobj)
@@ -281,34 +286,36 @@ def setup_test_config():
email = "gallerydl@openaliasbox.org"
config.clear()
- config.set(("cache", "file"), ":memory:")
- config.set(("downloader", "part"), False)
- config.set(("downloader", "adjust-extensions"), False)
- config.set(("extractor", "timeout"), 60)
- config.set(("extractor", "username"), name)
- config.set(("extractor", "password"), name)
- config.set(("extractor", "nijie" , "username"), email)
- config.set(("extractor", "seiga" , "username"), email)
-
- config.set(("extractor", "danbooru" , "username"), None)
- config.set(("extractor", "instagram", "username"), None)
- config.set(("extractor", "imgur" , "username"), None)
- config.set(("extractor", "twitter" , "username"), None)
-
- config.set(("extractor", "mangoxo" , "username"), "LiQiang3")
- config.set(("extractor", "mangoxo" , "password"), "5zbQF10_5u25259Ma")
-
- config.set(("extractor", "deviantart", "client-id"), "7777")
- config.set(("extractor", "deviantart", "client-secret"),
+ config.set(("cache",), "file", None)
+ config.set(("downloader",), "part", False)
+ config.set(("downloader",), "adjust-extensions", False)
+ config.set(("extractor" ,), "timeout" , 60)
+ config.set(("extractor" ,), "username", name)
+ config.set(("extractor" ,), "password", name)
+
+ config.set(("extractor", "nijie") , "username", email)
+ config.set(("extractor", "seiga") , "username", email)
+ config.set(("extractor", "danbooru") , "username", None)
+ config.set(("extractor", "instagram") , "username", None)
+ config.set(("extractor", "twitter") , "username", None)
+
+ config.set(("extractor", "newgrounds"), "username", "d1618111")
+ config.set(("extractor", "newgrounds"), "password", "d1618111")
+
+ config.set(("extractor", "mangoxo") , "username", "LiQiang3")
+ config.set(("extractor", "mangoxo") , "password", "5zbQF10_5u25259Ma")
+
+ config.set(("extractor", "deviantart"), "client-id", "7777")
+ config.set(("extractor", "deviantart"), "client-secret",
"ff14994c744d9208e5caeec7aab4a026")
- config.set(("extractor", "tumblr", "api-key"),
+ config.set(("extractor", "tumblr"), "api-key",
"0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6")
- config.set(("extractor", "tumblr", "api-secret"),
+ config.set(("extractor", "tumblr"), "api-secret",
"6wxAK2HwrXdedn7VIoZWxGqVhZ8JdYKDLjiQjL46MLqGuEtyVj")
- config.set(("extractor", "tumblr", "access-token"),
+ config.set(("extractor", "tumblr"), "access-token",
"N613fPV6tOZQnyn0ERTuoEZn0mEqG8m2K8M3ClSJdEHZJuqFdG")
- config.set(("extractor", "tumblr", "access-token-secret"),
+ config.set(("extractor", "tumblr"), "access-token-secret",
"sgOA7ZTT4FBXdOGGVV331sSp0jHYp4yMDRslbhaQf7CaS71i4O")
diff --git a/test/test_util.py b/test/test_util.py
index 9b252a3..5a103cf 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -358,6 +358,21 @@ class TestOther(unittest.TestCase):
self.assertEqual(
d, {1: 123, 2: 123, 3: 0, 4: {11: 321, 12: 321, 13: 0}})
+ def test_filter_dict(self):
+ d = {}
+ r = util.filter_dict(d)
+ self.assertEqual(r, d)
+ self.assertIsNot(r, d)
+
+ d = {"foo": 123, "bar": [], "baz": None}
+ r = util.filter_dict(d)
+ self.assertEqual(r, d)
+ self.assertIsNot(r, d)
+
+ d = {"foo": 123, "_bar": [], "__baz__": None}
+ r = util.filter_dict(d)
+ self.assertEqual(r, {"foo": 123})
+
def test_number_to_string(self, f=util.number_to_string):
self.assertEqual(f(1) , "1")
self.assertEqual(f(1.0) , "1.0")