-rw-r--r--  CHANGELOG.md                               22
-rw-r--r--  PKG-INFO                                   17
-rw-r--r--  README.rst                                 15
-rw-r--r--  data/man/gallery-dl.1                       2
-rw-r--r--  data/man/gallery-dl.conf.5                453
-rw-r--r--  docs/gallery-dl.conf                        8
-rw-r--r--  gallery_dl.egg-info/PKG-INFO               17
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt             3
-rw-r--r--  gallery_dl/aes.py                         337
-rw-r--r--  gallery_dl/extractor/500px.py               6
-rw-r--r--  gallery_dl/extractor/8muses.py             10
-rw-r--r--  gallery_dl/extractor/__init__.py            2
-rw-r--r--  gallery_dl/extractor/behance.py           281
-rw-r--r--  gallery_dl/extractor/deviantart.py         13
-rw-r--r--  gallery_dl/extractor/exhentai.py            3
-rw-r--r--  gallery_dl/extractor/fallenangels.py        6
-rw-r--r--  gallery_dl/extractor/foolslide.py           3
-rw-r--r--  gallery_dl/extractor/furaffinity.py         4
-rw-r--r--  gallery_dl/extractor/gfycat.py              6
-rw-r--r--  gallery_dl/extractor/hentaicafe.py         16
-rw-r--r--  gallery_dl/extractor/hentaifoundry.py     129
-rw-r--r--  gallery_dl/extractor/kissmanga.py         222
-rw-r--r--  gallery_dl/extractor/mangoxo.py             4
-rw-r--r--  gallery_dl/extractor/newgrounds.py         87
-rw-r--r--  gallery_dl/extractor/nijie.py              23
-rw-r--r--  gallery_dl/extractor/nozomi.py              2
-rw-r--r--  gallery_dl/extractor/oauth.py              35
-rw-r--r--  gallery_dl/extractor/paheal.py              4
-rw-r--r--  gallery_dl/extractor/pixiv.py               2
-rw-r--r--  gallery_dl/extractor/pornhub.py             8
-rw-r--r--  gallery_dl/extractor/reactor.py             2
-rw-r--r--  gallery_dl/extractor/readcomiconline.py    30
-rw-r--r--  gallery_dl/extractor/sankaku.py             2
-rw-r--r--  gallery_dl/extractor/twitter.py            34
-rw-r--r--  gallery_dl/extractor/weasyl.py            236
-rw-r--r--  gallery_dl/extractor/weibo.py              55
-rw-r--r--  gallery_dl/extractor/xvideos.py             2
-rw-r--r--  gallery_dl/job.py                          38
-rw-r--r--  gallery_dl/util.py                          2
-rw-r--r--  gallery_dl/version.py                       2
-rw-r--r--  test/test_results.py                       11
41 files changed, 1256 insertions, 898 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b368535..0a55546 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,27 @@
# Changelog
+## 1.15.1 - 2020-10-11
+### Additions
+- [hentaicafe] add `manga_id` metadata field ([#1036](https://github.com/mikf/gallery-dl/issues/1036))
+- [hentaifoundry] add support for stories ([#734](https://github.com/mikf/gallery-dl/issues/734))
+- [hentaifoundry] add `include` option
+- [newgrounds] extract image embeds ([#1033](https://github.com/mikf/gallery-dl/issues/1033))
+- [nijie] add `include` option ([#1018](https://github.com/mikf/gallery-dl/issues/1018))
+- [reactor] match URLs without subdomain ([#1053](https://github.com/mikf/gallery-dl/issues/1053))
+- [twitter] extend `retweets` option ([#1026](https://github.com/mikf/gallery-dl/issues/1026))
+- [weasyl] add extractors ([#977](https://github.com/mikf/gallery-dl/issues/977))
+### Fixes
+- [500px] update query hashes
+- [behance] fix `collection` extraction
+- [newgrounds] fix video extraction ([#1042](https://github.com/mikf/gallery-dl/issues/1042))
+- [twitter] improve twitpic extraction ([#1019](https://github.com/mikf/gallery-dl/issues/1019))
+- [weibo] handle posts with more than 9 images ([#926](https://github.com/mikf/gallery-dl/issues/926))
+- [xvideos] fix `title` extraction
+- fix crash when using `--download-archive` with `--no-skip` ([#1023](https://github.com/mikf/gallery-dl/issues/1023))
+- fix issues with `blacklist`/`whitelist` defaults ([#1051](https://github.com/mikf/gallery-dl/issues/1051), [#1056](https://github.com/mikf/gallery-dl/issues/1056))
+### Removals
+- [kissmanga] remove module
+
## 1.15.0 - 2020-09-20
### Additions
- [deviantart] support watchers-only/paid deviations ([#995](https://github.com/mikf/gallery-dl/issues/995))
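For context, the new `include` and `retweets` values added in this release are ordinary configuration settings. A minimal gallery-dl.conf sketch exercising them (values illustrative; the option names and accepted values are documented in the gallery-dl.conf.5 changes below):

.. code:: json

    {
        "extractor": {
            "hentaifoundry": {"include": ["pictures", "stories"]},
            "nijie":         {"include": "all"},
            "twitter":       {"retweets": "original"}
        }
    }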
diff --git a/PKG-INFO b/PKG-INFO
index 19b7f04..190cb5f 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.15.0
+Version: 1.15.1
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -119,6 +119,14 @@ Description: ==========
$ choco install gallery-dl
+ Scoop
+ ----------
+
+ Apart from Chocolatey, *gallery-dl* is also available in the Scoop_ "main" bucket for Windows users.
+
+ .. code:: powershell
+
+ $ scoop install gallery-dl
Usage
=====
@@ -311,7 +319,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.0.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.1.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
@@ -324,6 +332,7 @@ Description: ==========
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
.. _Chocolatey: https://chocolatey.org/install
+ .. _Scoop: https://scoop.sh
.. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg
:target: https://pypi.org/project/gallery-dl/
diff --git a/README.rst b/README.rst
index ca01764..fa823b5 100644
--- a/README.rst
+++ b/README.rst
@@ -83,8 +83,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -108,6 +108,14 @@ Windows users that have Chocolatey_ installed can install *gallery-dl* from the
$ choco install gallery-dl
+Scoop
+----------
+
+Apart from Chocolatey, *gallery-dl* is also available in the Scoop_ "main" bucket for Windows users.
+
+.. code:: powershell
+
+ $ scoop install gallery-dl
Usage
=====
@@ -300,7 +308,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.0.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.1.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
@@ -313,6 +321,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
.. _Chocolatey: https://chocolatey.org/install
+.. _Scoop: https://scoop.sh
.. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg
:target: https://pypi.org/project/gallery-dl/
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 2a84a06..cbcf4bf 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2020-09-20" "1.15.0" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2020-10-11" "1.15.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index e37135e..aeecaa0 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2020-09-20" "1.15.0" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2020-10-11" "1.15.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -115,6 +115,7 @@ Note: Even if the value of the \f[I]extension\f[] key is missing or
starting. This key is therefore always available to provide
a valid filename extension.
+
.SS extractor.*.directory
.IP "Type:" 6
\f[I]list\f[] of \f[I]strings\f[]
@@ -129,6 +130,7 @@ Each individual string in such a list represents a single path
segment, which will be joined together and appended to the
\f[I]base-directory\f[] to form the complete target directory path.
+
.SS extractor.*.base-directory
.IP "Type:" 6
\f[I]Path\f[]
@@ -137,7 +139,8 @@ segment, which will be joined together and appended to the
\f[I]"./gallery-dl/"\f[]
.IP "Description:" 4
-Directory path used as the base for all download destinations.
+Directory path used as base for all download destinations.
+
.SS extractor.*.parent-directory
.IP "Type:" 6
@@ -148,9 +151,10 @@ Directory path used as the base for all download destinations.
.IP "Description:" 4
Use an extractor's current target directory as
-\f[I]base-directory <extractor.*.base-directory_>\f[]
+\f[I]base-directory\f[]
for any spawned child extractors.
+
.SS extractor.*.path-restrict
.IP "Type:" 6
\f[I]string\f[] or \f[I]object\f[]
@@ -159,15 +163,15 @@ for any spawned child extractors.
\f[I]"auto"\f[]
.IP "Example:" 4
-"/!? (){}"
.br
-{" ": "_", "/": "-", "|": "-", ":": "-", "*": "+"}
+* "/!? (){}"
.br
+* {" ": "_", "/": "-", "|": "-", ":": "-", "*": "+"}
.IP "Description:" 4
A string of characters to be replaced with the value of
.br
-\f[I]path-replace <extractor.*.path-replace_>\f[]
+\f[I]path-replace\f[]
or an object mapping invalid/unwanted characters to their replacements
.br
for generated path segment names.
@@ -186,6 +190,7 @@ depending on the local operating system
Note: In a string with 2 or more characters, \f[I][]^-\\\f[] need to be
escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[]
+
.SS extractor.*.path-replace
.IP "Type:" 6
\f[I]string\f[]
@@ -195,7 +200,8 @@ escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[]
.IP "Description:" 4
The replacement character(s) for
-\f[I]path-restrict <extractor.*.path-restrict_>\f[]
+\f[I]path-restrict\f[]
+
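Taken together, `path-restrict` and `path-replace` substitute unwanted characters in generated path segments. A minimal sketch using the string form documented above, where every listed character is replaced with the `path-replace` value:

.. code:: json

    {
        "extractor": {
            "path-restrict": "/!? (){}",
            "path-replace": "_"
        }
    }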
.SS extractor.*.path-remove
.IP "Type:" 6
@@ -210,6 +216,7 @@ Set of characters to remove from generated path names.
Note: In a string with 2 or more characters, \f[I][]^-\\\f[] need to be
escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[]
+
.SS extractor.*.skip
.IP "Type:" 6
\f[I]bool\f[] or \f[I]string\f[]
@@ -220,7 +227,7 @@ escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[]
.IP "Description:" 4
Controls the behavior when downloading files that have been
downloaded before, i.e. a file with the same filename already
-exists or its ID is in a \f[I]download archive <extractor.*.archive_>\f[].
+exists or its ID is in a \f[I]download archive\f[].
.br
* \f[I]true\f[]: Skip downloads
@@ -243,6 +250,7 @@ after \f[I]N\f[] consecutive skips
* \f[I]"enumerate"\f[]: Add an enumeration index to the beginning of the
filename extension (\f[I]file.1.ext\f[], \f[I]file.2.ext\f[], etc.)
+
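As a sketch, the skip behavior can be combined with an abort threshold so an extractor run stops after hitting a number of consecutive already-downloaded files (threshold illustrative):

.. code:: json

    {
        "extractor": {
            "skip": "abort:5"
        }
    }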
.SS extractor.*.sleep
.IP "Type:" 6
\f[I]float\f[]
@@ -253,6 +261,7 @@ filename extension (\f[I]file.1.ext\f[], \f[I]file.2.ext\f[], etc.)
.IP "Description:" 4
Number of seconds to sleep before each download.
+
.SS extractor.*.sleep-extractor
.IP "Type:" 6
\f[I]float\f[]
@@ -264,6 +273,7 @@ Number of seconds to sleep before each download.
Number of seconds to sleep before handling an input URL,
i.e. before starting a new extractor.
+
.SS extractor.*.sleep-request
.IP "Type:" 6
\f[I]float\f[]
@@ -275,6 +285,7 @@ i.e. before starting a new extractor.
Minimal time interval in seconds between each HTTP request
during data extraction.
+
.SS extractor.*.username & .password
.IP "Type:" 6
\f[I]string\f[]
@@ -330,6 +341,7 @@ Note: The password values for \f[I]danbooru\f[] and \f[I]e621\f[] should be
the API keys found in your user profile, not your actual account
password.
+
.SS extractor.*.netrc
.IP "Type:" 6
\f[I]bool\f[]
@@ -340,6 +352,7 @@ password.
.IP "Description:" 4
Enable the use of \f[I].netrc\f[] authentication data.
+
.SS extractor.*.cookies
.IP "Type:" 6
\f[I]Path\f[] or \f[I]object\f[]
@@ -378,6 +391,7 @@ If \f[I]extractor.*.cookies\f[] specifies the \f[I]Path\f[] to a cookies.txt
file and it can be opened and parsed without errors,
update its contents with cookies received during data extraction.
+
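For sites that need login cookies, the `Path` form points at a cookies.txt file; a minimal sketch (path illustrative):

.. code:: json

    {
        "extractor": {
            "cookies": "~/path/to/cookies.txt"
        }
    }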
.SS extractor.*.proxy
.IP "Type:" 6
\f[I]string\f[] or \f[I]object\f[]
@@ -411,6 +425,7 @@ Example:
Note: All proxy URLs should include a scheme,
otherwise \f[I]http://\f[] is assumed.
+
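Since proxy URLs should include a scheme, a minimal sketch of the string form (address illustrative):

.. code:: json

    {
        "extractor": {
            "proxy": "http://10.10.1.10:3128"
        }
    }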
.SS extractor.*.user-agent
.IP "Type:" 6
\f[I]string\f[]
@@ -421,9 +436,9 @@ otherwise \f[I]http://\f[] is assumed.
.IP "Description:" 4
User-Agent header value to be used for HTTP requests.
-Note: This option has no effect on pixiv and
-readcomiconline extractors, as these need specific values to
-function correctly.
+Note: This option has no effect on pixiv extractors,
+as these need specific values to function correctly.
+
.SS extractor.*.keywords
.IP "Type:" 6
@@ -435,6 +450,7 @@ function correctly.
.IP "Description:" 4
Additional key-value pairs to be added to each metadata dictionary.
+
.SS extractor.*.keywords-default
.IP "Type:" 6
any
@@ -446,6 +462,7 @@ any
Default value used for missing or undefined keyword names in
format strings.
+
.SS extractor.*.category-transfer
.IP "Type:" 6
\f[I]bool\f[]
@@ -458,6 +475,7 @@ Transfer an extractor's (sub)category values to all child
extractors spawned by it, to let them inherit their parent's
config options.
+
.SS extractor.*.blacklist & .whitelist
.IP "Type:" 6
\f[I]list\f[] of \f[I]strings\f[]
@@ -473,6 +491,7 @@ e.g. from \f[I]reddit\f[] or \f[I]plurk\f[].
Note: Any \f[I]blacklist\f[] setting will automatically include
\f[I]"oauth"\f[], \f[I]"recursive"\f[], and \f[I]"test"\f[].
+
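A sketch restricting which child extractors may handle URLs found by a parent extractor, assuming the option is set per category as with other `extractor.*` options (category names illustrative):

.. code:: json

    {
        "extractor": {
            "reddit": {
                "whitelist": ["imgur", "redgifs"]
            }
        }
    }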
.SS extractor.*.archive
.IP "Type:" 6
\f[I]Path\f[]
@@ -486,7 +505,7 @@ Note: Any \f[I]blacklist\f[] setting will automatically include
.IP "Description:" 4
File to store IDs of downloaded files in. Downloads of files
already recorded in this archive file will be
-\f[I]skipped <extractor.*.skip_>\f[].
+\f[I]skipped\f[].
The resulting archive file is not a plain text file but an SQLite3
database, as either lookup operations are significantly faster or
@@ -497,6 +516,7 @@ Note: archive paths support regular \f[I]format string\f[] replacements,
but be aware that using external inputs for building local paths
may pose a security risk.
+
.SS extractor.*.archive-format
.IP "Type:" 6
\f[I]string\f[]
@@ -507,6 +527,7 @@ may pose a security risk.
.IP "Description:" 4
An alternative \f[I]format string\f[] to build archive IDs with.
+
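A sketch combining both archive options; the format string fields are illustrative, since the available replacement fields depend on the extractor:

.. code:: json

    {
        "extractor": {
            "archive": "~/gallery-dl/archive.sqlite3",
            "archive-format": "{category}_{id}"
        }
    }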
.SS extractor.*.postprocessors
.IP "Type:" 6
\f[I]list\f[] of \f[I]Postprocessor Configuration\f[] objects
@@ -521,8 +542,9 @@ An alternative \f[I]format string\f[] to build archive IDs with.
.IP "Description:" 4
-A list of post-processors to be applied to each downloaded file
-in the same order as they are specified.
+A list of \f[I]post-processors\f[]
+to be applied to each downloaded file in the specified order.
+
.SS extractor.*.retries
.IP "Type:" 6
@@ -535,12 +557,13 @@ in the same order as they are specified.
Maximum number of times a failed HTTP request is retried before
giving up or \f[I]-1\f[] for infinite retries.
+
.SS extractor.*.timeout
.IP "Type:" 6
-\f[I]float\f[] or \f[I]null\f[]
+\f[I]float\f[]
.IP "Default:" 9
-\f[I]30\f[]
+\f[I]30.0\f[]
.IP "Description:" 4
Amount of time (in seconds) to wait for a successful connection
@@ -549,6 +572,7 @@ and response from a remote server.
This value gets internally used as the \f[I]timeout\f[] parameter for the
\f[I]requests.request()\f[] method.
+
.SS extractor.*.verify
.IP "Type:" 6
\f[I]bool\f[] or \f[I]string\f[]
@@ -565,6 +589,7 @@ instead of the default certificates.
This value gets internally used as the \f[I]verify\f[] parameter for the
\f[I]requests.request()\f[] method.
+
.SS extractor.*.download
.IP "Type:" 6
\f[I]bool\f[]
@@ -576,66 +601,69 @@ This value gets internally used as the \f[I]verify\f[] parameter for the
Controls whether to download media files.
Setting this to \f[I]false\f[] won't download any files, but all other
-functions (postprocessors_, \f[I]download archive\f[], etc.)
+functions (\f[I]postprocessors\f[], \f[I]download archive\f[], etc.)
will be executed as normal.
+
.SS extractor.*.image-range
.IP "Type:" 6
\f[I]string\f[]
.IP "Example:" 4
-"10-20",
.br
-"-5, 10, 30-50, 100-"
+* "10-20"
.br
+* "-5, 10, 30-50, 100-"
.IP "Description:" 4
Index-range(s) specifying which images to download.
Note: The index of the first image is \f[I]1\f[].
+
.SS extractor.*.chapter-range
.IP "Type:" 6
\f[I]string\f[]
.IP "Description:" 4
-Like \f[I]image-range <extractor.*.image-range_>\f[],
+Like \f[I]image-range\f[],
but applies to delegated URLs like manga-chapters, etc.
+
.SS extractor.*.image-filter
.IP "Type:" 6
\f[I]string\f[]
.IP "Example:" 4
-"width >= 1200 and width/height > 1.2",
.br
-"re.search(r'foo(bar)+', description)"
+* "width >= 1200 and width/height > 1.2"
.br
+* "re.search(r'foo(bar)+', description)"
.IP "Description:" 4
-Python expression controlling which images to download.
-.br
-Files for which the expression evaluates to \f[I]False\f[]
+Python expression controlling which files to download.
+
+Files for which the expression evaluates to \f[I]False\f[] are ignored.
.br
-are ignored.
-Available keys are the filename-specific ones listed
+Available keys are the filename-specific ones listed by \f[I]-K\f[] or \f[I]-j\f[].
.br
-by \f[I]-K\f[] or \f[I]-j\f[].
+
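The documented example expressions can be set like any other option; a sketch using the first one:

.. code:: json

    {
        "extractor": {
            "image-filter": "width >= 1200 and width/height > 1.2"
        }
    }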
.SS extractor.*.chapter-filter
.IP "Type:" 6
\f[I]string\f[]
.IP "Example:" 4
-"lang == 'en'"
.br
-"language == 'French' and 10 <= chapter < 20"
+* "lang == 'en'"
.br
+* "language == 'French' and 10 <= chapter < 20"
.IP "Description:" 4
-Like \f[I]image-filter <extractor.*.image-filter_>\f[],
+Like \f[I]image-filter\f[],
but applies to delegated URLs like manga-chapters, etc.
+
.SS extractor.*.image-unique
.IP "Type:" 6
\f[I]bool\f[]
@@ -647,6 +675,7 @@ but applies to delegated URLs like manga-chapters, etc.
Ignore image URLs that have been encountered before during the
current extractor run.
+
.SS extractor.*.chapter-unique
.IP "Type:" 6
\f[I]bool\f[]
@@ -655,9 +684,10 @@ current extractor run.
\f[I]false\f[]
.IP "Description:" 4
-Like \f[I]image-unique <extractor.*.image-unique_>\f[],
+Like \f[I]image-unique\f[],
but applies to delegated URLs like manga-chapters, etc.
+
.SS extractor.*.date-format
.IP "Type:" 6
\f[I]string\f[]
@@ -671,6 +701,7 @@ date-min and date-max.
See \f[I]strptime\f[] for a list of formatting directives.
+
.SH EXTRACTOR-SPECIFIC OPTIONS
.SS extractor.artstation.external
.IP "Type:" 6
@@ -682,6 +713,7 @@ See \f[I]strptime\f[] for a list of formatting directives.
.IP "Description:" 4
Try to follow external URLs of embedded players.
+
.SS extractor.aryion.recursive
.IP "Type:" 6
\f[I]bool\f[]
@@ -698,6 +730,7 @@ descend into subfolders
.br
* \f[I]false\f[]: Get posts from "Latest Updates" pages
+
.SS extractor.blogger.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -708,6 +741,7 @@ descend into subfolders
.IP "Description:" 4
Download embedded videos hosted on https://www.blogger.com/
+
.SS extractor.danbooru.ugoira
.IP "Type:" 6
\f[I]bool\f[]
@@ -723,6 +757,7 @@ Controls the download target for Ugoira posts.
.br
* \f[I]false\f[]: Converted video files
+
.SS extractor.deviantart.extra
.IP "Type:" 6
\f[I]bool\f[]
@@ -736,6 +771,7 @@ description texts and journals.
Note: Enabling this option also enables deviantart.metadata_.
+
.SS extractor.deviantart.flat
.IP "Type:" 6
\f[I]bool\f[]
@@ -755,6 +791,10 @@ favorites-collections and transfer any further work to other
extractors (\f[I]folder\f[] or \f[I]collection\f[]), which will then
create individual subdirectories for each of them.
+Note: Going through all gallery folders cannot fetch
+deviations which aren't in any folder.
+
+
.SS extractor.deviantart.folders
.IP "Type:" 6
\f[I]bool\f[]
@@ -769,6 +809,7 @@ folders a deviation is present in.
Note: Gathering this information requires a lot of API calls.
Use with caution.
+
.SS extractor.deviantart.include
.IP "Type:" 6
\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
@@ -788,6 +829,7 @@ Possible values are
You can use \f[I]"all"\f[] instead of listing all values separately.
+
.SS extractor.deviantart.journals
.IP "Type:" 6
\f[I]string\f[]
@@ -805,6 +847,7 @@ Selects the output format of journal entries.
.br
* \f[I]"none"\f[]: Don't download journals.
+
.SS extractor.deviantart.mature
.IP "Type:" 6
\f[I]bool\f[]
@@ -819,6 +862,7 @@ This option simply sets the \f[I]mature_content\f[] parameter for API
calls to either \f[I]"true"\f[] or \f[I]"false"\f[] and does not do any other
form of content filtering.
+
.SS extractor.deviantart.metadata
.IP "Type:" 6
\f[I]bool\f[]
@@ -827,9 +871,9 @@ form of content filtering.
\f[I]false\f[]
.IP "Description:" 4
-Request extended metadata for deviation objects to additionally
-provide \f[I]description\f[], \f[I]tags\f[], \f[I]license\f[] and \f[I]is_watching\f[]
-fields.
+Request extended metadata for deviation objects to additionally provide
+\f[I]description\f[], \f[I]tags\f[], \f[I]license\f[] and \f[I]is_watching\f[] fields.
+
.SS extractor.deviantart.original
.IP "Type:" 6
@@ -845,6 +889,7 @@ Setting this option to \f[I]"images"\f[] only downloads original
files if they are images and falls back to preview versions for
everything else (archives, etc.).
+
.SS extractor.deviantart.quality
.IP "Type:" 6
\f[I]integer\f[]
@@ -856,6 +901,7 @@ everything else (archives, etc.).
JPEG quality level of newer images for which
an original file download is not available.
+
.SS extractor.deviantart.refresh-token
.IP "Type:" 6
\f[I]string\f[]
@@ -865,14 +911,15 @@ an original file download is not available.
.IP "Description:" 4
The \f[I]refresh-token\f[] value you get from
-\f[I]linking your DeviantArt account to gallery-dl <OAuth_>\f[].
+\f[I]linking your DeviantArt account to gallery-dl\f[].
Using a \f[I]refresh-token\f[] allows you to access private or otherwise
not publicly available deviations.
Note: The \f[I]refresh-token\f[] becomes invalid
-\f[I]after 3 months <https://www.deviantart.com/developers/authentication#refresh>\f[]
-or whenever your \f[I]cache file <cache.file_>\f[] is deleted or cleared.
+\f[I]after 3 months\f[]
+or whenever your \f[I]cache file\f[] is deleted or cleared.
+
.SS extractor.deviantart.wait-min
.IP "Type:" 6
@@ -884,6 +931,7 @@ or whenever your \f[I]cache file <cache.file_>\f[] is deleted or cleared.
.IP "Description:" 4
Minimum wait time in seconds before API requests.
+
.SS extractor.exhentai.domain
.IP "Type:" 6
\f[I]string\f[]
@@ -900,6 +948,7 @@ depending on the input URL
.br
* \f[I]"exhentai.org"\f[]: Use \f[I]exhentai.org\f[] for all URLs
+
.SS extractor.exhentai.limits
.IP "Type:" 6
\f[I]bool\f[] or \f[I]integer\f[]
@@ -914,6 +963,7 @@ and stop extraction when they are exceeded.
If this value is an \f[I]integer\f[], it gets used as the limit maximum
instead of the value listed on \f[I]https://e-hentai.org/home.php\f[]
+
.SS extractor.exhentai.original
.IP "Type:" 6
\f[I]bool\f[]
@@ -924,6 +974,7 @@ instead of the value listed on \f[I]https://e-hentai.org/home.php\f[]
.IP "Description:" 4
Download full-sized original images if available.
+
.SS extractor.exhentai.wait-min & .wait-max
.IP "Type:" 6
\f[I]float\f[]
@@ -939,6 +990,7 @@ ExHentai detects and blocks automated downloaders.
seconds between \f[I]wait-min\f[] and \f[I]wait-max\f[] after
each image to prevent getting blocked.
+
.SS extractor.flickr.access-token & .access-token-secret
.IP "Type:" 6
\f[I]string\f[]
@@ -948,7 +1000,8 @@ each image to prevent getting blocked.
.IP "Description:" 4
The \f[I]access_token\f[] and \f[I]access_token_secret\f[] values you get
-from \f[I]linking your Flickr account to gallery-dl <OAuth_>\f[].
+from \f[I]linking your Flickr account to gallery-dl\f[].
+
.SS extractor.flickr.videos
.IP "Type:" 6
@@ -960,6 +1013,7 @@ from \f[I]linking your Flickr account to gallery-dl <OAuth_>\f[].
.IP "Description:" 4
Extract and download videos.
+
.SS extractor.flickr.size-max
.IP "Type:" 6
\f[I]integer\f[] or \f[I]string\f[]
@@ -978,6 +1032,7 @@ Sets the maximum allowed size for downloaded images.
(\f[I]"Original"\f[], \f[I]"Large"\f[], ... or \f[I]"o"\f[], \f[I]"k"\f[], \f[I]"h"\f[],
\f[I]"l"\f[], ...) to use as an upper limit.
+
.SS extractor.furaffinity.include
.IP "Type:" 6
\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
@@ -997,6 +1052,7 @@ Possible values are
You can use \f[I]"all"\f[] instead of listing all values separately.
+
.SS extractor.gelbooru.api
.IP "Type:" 6
\f[I]bool\f[]
@@ -1010,6 +1066,7 @@ Enable use of Gelbooru's API.
Set this value to false if the API has been disabled to switch
to manual information extraction.
+
.SS extractor.gfycat.format
.IP "Type:" 6
\f[I]string\f[]
@@ -1025,6 +1082,27 @@ If the selected format is not available, \f[I]"mp4"\f[], \f[I]"webm"\f[]
and \f[I]"gif"\f[] (in that order) will be tried instead, until an
available format is found.
+
+.SS extractor.hentaifoundry.include
+.IP "Type:" 6
+\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]"pictures"\f[]
+
+.IP "Example:" 4
+"scraps,stories" or ["scraps", "stories"]
+
+.IP "Description:" 4
+A (comma-separated) list of subcategories to include
+when processing a user profile.
+
+Possible values are
+\f[I]"pictures"\f[], \f[I]"scraps"\f[], \f[I]"stories"\f[], \f[I]"favorite"\f[].
+
+You can use \f[I]"all"\f[] instead of listing all values separately.
+
+
.SS extractor.hitomi.metadata
.IP "Type:" 6
\f[I]bool\f[]
@@ -1034,8 +1112,8 @@ available format is found.
.IP "Description:" 4
Try to extract
-\f[I]artist\f[], \f[I]group\f[], \f[I]parody\f[], and \f[I]characters\f[]
-metadata.
+\f[I]artist\f[], \f[I]group\f[], \f[I]parody\f[], and \f[I]characters\f[] metadata.
+
.SS extractor.imgur.mp4
.IP "Type:" 6
@@ -1055,6 +1133,7 @@ Controls whether to choose the GIF or MP4 version of an animation.
.br
* \f[I]"always"\f[]: Always choose MP4.
+
.SS extractor.inkbunny.orderby
.IP "Type:" 6
\f[I]string\f[]
@@ -1065,9 +1144,10 @@ Controls whether to choose the GIF or MP4 version of an animation.
.IP "Description:" 4
Value of the \f[I]orderby\f[] parameter for submission searches.
-(See \f[I]API#Search <https://wiki.inkbunny.net/wiki/API#Search>\f[]
+(See \f[I]API#Search\f[]
for details)
+
.SS extractor.instagram.highlights
.IP "Type:" 6
\f[I]bool\f[]
@@ -1079,6 +1159,7 @@ for details)
Include *Story Highlights* when downloading a user profile.
(requires authentication)
+
.SS extractor.instagram.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -1089,6 +1170,7 @@ Include *Story Highlights* when downloading a user profile.
.IP "Description:" 4
Download video files.
+
.SS extractor.khinsider.format
.IP "Type:" 6
\f[I]string\f[]
@@ -1105,20 +1187,6 @@ or a (comma-separated) list to select multiple formats.
If the selected format is not available,
the first in the list gets chosen (usually mp3).
-.SS extractor.kissmanga.captcha
-.IP "Type:" 6
-\f[I]string\f[]
-
-.IP "Default:" 9
-\f[I]"stop"\f[]
-
-.IP "Description:" 4
-Controls how to handle redirects to CAPTCHA pages.
-
-.br
-* \f[I]"stop\f[]: Stop the current extractor run.
-.br
-* \f[I]"wait\f[]: Ask the user to solve the CAPTCHA and wait.
.SS extractor.newgrounds.include
.IP "Type:" 6
@@ -1139,6 +1207,24 @@ Possible values are
You can use \f[I]"all"\f[] instead of listing all values separately.
+
+.SS extractor.nijie.include
+.IP "Type:" 6
+\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]"illustration,doujin"\f[]
+
+.IP "Description:" 4
+A (comma-separated) list of subcategories to include
+when processing a user profile.
+
+Possible values are
+\f[I]"illustration"\f[], \f[I]"doujin"\f[], \f[I]"favorite"\f[].
+
+You can use \f[I]"all"\f[] instead of listing all values separately.
+
+
.SS extractor.oauth.browser
.IP "Type:" 6
\f[I]bool\f[]
@@ -1147,14 +1233,15 @@ You can use \f[I]"all"\f[] instead of listing all values separately.
\f[I]true\f[]
.IP "Description:" 4
-Controls how a user is directed to an OAuth authorization site.
+Controls how a user is directed to an OAuth authorization page.
.br
* \f[I]true\f[]: Use Python's \f[I]webbrowser.open()\f[] method to automatically
-open the URL in the user's browser.
+open the URL in the user's default browser.
.br
* \f[I]false\f[]: Ask the user to copy & paste a URL from the terminal.
+
.SS extractor.oauth.cache
.IP "Type:" 6
\f[I]bool\f[]
@@ -1164,7 +1251,8 @@ open the URL in the user's browser.
.IP "Description:" 4
Store tokens received during OAuth authorizations
-in \f[I]cache <cache.file_>\f[].
+in \f[I]cache\f[].
+
.SS extractor.oauth.port
.IP "Type:" 6
@@ -1181,6 +1269,7 @@ of the port specified here. You'll have to manually adjust the
port number in your browser's address bar when using a different
port than the default.
+
.SS extractor.photobucket.subalbums
.IP "Type:" 6
\f[I]bool\f[]
@@ -1191,6 +1280,7 @@ port than the default.
.IP "Description:" 4
Download subalbums.
+
.SS extractor.pinterest.sections
.IP "Type:" 6
\f[I]bool\f[]
@@ -1201,6 +1291,7 @@ Download subalbums.
.IP "Description:" 4
Include pins from board sections.
+
.SS extractor.pixiv.user.avatar
.IP "Type:" 6
\f[I]bool\f[]
@@ -1211,6 +1302,7 @@ Include pins from board sections.
.IP "Description:" 4
Download user avatars.
+
.SS extractor.pixiv.ugoira
.IP "Type:" 6
\f[I]bool\f[]
@@ -1224,9 +1316,10 @@ Download Pixiv's Ugoira animations or ignore them.
These animations come as a \f[I].zip\f[] file containing all
animation frames in JPEG format.
-Use an \f[I]ugoira\f[] post processor to convert them
+Use an ugoira post processor to convert them
to watchable videos. (Example__)
+
.SS extractor.plurk.comments
.IP "Type:" 6
\f[I]bool\f[]
@@ -1237,6 +1330,7 @@ to watchable videos. (Example__)
.IP "Description:" 4
Also search Plurk comments for URLs.
+
.SS extractor.reactor.wait-min & .wait-max
.IP "Type:" 6
\f[I]float\f[]
@@ -1248,6 +1342,7 @@ Also search Plurk comments for URLs.
Minimum and maximum wait time in seconds between HTTP requests
during the extraction process.
+
.SS extractor.readcomiconline.captcha
.IP "Type:" 6
\f[I]string\f[]
@@ -1263,6 +1358,7 @@ Controls how to handle redirects to CAPTCHA pages.
.br
* \f[I]"wait\f[]: Ask the user to solve the CAPTCHA and wait.
+
.SS extractor.reddit.comments
.IP "Type:" 6
\f[I]integer\f[]
@@ -1282,6 +1378,7 @@ appear to be 200 and 500 respectively.
The value \f[I]0\f[] ignores all comments and significantly reduces the
time required when scanning a subreddit.
+
.SS extractor.reddit.morecomments
.IP "Type:" 6
\f[I]bool\f[]
@@ -1295,6 +1392,7 @@ stubs in the base comment tree.
This requires 1 additional API call for every 100 extra comments.
+
.SS extractor.reddit.date-min & .date-max
.IP "Type:" 6
\f[I]Date\f[]
@@ -1305,6 +1403,7 @@ This requires 1 additional API call for every 100 extra comments.
.IP "Description:" 4
Ignore all submissions posted before/after this date.
+
.SS extractor.reddit.id-min & .id-max
.IP "Type:" 6
\f[I]string\f[]
@@ -1313,8 +1412,8 @@ Ignore all submissions posted before/after this date.
"6kmzv2"
.IP "Description:" 4
-Ignore all submissions posted before/after the submission with
-this ID.
+Ignore all submissions posted before/after the submission with this ID.
+
.SS extractor.reddit.recursion
.IP "Type:" 6
@@ -1329,12 +1428,12 @@ linked to in the initial set of submissions.
This value sets the maximum recursion depth.
Special values:
-
.br
* \f[I]0\f[]: Recursion is disabled
.br
* \f[I]-1\f[]: Infinite recursion (don't do this)
+
.SS extractor.reddit.refresh-token
.IP "Type:" 6
\f[I]string\f[]
@@ -1344,7 +1443,7 @@ Special values:
.IP "Description:" 4
The \f[I]refresh-token\f[] value you get from
-\f[I]linking your Reddit account to gallery-dl <OAuth_>\f[].
+\f[I]linking your Reddit account to gallery-dl\f[].
Using a \f[I]refresh-token\f[] allows you to access private or otherwise
not publicly available subreddits, given that your account is
@@ -1352,6 +1451,7 @@ authorized to do so,
but requests to the reddit API are going to be rate limited
at 600 requests every 10 minutes/600 seconds.
+
.SS extractor.reddit.videos
.IP "Type:" 6
\f[I]bool\f[] or \f[I]string\f[]
@@ -1371,6 +1471,7 @@ video extraction and download
.br
* \f[I]false\f[]: Ignore videos
+
.SS extractor.redgifs.format
.IP "Type:" 6
\f[I]string\f[]
@@ -1387,6 +1488,7 @@ If the selected format is not available, \f[I]"mp4"\f[], \f[I]"webm"\f[]
and \f[I]"gif"\f[] (in that order) will be tried instead, until an
available format is found.
+
.SS extractor.sankaku.wait-min & .wait-max
.IP "Type:" 6
\f[I]float\f[]
@@ -1401,6 +1503,7 @@ Sankaku Channel responds with \f[I]429 Too Many Requests\f[] if it
receives too many HTTP requests in a certain amount of time.
Waiting a few seconds between each request tries to prevent that.
+
.SS extractor.smugmug.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -1411,6 +1514,7 @@ Waiting a few seconds between each request tries to prevent that.
.IP "Description:" 4
Download video files.
+
.SS extractor.tumblr.avatar
.IP "Type:" 6
\f[I]bool\f[]
@@ -1421,6 +1525,7 @@ Download video files.
.IP "Description:" 4
Download blog avatars.
+
.SS extractor.tumblr.date-min & .date-max
.IP "Type:" 6
\f[I]Date\f[]
@@ -1431,6 +1536,7 @@ Download blog avatars.
.IP "Description:" 4
Ignore all posts published before/after this date.
+
.SS extractor.tumblr.external
.IP "Type:" 6
\f[I]bool\f[]
@@ -1442,6 +1548,7 @@ Ignore all posts published before/after this date.
Follow external URLs (e.g. from "Link" posts) and try to extract
images from them.
+
.SS extractor.tumblr.inline
.IP "Type:" 6
\f[I]bool\f[]
@@ -1452,6 +1559,7 @@ images from them.
.IP "Description:" 4
Search posts for inline images and videos.
+
.SS extractor.tumblr.reblogs
.IP "Type:" 6
\f[I]bool\f[] or \f[I]string\f[]
@@ -1468,6 +1576,7 @@ Search posts for inline images and videos.
* \f[I]"same-blog"\f[]: Skip reblogged posts unless the original post
is from the same blog
+
.SS extractor.tumblr.posts
.IP "Type:" 6
\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
@@ -1486,6 +1595,7 @@ Possible types are \f[I]text\f[], \f[I]quote\f[], \f[I]link\f[], \f[I]answer\f[]
You can use \f[I]"all"\f[] instead of listing all types separately.
+
.SS extractor.twitter.quoted
.IP "Type:" 6
\f[I]bool\f[]
@@ -1496,6 +1606,7 @@ You can use \f[I]"all"\f[] instead of listing all types separately.
.IP "Description:" 4
Fetch media from quoted Tweets.
+
.SS extractor.twitter.replies
.IP "Type:" 6
\f[I]bool\f[]
@@ -1506,6 +1617,7 @@ Fetch media from quoted Tweets.
.IP "Description:" 4
Fetch media from replies to other Tweets.
+
.SS extractor.twitter.retweets
.IP "Type:" 6
\f[I]bool\f[]
@@ -1516,6 +1628,10 @@ Fetch media from replies to other Tweets.
.IP "Description:" 4
Fetch media from Retweets.
+If this value is \f[I]"original"\f[], metadata for these files
+will be taken from the original Tweets, not the Retweets.
+
+
.SS extractor.twitter.twitpic
.IP "Type:" 6
\f[I]bool\f[]
@@ -1524,7 +1640,8 @@ Fetch media from Retweets.
\f[I]false\f[]
.IP "Description:" 4
-Extract \f[I]TwitPic <https://twitpic.com/>\f[] embeds.
+Extract \f[I]TwitPic\f[] embeds.
+
.SS extractor.twitter.videos
.IP "Type:" 6
@@ -1543,6 +1660,7 @@ Control video download behavior.
.br
* \f[I]false\f[]: Skip video Tweets
+
.SS extractor.vsco.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -1553,6 +1671,7 @@ Control video download behavior.
.IP "Description:" 4
Download video files.
+
.SS extractor.wallhaven.api-key
.IP "Type:" 6
\f[I]string\f[]
@@ -1561,11 +1680,12 @@ Download video files.
\f[I]null\f[]
.IP "Description:" 4
-Your \f[I]API Key <https://wallhaven.cc/settings/account>\f[] to use
+Your \f[I]API Key\f[] to use
your account's browsing settings and default filters when searching.
See https://wallhaven.cc/help/api for more information.
+
.SS extractor.weibo.retweets
.IP "Type:" 6
\f[I]bool\f[]
@@ -1576,6 +1696,7 @@ See https://wallhaven.cc/help/api for more information.
.IP "Description:" 4
Extract media from retweeted posts.
+
.SS extractor.weibo.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -1586,6 +1707,7 @@ Extract media from retweeted posts.
.IP "Description:" 4
Download video files.
+
.SS extractor.[booru].tags
.IP "Type:" 6
\f[I]bool\f[]
@@ -1599,6 +1721,7 @@ and provide them as \f[I]tags_<type>\f[] metadata fields.
Note: This requires 1 additional HTTP request for each post.
+
.SS extractor.[manga-extractor].chapter-reverse
.IP "Type:" 6
\f[I]bool\f[]
@@ -1614,6 +1737,7 @@ Reverse the order of chapter URLs extracted from manga pages.
.br
* \f[I]false\f[]: Start with the first chapter
+
.SH DOWNLOADER OPTIONS
.SS downloader.*.enabled
.IP "Type:" 6
@@ -1625,6 +1749,7 @@ Reverse the order of chapter URLs extracted from manga pages.
.IP "Description:" 4
Enable/Disable this downloader module.
+
.SS downloader.*.filesize-min & .filesize-max
.IP "Type:" 6
\f[I]string\f[]
@@ -1643,6 +1768,7 @@ Possible values are valid integer or floating-point numbers
optionally followed by one of \f[I]k\f[], \f[I]m\f[], \f[I]g\f[], \f[I]t\f[] or \f[I]p\f[].
These suffixes are case-insensitive.
+
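A sketch using the suffix notation described above (limits illustrative):

.. code:: json

    {
        "downloader": {
            "filesize-min": "32k",
            "filesize-max": "1.5m"
        }
    }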
.SS downloader.*.mtime
.IP "Type:" 6
\f[I]bool\f[]
@@ -1654,6 +1780,7 @@ These suffixes are case-insensitive.
Use \f[I]Last-Modified\f[] HTTP response headers
to set file modification times.
+
.SS downloader.*.part
.IP "Type:" 6
\f[I]bool\f[]
@@ -1672,6 +1799,7 @@ resuming incomplete downloads.
* \f[I]false\f[]: Do not use \f[I].part\f[] files and write data directly
into the actual output files.
+
.SS downloader.*.part-directory
.IP "Type:" 6
\f[I]Path\f[]
@@ -1686,6 +1814,7 @@ Missing directories will be created as needed.
If this value is \f[I]null\f[], \f[I].part\f[] files are going to be stored
alongside the actual output files.
+
.SS downloader.*.rate
.IP "Type:" 6
\f[I]string\f[]
@@ -1703,6 +1832,7 @@ Possible values are valid integer or floating-point numbers
optionally followed by one of \f[I]k\f[], \f[I]m\f[], \f[I]g\f[], \f[I]t\f[] or \f[I]p\f[].
These suffixes are case-insensitive.
+
.SS downloader.*.retries
.IP "Type:" 6
\f[I]integer\f[]
@@ -1711,9 +1841,10 @@ These suffixes are case-insensitive.
\f[I]extractor.*.retries\f[]
.IP "Description:" 4
-Maximum number of retries during file downloads
+Maximum number of retries during file downloads,
or \f[I]-1\f[] for infinite retries.
+
.SS downloader.*.timeout
.IP "Type:" 6
\f[I]float\f[] or \f[I]null\f[]
@@ -1724,6 +1855,7 @@ or \f[I]-1\f[] for infinite retries.
.IP "Description:" 4
Connection timeout during file downloads.
+
.SS downloader.*.verify
.IP "Type:" 6
\f[I]bool\f[] or \f[I]string\f[]
@@ -1734,6 +1866,7 @@ Connection timeout during file downloads.
.IP "Description:" 4
Certificate validation during file downloads.
+
.SS downloader.http.adjust-extensions
.IP "Type:" 6
\f[I]bool\f[]
@@ -1745,6 +1878,7 @@ Certificate validation during file downloads.
Check the file headers of \f[I]jpg\f[], \f[I]png\f[], and \f[I]gif\f[] files
and adjust their filename extensions if they do not match.
+
.SS downloader.ytdl.format
.IP "Type:" 6
\f[I]string\f[]
@@ -1757,6 +1891,7 @@ Video \f[I]format selection
<https://github.com/ytdl-org/youtube-dl#format-selection>\f[]
directly passed to youtube-dl.
+
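A sketch passing a standard youtube-dl format selector straight through (selector illustrative):

.. code:: json

    {
        "downloader": {
            "ytdl": {
                "format": "bestvideo+bestaudio/best"
            }
        }
    }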
.SS downloader.ytdl.forward-cookies
.IP "Type:" 6
\f[I]bool\f[]
@@ -1767,6 +1902,7 @@ directly passed to youtube-dl.
.IP "Description:" 4
Forward cookies to youtube-dl.
+
.SS downloader.ytdl.logging
.IP "Type:" 6
\f[I]bool\f[]
@@ -1776,13 +1912,12 @@ Forward cookies to youtube-dl.
.IP "Description:" 4
Route youtube-dl's output through gallery-dl's logging system.
-.br
Otherwise youtube-dl will write its output directly to stdout/stderr.
-.br
Note: Set \f[I]quiet\f[] and \f[I]no_warnings\f[] in
\f[I]downloader.ytdl.raw-options\f[] to \f[I]true\f[] to suppress all output.
+
.SS downloader.ytdl.outtmpl
.IP "Type:" 6
\f[I]string\f[]
@@ -1791,7 +1926,7 @@ Note: Set \f[I]quiet\f[] and \f[I]no_warnings\f[] in
\f[I]null\f[]
.IP "Description:" 4
-The \f[I]Output Template <https://github.com/ytdl-org/youtube-dl#output-template>\f[]
+The \f[I]Output Template\f[]
used to generate filenames for files downloaded with youtube-dl.
Special values:
@@ -1805,6 +1940,7 @@ Note: An output template other than \f[I]null\f[] might
cause unexpected results in combination with other options
(e.g. \f[I]"skip": "enumerate"\f[])
+
.SS downloader.ytdl.raw-options
.IP "Type:" 6
\f[I]object\f[]
@@ -1821,11 +1957,11 @@ cause unexpected results in combination with other options
.IP "Description:" 4
Additional options passed directly to the \f[I]YoutubeDL\f[] constructor.
-.br
+
All available options can be found in \f[I]youtube-dl's docstrings
-.br
<https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L138-L318>\f[].
+
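Following the note under downloader.ytdl.logging, a sketch that suppresses youtube-dl's own output via raw-options:

.. code:: json

    {
        "downloader": {
            "ytdl": {
                "raw-options": {
                    "quiet": true,
                    "no_warnings": true
                }
            }
        }
    }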
.SH OUTPUT OPTIONS
.SS output.mode
.IP "Type:" 6
@@ -1848,6 +1984,7 @@ Controls the output string format and status indicators.
.br
* \f[I]"auto"\f[]: Automatically choose the best suitable output mode
+
.SS output.shorten
.IP "Type:" 6
\f[I]bool\f[]
@@ -1859,6 +1996,7 @@ Controls the output string format and status indicators.
Controls whether the output strings should be shortened to fit
on one console line.
+
.SS output.progress
.IP "Type:" 6
\f[I]bool\f[] or \f[I]string\f[]
@@ -1880,6 +2018,7 @@ multiple URLs as arguments.
as a custom \f[I]format string\f[]. Possible replacement keys are
\f[I]current\f[], \f[I]total\f[] and \f[I]url\f[].
+
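A sketch using the documented replacement keys as a custom format string (layout illustrative):

.. code:: json

    {
        "output": {
            "progress": "[{current}/{total}] {url}"
        }
    }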
.SS output.log
.IP "Type:" 6
\f[I]string\f[] or \f[I]Logging Configuration\f[]
@@ -1893,6 +2032,7 @@ Configuration for standard logging output to stderr.
If this is a simple \f[I]string\f[], it specifies
the format string for logging messages.
+
.SS output.logfile
.IP "Type:" 6
\f[I]Path\f[] or \f[I]Logging Configuration\f[]
@@ -1903,6 +2043,7 @@ the format string for logging messages.
.IP "Description:" 4
File to write logging output to.
+
.SS output.unsupportedfile
.IP "Type:" 6
\f[I]Path\f[] or \f[I]Logging Configuration\f[]
@@ -1915,6 +2056,7 @@ File to write external URLs unsupported by *gallery-dl* to.
The default format string here is \f[I]"{message}"\f[].
+
.SS output.num-to-str
.IP "Type:" 6
\f[I]bool\f[]
@@ -1926,6 +2068,7 @@ The default format string here is \f[I]"{message}"\f[].
Convert numeric values (\f[I]integer\f[] or \f[I]float\f[]) to \f[I]string\f[]
before outputting them as JSON.
+
.SH POSTPROCESSOR OPTIONS
.SS classify.mapping
.IP "Type:" 6
@@ -1949,6 +2092,7 @@ be stored in them.
Files with an extension not listed will be ignored and stored
in their default location.
+
.SS compare.action
.IP "Type:" 6
\f[I]string\f[]
@@ -1963,7 +2107,8 @@ The action to take when files do not compare as equal.
* \f[I]"replace"\f[]: Replace/Overwrite the old version with the new one
.br
* \f[I]"enumerate"\f[]: Add an enumeration index to the filename of the new
-version like \f[I]skip = "enumerate" <extractor.*.skip_>\f[]
+version like \f[I]skip = "enumerate"\f[]
+
.SS compare.shallow
.IP "Type:" 6
@@ -1975,6 +2120,7 @@ version like \f[I]skip = "enumerate" <extractor.*.skip_>\f[]
.IP "Description:" 4
Only compare file sizes. Do not read and compare their content.
+
.SS exec.async
.IP "Type:" 6
\f[I]bool\f[]
@@ -1986,15 +2132,16 @@ Only compare file sizes. Do not read and compare their content.
Controls whether to wait for a subprocess to finish
or to let it run asynchronously.
+
.SS exec.command
.IP "Type:" 6
\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
.IP "Example:" 4
-"convert {} {}.png && rm {}",
.br
-["echo", "{user[account]}", "{id}"]
+* "convert {} {}.png && rm {}"
.br
+* ["echo", "{user[account]}", "{id}"]
.IP "Description:" 4
The command to run.
@@ -2012,6 +2159,7 @@ Each element of this list is treated as a \f[I]format string\f[] using
the files' metadata as well as \f[I]{_path}\f[], \f[I]{_directory}\f[],
and \f[I]{_filename}\f[].
+
.SS exec.final
.IP "Type:" 6
\f[I]bool\f[]
@@ -2024,6 +2172,7 @@ Controls whether to execute \f[I]exec.command\f[] for each
downloaded file or only once after all files
have been downloaded successfully.
+
.SS metadata.mode
.IP "Type:" 6
\f[I]string\f[]
@@ -2043,6 +2192,7 @@ Select how to write metadata.
* \f[I]"custom"\f[]: result of applying \f[I]metadata.content-format\f[]
to a file's metadata dictionary
+
.SS metadata.directory
.IP "Type:" 6
\f[I]string\f[]
@@ -2057,6 +2207,7 @@ to a file's metadata dictionary
Directory where metadata files are stored, relative to the
current target location for file downloads.
+
.SS metadata.extension
.IP "Type:" 6
\f[I]string\f[]
@@ -2068,15 +2219,16 @@ current target location for file downloads.
Filename extension for metadata files that will be appended to the
original file names.
+
.SS metadata.extension-format
.IP "Type:" 6
\f[I]string\f[]
.IP "Example:" 4
-"{extension}.json",
.br
-"json"
+* "{extension}.json"
.br
+* "json"
.IP "Description:" 4
Custom format string to build filename extensions for metadata
@@ -2084,6 +2236,7 @@ files with, which will replace the original filename extensions.
Note: \f[I]metadata.extension\f[] is ignored if this option is set.
+
.SS metadata.content-format
.IP "Type:" 6
\f[I]string\f[]
@@ -2096,6 +2249,7 @@ Custom format string to build the content of metadata files with.
Note: Only applies for \f[I]"mode": "custom"\f[].
+
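Since content-format only applies for \f[I]"mode": "custom"\f[], a sketch of a complete post-processor entry; the metadata field names in the format string are illustrative:

.. code:: json

    {
        "name": "metadata",
        "mode": "custom",
        "extension": "txt",
        "content-format": "{id} {title}\n"
    }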
.SS mtime.key
.IP "Type:" 6
\f[I]string\f[]
@@ -2109,6 +2263,7 @@ Name of the metadata field whose value should be used.
This value must either be a UNIX timestamp or a
\f[I]datetime\f[] object.
+
.SS ugoira.extension
.IP "Type:" 6
\f[I]string\f[]
@@ -2119,6 +2274,7 @@ This value must either be a UNIX timestamp or a
.IP "Description:" 4
Filename extension for the resulting video files.
+
.SS ugoira.ffmpeg-args
.IP "Type:" 6
\f[I]list\f[] of \f[I]strings\f[]
@@ -2132,6 +2288,7 @@ Filename extension for the resulting video files.
.IP "Description:" 4
Additional FFmpeg command-line arguments.
+
.SS ugoira.ffmpeg-location
.IP "Type:" 6
\f[I]Path\f[]
@@ -2142,6 +2299,7 @@ Additional FFmpeg command-line arguments.
.IP "Description:" 4
Location of the \f[I]ffmpeg\f[] (or \f[I]avconv\f[]) executable to use.
+
.SS ugoira.ffmpeg-output
.IP "Type:" 6
\f[I]bool\f[]
@@ -2152,6 +2310,7 @@ Location of the \f[I]ffmpeg\f[] (or \f[I]avconv\f[]) executable to use.
.IP "Description:" 4
Show FFmpeg output.
+
.SS ugoira.ffmpeg-twopass
.IP "Type:" 6
\f[I]bool\f[]
@@ -2162,6 +2321,7 @@ Show FFmpeg output.
.IP "Description:" 4
Enable Two-Pass encoding.
+
.SS ugoira.framerate
.IP "Type:" 6
\f[I]string\f[]
@@ -2180,6 +2340,7 @@ based on delays between frames.
.br
* \f[I]null\f[] or an empty \f[I]string\f[]: Don't set an explicit frame rate.
+
.SS ugoira.keep-files
.IP "Type:" 6
\f[I]bool\f[]
@@ -2190,6 +2351,7 @@ based on delays between frames.
.IP "Description:" 4
Keep ZIP archives after conversion.
+
.SS ugoira.libx264-prevent-odd
.IP "Type:" 6
\f[I]bool\f[]
@@ -2201,7 +2363,7 @@ Keep ZIP archives after conversion.
Prevent \f[I]"width/height not divisible by 2"\f[] errors
when using \f[I]libx264\f[] or \f[I]libx265\f[] encoders
by applying a simple cropping filter. See this \f[I]Stack Overflow
-thread <https://stackoverflow.com/questions/20847674>\f[]
+thread\f[]
for more information.
This option, when \f[I]libx264/5\f[] is used, automatically
@@ -2209,17 +2371,6 @@ adds \f[I]["-vf", "crop=iw-mod(iw\\\\,2):ih-mod(ih\\\\,2)"]\f[]
to the list of FFmpeg command-line arguments
to reduce an odd width/height by 1 pixel and make them even.
-.SS zip.compression
-.IP "Type:" 6
-\f[I]string\f[]
-
-.IP "Default:" 9
-\f[I]"store"\f[]
-
-.IP "Description:" 4
-Compression method to use when writing the archive.
-
-Possible values are \f[I]"store"\f[], \f[I]"zip"\f[], \f[I]"bzip2"\f[], \f[I]"lzma"\f[].
.SS zip.extension
.IP "Type:" 6
@@ -2231,6 +2382,7 @@ Possible values are \f[I]"store"\f[], \f[I]"zip"\f[], \f[I]"bzip2"\f[], \f[I]"lz
.IP "Description:" 4
Filename extension for the created ZIP archive.
+
.SS zip.keep-files
.IP "Type:" 6
\f[I]bool\f[]
@@ -2241,6 +2393,7 @@ Filename extension for the created ZIP archive.
.IP "Description:" 4
Keep the actual files after writing them to a ZIP archive.
+
.SS zip.mode
.IP "Type:" 6
\f[I]string\f[]
@@ -2261,6 +2414,7 @@ This greatly reduces the chance a ZIP archive gets corrupted in
case the Python interpreter gets shut down unexpectedly
(power outage, SIGKILL) but is also a lot slower.
+
.SH MISCELLANEOUS OPTIONS
.SS cache.file
.IP "Type:" 6
@@ -2279,6 +2433,7 @@ cookies and API tokens across gallery-dl invocations.
Set this option to \f[I]null\f[] or an invalid path to disable
this cache.
+
.SS ciphers
.IP "Type:" 6
\f[I]bool\f[] or \f[I]string\f[]
@@ -2293,9 +2448,10 @@ this cache.
* \f[I]false\f[]: Leave the default cipher list as is
.br
* Any \f[I]string\f[]: Replace urllib3's default ciphers with these
-(See \f[I]SSLContext.set_ciphers() <https://docs.python.org/3/library/ssl.html#ssl.SSLContext.set_ciphers>\f[]
+(See \f[I]SSLContext.set_ciphers()\f[]
for details)
+
.SS pyopenssl
.IP "Type:" 6
\f[I]bool\f[]
@@ -2304,18 +2460,19 @@ for details)
\f[I]false\f[]
.IP "Description:" 4
-Use \f[I]pyOpenSSL <https://www.pyopenssl.org/en/stable/>\f[]-backed
+Use \f[I]pyOpenSSL\f[]-backed
SSL-support.
+
.SH API TOKENS & IDS
.SS extractor.deviantart.client-id & .client-secret
.IP "Type:" 6
\f[I]string\f[]
-.IP "How To:" 4
+.IP "How To:" 4
.br
* login and visit DeviantArt's
-\f[I]Applications & Keys <https://www.deviantart.com/developers/apps>\f[]
+\f[I]Applications & Keys\f[]
section
.br
* click "Register Application"
@@ -2332,20 +2489,21 @@ Submission Policy, and Terms of Service.
application and put them in your configuration file
as \f[I]"client-id"\f[] and \f[I]"client-secret"\f[]
.br
-* clear your \f[I]cache <cache.file_>\f[] (\f[I]--clear-cache\f[]) to delete
+* clear your \f[I]cache\f[] (\f[I]--clear-cache\f[]) to delete
the \f[I]access-token\f[] from the previous \f[I]client-id\f[]
.br
-* get a new \f[I]refresh-token <extractor.deviantart.refresh-token_>\f[]
+* get a new \f[I]refresh-token\f[]
if necessary
+
.SS extractor.flickr.api-key & .api-secret
.IP "Type:" 6
\f[I]string\f[]
-.IP "How To:" 4
+.IP "How To:" 4
.br
-* login and \f[I]Create an App <https://www.flickr.com/services/apps/create/apply/>\f[]
-in Flickr's \f[I]App Garden <https://www.flickr.com/services/>\f[]
+* login and \f[I]Create an App\f[]
+in Flickr's \f[I]App Garden\f[]
.br
* click "APPLY FOR A NON-COMMERCIAL KEY"
.br
@@ -2355,21 +2513,14 @@ and click "SUBMIT"
* copy \f[I]Key\f[] and \f[I]Secret\f[] and put them in your configuration
file
-.SS extractor.pawoo.access-token
-.IP "Type:" 6
-\f[I]string\f[]
-
-.IP "How To
-:" 4
-
.SS extractor.reddit.client-id & .user-agent
.IP "Type:" 6
\f[I]string\f[]
-.IP "How To:" 4
+.IP "How To:" 4
.br
-* login and visit the \f[I]apps <https://www.reddit.com/prefs/apps/>\f[]
+* login and visit the \f[I]apps\f[]
section of your account's preferences
.br
* click the "are you a developer? create an app..." button
@@ -2384,15 +2535,16 @@ section of your account's preferences
* use "\f[I]Python:<application name>:v1.0 (by /u/<username>)\f[]" as
user-agent and replace \f[I]<application name>\f[] and \f[I]<username>\f[]
accordingly (see Reddit's
-\f[I]API access rules <https://github.com/reddit/reddit/wiki/API>\f[])
+\f[I]API access rules\f[])
+
.SS extractor.smugmug.api-key & .api-secret
.IP "Type:" 6
\f[I]string\f[]
-.IP "How To:" 4
+.IP "How To:" 4
.br
-* login and \f[I]Apply for an API Key <https://api.smugmug.com/api/developer/apply>\f[]
+* login and \f[I]Apply for an API Key\f[]
.br
* use a random name and description,
set "Type" to "Application", "Platform" to "All",
@@ -2403,14 +2555,15 @@ and "Use" to "Non-Commercial"
* copy \f[I]API Key\f[] and \f[I]API Secret\f[]
and put them in your configuration file
+
.SS extractor.tumblr.api-key & .api-secret
.IP "Type:" 6
\f[I]string\f[]
-.IP "How To:" 4
+.IP "How To:" 4
.br
* login and visit Tumblr's
-\f[I]Applications <https://www.tumblr.com/oauth/apps>\f[] section
+\f[I]Applications\f[] section
.br
* click "Register application"
.br
@@ -2425,18 +2578,19 @@ callback URL"
* copy your \f[I]OAuth Consumer Key\f[] and \f[I]Secret Key\f[]
and put them in your configuration file
+
.SH CUSTOM TYPES
.SS Date
.IP "Type:" 6
\f[I]string\f[] or \f[I]integer\f[]
.IP "Example:" 4
-"2019-01-01T00:00:00",
.br
-"2019" with "%Y" as \f[I]date-format\f[],
+* "2019-01-01T00:00:00"
.br
-1546297200
+* "2019" with "%Y" as \f[I]date-format\f[]
.br
+* 1546297200
.IP "Description:" 4
A \f[I]Date\f[] value represents a specific point in time.
@@ -2446,26 +2600,27 @@ A \f[I]Date\f[] value represents a specific point in time.
.br
* If given as \f[I]integer\f[], it is interpreted as UTC timestamp.
+
.SS Path
.IP "Type:" 6
\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
.IP "Example:" 4
-"file.ext",
.br
-"~/path/to/file.ext",
+* "file.ext"
.br
-"$HOME/path/to/file.ext",
+* "~/path/to/file.ext"
.br
-["$HOME", "path", "to", "file.ext"]
+* "$HOME/path/to/file.ext"
.br
+* ["$HOME", "path", "to", "file.ext"]
.IP "Description:" 4
A \f[I]Path\f[] is a \f[I]string\f[] representing the location of a file
or directory.
-Simple \f[I]tilde expansion <https://docs.python.org/3/library/os.path.html#os.path.expanduser>\f[]
-and \f[I]environment variable expansion <https://docs.python.org/3/library/os.path.html#os.path.expandvars>\f[]
+Simple \f[I]tilde expansion\f[]
+and \f[I]environment variable expansion\f[]
is supported.
In Windows environments, backslashes (\f[I]"\\"\f[]) can, in addition to
@@ -2475,11 +2630,11 @@ they themselves have to be escaped.
The path \f[I]C:\\path\\to\\file.ext\f[] has therefore to be written as
\f[I]"C:\\\\path\\\\to\\\\file.ext"\f[] if you want to use backslashes.
+
.SS Logging Configuration
.IP "Type:" 6
\f[I]object\f[]
-
.IP "Example:" 4
.. code::
@@ -2490,6 +2645,8 @@ The path \f[I]C:\\path\\to\\file.ext\f[] has therefore to be written as
"encoding": "ascii"
}
+.. code::
+
{
"level": "debug",
"format": {
@@ -2511,19 +2668,20 @@ Extended logging output configuration.
or a dictionary with format strings for each loglevel.
In addition to the default
-\f[I]LogRecord attributes <https://docs.python.org/3/library/logging.html#logrecord-attributes>\f[],
+\f[I]LogRecord attributes\f[],
it is also possible to access the current
-\f[I]extractor <https://github.com/mikf/gallery-dl/blob/2e516a1e3e09cb8a9e36a8f6f7e41ce8d4402f5a/gallery_dl/extractor/common.py#L24>\f[]
-and \f[I]job <https://github.com/mikf/gallery-dl/blob/2e516a1e3e09cb8a9e36a8f6f7e41ce8d4402f5a/gallery_dl/job.py#L19>\f[]
-objects as well as their attributes
-(e.g. \f[I]"{extractor.url}"\f[])
+\f[I]extractor\f[],
+\f[I]job\f[],
+\f[I]path\f[],
+and keywords objects and their attributes, for example
+\f[I]"{extractor.url}"\f[], \f[I]"{path.filename}"\f[], \f[I]"{keywords.title}"\f[]
.br
* Default: \f[I]"[{name}][{levelname}] {message}"\f[]
.br
* format-date
.br
* Format string for \f[I]{asctime}\f[] fields in logging messages
-(see \f[I]strftime() directives <https://docs.python.org/3/library/time.html#time.strftime>\f[])
+(see \f[I]strftime() directives\f[])
.br
* Default: \f[I]"%Y-%m-%d %H:%M:%S"\f[]
.br
@@ -2542,7 +2700,7 @@ objects as well as their attributes
.br
* Mode in which the file is opened;
use \f[I]"w"\f[] to truncate or \f[I]"a"\f[] to append
-(see \f[I]open() <https://docs.python.org/3/library/functions.html#open>\f[])
+(see \f[I]open()\f[])
.br
* Default: \f[I]"w"\f[]
.br
@@ -2555,14 +2713,18 @@ use \f[I]"w"\f[] to truncate or \f[I]"a"\f[] to append
Note: path, mode and encoding are only applied when configuring
logging output to a file.
+
.SS Postprocessor Configuration
.IP "Type:" 6
\f[I]object\f[]
-
.IP "Example:" 4
.. code::
+{ "name": "mtime" }
+
+.. code::
+
{
"name": "zip",
"compression": "store",
@@ -2570,17 +2732,34 @@ logging output to a file.
"whitelist": ["mangadex", "exhentai", "nhentai"]
}
-
.IP "Description:" 4
-An object with the \f[I]name\f[] of a post-processor and its options.
-
-See \f[I]Postprocessor Options\f[] for a list of all available
-post-processors and their respective options.
-
-You can also set a \f[I]whitelist\f[] or \f[I]blacklist\f[] to
+An \f[I]object\f[] containing a \f[I]"name"\f[] attribute specifying the
+post-processor type, as well as any of its \f[I]options\f[].
+It is also possible to set a \f[I]"whitelist"\f[] or \f[I]"blacklist"\f[] to
only enable or disable a post-processor for the specified
extractor categories.
+The available post-processor types are:
+
+* \f[I]classify\f[]: Categorize files by filename extension
+.br
+* \f[I]compare\f[]: Compare versions of the same file and replace/enumerate them on mismatch
+.br
+(requires \f[I]downloader.*.part\f[] = \f[I]true\f[] and \f[I]extractor.*.skip\f[] = \f[I]false\f[])
+.br
+* \f[I]exec\f[]: Execute external commands
+.br
+* \f[I]metadata\f[]: Write metadata to separate files
+.br
+* \f[I]mtime\f[]: Set file modification time according to its metadata
+.br
+* \f[I]ugoira\f[]: Convert Pixiv Ugoira to WebM using \f[I]FFmpeg\f[]
+.br
+* \f[I]zip\f[]: Store files in a ZIP archive
+
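+For instance, to run the \f[I]metadata\f[] post-processor for all but a
+few sites (a minimal sketch; the category names are only illustrative):
+
+.. code::
+
+{
+"name": "metadata",
+"blacklist": ["twitter", "deviantart"]
+}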
+
.SH BUGS
https://github.com/mikf/gallery-dl/issues
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 2d7b0ff..ecb9f9b 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -88,10 +88,6 @@
"highlights": false,
"videos": true
},
- "kissmanga":
- {
- "captcha": "stop"
- },
"nijie":
{
"username": null,
@@ -115,10 +111,6 @@
"wait-min": 3.0,
"wait-max": 6.0
},
- "readcomiconline":
- {
- "captcha": "stop"
- },
"reddit":
{
"comments": 0,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index fc9f14b..3207269 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.15.0
+Version: 1.15.1
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -119,6 +119,14 @@ Description: ==========
$ choco install gallery-dl
+ Scoop
+ ----------
+
+ Apart from Chocolatey, *gallery-dl* is also available in the Scoop_ "main" bucket for Windows users:
+
+ .. code:: powershell
+
+ $ scoop install gallery-dl
Usage
=====
@@ -311,7 +319,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.0.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.1.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
@@ -324,6 +332,7 @@ Description: ==========
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
.. _Chocolatey: https://chocolatey.org/install
+ .. _Scoop: https://scoop.sh
.. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg
:target: https://pypi.org/project/gallery-dl/
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 648e273..c2e5cb4 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -12,7 +12,6 @@ docs/gallery-dl-example.conf
docs/gallery-dl.conf
gallery_dl/__init__.py
gallery_dl/__main__.py
-gallery_dl/aes.py
gallery_dl/cache.py
gallery_dl/cloudflare.py
gallery_dl/config.py
@@ -91,7 +90,6 @@ gallery_dl/extractor/issuu.py
gallery_dl/extractor/kabeuchi.py
gallery_dl/extractor/keenspot.py
gallery_dl/extractor/khinsider.py
-gallery_dl/extractor/kissmanga.py
gallery_dl/extractor/komikcast.py
gallery_dl/extractor/konachan.py
gallery_dl/extractor/lineblog.py
@@ -156,6 +154,7 @@ gallery_dl/extractor/vanillarock.py
gallery_dl/extractor/vsco.py
gallery_dl/extractor/wallhaven.py
gallery_dl/extractor/warosu.py
+gallery_dl/extractor/weasyl.py
gallery_dl/extractor/webtoons.py
gallery_dl/extractor/weibo.py
gallery_dl/extractor/wikiart.py
diff --git a/gallery_dl/aes.py b/gallery_dl/aes.py
deleted file mode 100644
index a45f50e..0000000
--- a/gallery_dl/aes.py
+++ /dev/null
@@ -1,337 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This is a stripped down version of youtube-dl's aes module.
-# All credit for this code goes to the authors of the youtube-dl project.
-# https://ytdl-org.github.io/youtube-dl/
-# https://github.com/ytdl-org/youtube-dl/
-
-import base64
-from math import ceil
-
-BLOCK_SIZE_BYTES = 16
-
-
-def aes_cbc_decrypt(data, key, iv):
- """
- Decrypt with aes in CBC mode
-
- @param {int[]} data cipher
- @param {int[]} key 16/24/32-Byte cipher key
- @param {int[]} iv 16-Byte IV
- @returns {int[]} decrypted data
- """
- expanded_key = key_expansion(key)
- block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
-
- decrypted_data = []
- previous_cipher_block = iv
- for i in range(block_count):
- block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
- block += [0] * (BLOCK_SIZE_BYTES - len(block))
-
- decrypted_block = aes_decrypt(block, expanded_key)
- decrypted_data += xor(decrypted_block, previous_cipher_block)
- previous_cipher_block = block
- decrypted_data = decrypted_data[:len(data)]
-
- return decrypted_data
-
-
-def aes_cbc_decrypt_text(data, key, iv):
- """
- Decrypt with aes in CBC mode
-
- @param {string} data base64 encoded cipher
- @param {int[]} key 16/24/32-Byte cipher key
- @param {int[]} iv 16-Byte IV
- @returns {string} decrypted data as utf8 encoded string
- """
- data = base64.standard_b64decode(bytes(data, "ascii"))
- charcodes = aes_cbc_decrypt(list(data), key, iv)
- last = charcodes[-1]
- if last <= 16:
- charcodes = charcodes[:-last]
- return bytes(charcodes).decode()
-
-
-def key_expansion(data):
- """
- Generate key schedule
-
- @param {int[]} data 16/24/32-Byte cipher key
- @returns {int[]} 176/208/240-Byte expanded key
- """
- data = data[:] # copy
- rcon_iteration = 1
- key_size_bytes = len(data)
- expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
-
- while len(data) < expanded_key_size_bytes:
- temp = data[-4:]
- temp = key_schedule_core(temp, rcon_iteration)
- rcon_iteration += 1
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
-
- for _ in range(3):
- temp = data[-4:]
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
-
- if key_size_bytes == 32:
- temp = data[-4:]
- temp = sub_bytes(temp)
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
-
- if key_size_bytes == 32:
- rounds = 3
- elif key_size_bytes == 24:
- rounds = 2
- else:
- rounds = 0
- for _ in range(rounds):
- temp = data[-4:]
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
- data = data[:expanded_key_size_bytes]
-
- return data
-
-
-def aes_decrypt(data, expanded_key):
- """
- Decrypt one block with aes
-
- @param {int[]} data 16-Byte cipher
- @param {int[]} expanded_key 176/208/240-Byte expanded key
- @returns {int[]} 16-Byte state
- """
- rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
-
- for i in range(rounds, 0, -1):
- data = xor(
- data,
- expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
- )
- if i != rounds:
- data = mix_columns_inv(data)
- data = shift_rows_inv(data)
- data = sub_bytes_inv(data)
- data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
-
- return data
-
-
-RCON = (
- 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
-)
-SBOX = (
- 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
- 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
- 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
- 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
- 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
- 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
- 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
- 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
- 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
- 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
- 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
- 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
- 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
- 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
- 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
- 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
- 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
- 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
- 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
- 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
- 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
- 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
- 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
- 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
- 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
- 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
- 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
- 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
- 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
- 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
- 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
- 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16,
-)
-SBOX_INV = (
- 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
- 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
- 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
- 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
- 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
- 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
- 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
- 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
- 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
- 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
- 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
- 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
- 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
- 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
- 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
- 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
- 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
- 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
- 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
- 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
- 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
- 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
- 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
- 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
- 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
- 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
- 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
- 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
- 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
- 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
- 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
- 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
-)
-MIX_COLUMN_MATRIX = (
- (0x2, 0x3, 0x1, 0x1),
- (0x1, 0x2, 0x3, 0x1),
- (0x1, 0x1, 0x2, 0x3),
- (0x3, 0x1, 0x1, 0x2),
-)
-MIX_COLUMN_MATRIX_INV = (
- (0xE, 0xB, 0xD, 0x9),
- (0x9, 0xE, 0xB, 0xD),
- (0xD, 0x9, 0xE, 0xB),
- (0xB, 0xD, 0x9, 0xE),
-)
-RIJNDAEL_EXP_TABLE = (
- 0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF,
- 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
- 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4,
- 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
- 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26,
- 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
- 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC,
- 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
- 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7,
- 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
- 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F,
- 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
- 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0,
- 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
- 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC,
- 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
- 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2,
- 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
- 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0,
- 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
- 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E,
- 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
- 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF,
- 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
- 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09,
- 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
- 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91,
- 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
- 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C,
- 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
- 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD,
- 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01,
-)
-RIJNDAEL_LOG_TABLE = (
- 0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6,
- 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
- 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef,
- 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
- 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a,
- 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
- 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24,
- 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
- 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94,
- 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
- 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62,
- 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
- 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42,
- 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
- 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca,
- 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
- 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74,
- 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
- 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5,
- 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
- 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec,
- 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
- 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86,
- 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
- 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc,
- 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
- 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47,
- 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
- 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89,
- 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
- 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18,
- 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07,
-)
-
-
-def sub_bytes(data):
- return [SBOX[x] for x in data]
-
-
-def sub_bytes_inv(data):
- return [SBOX_INV[x] for x in data]
-
-
-def rotate(data):
- return data[1:] + [data[0]]
-
-
-def key_schedule_core(data, rcon_iteration):
- data = rotate(data)
- data = sub_bytes(data)
- data[0] = data[0] ^ RCON[rcon_iteration]
- return data
-
-
-def xor(data1, data2):
- return [x ^ y for x, y in zip(data1, data2)]
-
-
-def rijndael_mul(a, b):
- if a == 0 or b == 0:
- return 0
- return RIJNDAEL_EXP_TABLE[
- (RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF
- ]
-
-
-def mix_column(data, matrix):
- data_mixed = []
- for row in range(4):
- mixed = 0
- for column in range(4):
- # xor is (+) and (-)
- mixed ^= rijndael_mul(data[column], matrix[row][column])
- data_mixed.append(mixed)
- return data_mixed
-
-
-def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
- data_mixed = []
- for i in range(4):
- column = data[i * 4: (i + 1) * 4]
- data_mixed += mix_column(column, matrix)
- return data_mixed
-
-
-def mix_columns_inv(data):
- return mix_columns(data, MIX_COLUMN_MATRIX_INV)
-
-
-def shift_rows_inv(data):
- data_shifted = []
- for column in range(4):
- for row in range(4):
- data_shifted.append(data[((column - row) & 0b11) * 4 + row])
- return data_shifted
-
-
-__all__ = ['key_expansion', 'aes_cbc_decrypt', 'aes_cbc_decrypt_text']
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index 4dc4f0d..fd973c3 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -166,7 +166,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
}
gallery = self._request_graphql(
"GalleriesDetailQueryRendererQuery", variables,
- "1afc7dede86ff73456b4defbc5aeb593e330b990943d114cbef7da5be0d7ce2f",
+ "fd367cacf9bebcdc0620bd749dbd8fc9b0ccbeb54fc76b8b4b95e66a8c0cba49",
)["gallery"]
self._photos = gallery["photos"]
@@ -194,8 +194,8 @@ class _500pxGalleryExtractor(_500pxExtractor):
variables["cursor"] = photos["pageInfo"]["endCursor"]
photos = self._request_graphql(
"GalleriesDetailPaginationContainerQuery", variables,
- "3fcbc9ea1589f31c86fc43a0a02c2163"
- "cab070f9d376651f270de9f30f031539",
+ "457c66d976f56863c81795f03e98cb54"
+ "3c7c6cdae7abeab8fe9e8e8a67479fa9",
)["galleryByOwnerIdAndSlugOrToken"]["photos"]
diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py
index fafb785..b248735 100644
--- a/gallery_dl/extractor/8muses.py
+++ b/gallery_dl/extractor/8muses.py
@@ -94,12 +94,12 @@ class _8musesAlbumExtractor(Extractor):
if albums:
for album in albums:
url = self.root + "/comics/album/" + album["permalink"]
- album = {
- "url" : url,
- "name" : album["name"],
- "private": album["isPrivate"],
+ yield Message.Queue, url, {
+ "url" : url,
+ "name" : album["name"],
+ "private" : album["isPrivate"],
+ "_extractor": _8musesAlbumExtractor,
}
- yield Message.Queue, url, album
if data["page"] >= data["pages"]:
return
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 53bc726..b8e39bc 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -59,7 +59,6 @@ modules = [
"kabeuchi",
"keenspot",
"khinsider",
- "kissmanga",
"komikcast",
"konachan",
"lineblog",
@@ -118,6 +117,7 @@ modules = [
"vsco",
"wallhaven",
"warosu",
+ "weasyl",
"webtoons",
"weibo",
"wikiart",
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index 1126615..be498bc 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -31,8 +31,14 @@ class BehanceExtractor(Extractor):
def _update(data):
# compress data to simple lists
if data["fields"] and isinstance(data["fields"][0], dict):
- data["fields"] = [field["name"] for field in data["fields"]]
- data["owners"] = [owner["display_name"] for owner in data["owners"]]
+ data["fields"] = [
+ field.get("name") or field.get("label")
+ for field in data["fields"]
+ ]
+ data["owners"] = [
+ owner.get("display_name") or owner.get("displayName")
+ for owner in data["owners"]
+ ]
tags = data.get("tags") or ()
if tags and isinstance(tags[0], dict):
@@ -101,7 +107,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
cookies = {
"_evidon_consent_cookie":
'{"consent_date":"2019-01-31T09:41:15.132Z"}',
- "bcp": "815b5eee-8bdf-4898-ac79-33c2bcc0ed19",
+ "bcp": "4c34489d-914c-46cd-b44c-dfd0e661136d",
"gk_suid": "66981391",
"gki": '{"feature_project_view":false,'
'"feature_discover_login_prompt":false,'
@@ -184,14 +190,267 @@ class BehanceCollectionExtractor(BehanceExtractor):
self.collection_id = match.group(1)
def galleries(self):
- url = "{}/collection/{}/a".format(self.root, self.collection_id)
- params = {"offset": 0}
- headers = {"X-Requested-With": "XMLHttpRequest"}
+ url = self.root + "/v3/graphql"
+ headers = {
+ "Origin" : self.root,
+ "Referer": self.root + "/collection/" + self.collection_id,
+ "X-BCP" : "4c34489d-914c-46cd-b44c-dfd0e661136d",
+ "X-NewRelic-ID" : "VgUFVldbGwsFU1BRDwUBVw==",
+ "X-Requested-With": "XMLHttpRequest",
+ }
+ cookies = {
+ "bcp" : "4c34489d-914c-46cd-b44c-dfd0e661136d",
+ "gk_suid": "66981391",
+ "ilo0" : "true",
+ }
+
+ query = """
+query GetMoodboardItemsAndRecommendations(
+ $id: Int!
+ $firstItem: Int!
+ $afterItem: String
+ $shouldGetRecommendations: Boolean!
+ $shouldGetItems: Boolean!
+ $shouldGetMoodboardFields: Boolean!
+ ) {
+ viewer @include(if: $shouldGetMoodboardFields) {
+ isOptedOutOfRecommendations
+ }
+ moodboard(id: $id) {
+ ...moodboardFields @include(if: $shouldGetMoodboardFields)
+
+ items(first: $firstItem, after: $afterItem) @include(if: $shouldGetItems)
+ {
+ pageInfo {
+ endCursor
+ hasNextPage
+ }
+ nodes {
+ ...nodesFields
+ }
+ }
+
+ recommendedItems(first: 80) @include(if: $shouldGetRecommendations) {
+ nodes {
+ ...nodesFields
+ fetchSource
+ }
+ }
+ }
+ }
+
+ fragment moodboardFields on Moodboard {
+ id
+ label
+ privacy
+ followerCount
+ isFollowing
+ projectCount
+ url
+ isOwner
+ owners {
+ id
+ displayName
+ url
+ firstName
+ location
+ locationUrl
+ images {
+ size_50 {
+ url
+ }
+ size_100 {
+ url
+ }
+ size_115 {
+ url
+ }
+ size_230 {
+ url
+ }
+ size_138 {
+ url
+ }
+ size_276 {
+ url
+ }
+ }
+ }
+ }
+
+ fragment projectFields on Project {
+ id
+ isOwner
+ publishedOn
+ matureAccess
+ hasMatureContent
+ modifiedOn
+ name
+ url
+ isPrivate
+ slug
+ fields {
+ label
+ }
+ colors {
+ r
+ g
+ b
+ }
+ owners {
+ url
+ displayName
+ id
+ location
+ locationUrl
+ isProfileOwner
+ images {
+ size_50 {
+ url
+ }
+ size_100 {
+ url
+ }
+ size_115 {
+ url
+ }
+ size_230 {
+ url
+ }
+ size_138 {
+ url
+ }
+ size_276 {
+ url
+ }
+ }
+ }
+ covers {
+ size_original {
+ url
+ }
+ size_max_808 {
+ url
+ }
+ size_808 {
+ url
+ }
+ size_404 {
+ url
+ }
+ size_202 {
+ url
+ }
+ size_230 {
+ url
+ }
+ size_115 {
+ url
+ }
+ }
+ stats {
+ views {
+ all
+ }
+ appreciations {
+ all
+ }
+ comments {
+ all
+ }
+ }
+ }
+
+ fragment exifDataValueFields on exifDataValue {
+ id
+ label
+ value
+ searchValue
+ }
+
+ fragment nodesFields on MoodboardItem {
+ id
+ entityType
+ width
+ height
+ flexWidth
+ flexHeight
+ images {
+ size
+ url
+ }
+
+ entity {
+ ... on Project {
+ ...projectFields
+ }
+
+ ... on ImageModule {
+ project {
+ ...projectFields
+ }
+
+ exifData {
+ lens {
+ ...exifDataValueFields
+ }
+ software {
+ ...exifDataValueFields
+ }
+ makeAndModel {
+ ...exifDataValueFields
+ }
+ focalLength {
+ ...exifDataValueFields
+ }
+ iso {
+ ...exifDataValueFields
+ }
+ location {
+ ...exifDataValueFields
+ }
+ flash {
+ ...exifDataValueFields
+ }
+ exposureMode {
+ ...exifDataValueFields
+ }
+ shutterSpeed {
+ ...exifDataValueFields
+ }
+ aperture {
+ ...exifDataValueFields
+ }
+ }
+ }
+
+ ... on MediaCollectionComponent {
+ project {
+ ...projectFields
+ }
+ }
+ }
+ }
+"""
+ variables = {
+ "afterItem": "MAo=",
+ "firstItem": 40,
+ "id" : self.collection_id,
+ "shouldGetItems" : True,
+ "shouldGetMoodboardFields": False,
+ "shouldGetRecommendations": False,
+ }
+ data = {"query": query, "variables": variables}
while True:
- data = self.request(url, params=params, headers=headers).json()
- for item in data["items"]:
- yield item["project"]
- if len(data["items"]) < 40:
+ items = self.request(
+ url, method="POST", headers=headers,
+ cookies=cookies, json=data,
+ ).json()["data"]["moodboard"]["items"]
+
+ for node in items["nodes"]:
+ yield node["entity"]
+
+ if not items["pageInfo"]["hasNextPage"]:
return
- params["offset"] += len(data["items"])
+ variables["afterItem"] = items["pageInfo"]["endCursor"]
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index a0f4d1c..9cceaee 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -262,9 +262,11 @@ class DeviantartExtractor(Extractor):
return folder
raise exception.NotFoundError("folder")
- def _folder_urls(self, folders, category):
- url = "{}/{}/{}/0/".format(self.root, self.user, category)
- return [(url + folder["name"], folder) for folder in folders]
+ def _folder_urls(self, folders, category, extractor):
+ base = "{}/{}/{}/0/".format(self.root, self.user, category)
+ for folder in folders:
+ folder["_extractor"] = extractor
+ yield base + folder["name"], folder
def _update_content_default(self, deviation, content):
public = "premium_folder_data" not in deviation
@@ -450,7 +452,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
if self.flat and not self.group:
return self.api.gallery_all(self.user, self.offset)
folders = self.api.gallery_folders(self.user)
- return self._folder_urls(folders, "gallery")
+ return self._folder_urls(folders, "gallery", DeviantartFolderExtractor)
class DeviantartFolderExtractor(DeviantartExtractor):
@@ -589,7 +591,8 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
self.api.collections(self.user, folder["folderid"])
for folder in folders
)
- return self._folder_urls(folders, "favourites")
+ return self._folder_urls(
+ folders, "favourites", DeviantartCollectionExtractor)
class DeviantartCollectionExtractor(DeviantartExtractor):
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index cb4df11..06b5ba2 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -392,6 +392,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
def items(self):
self.login()
yield Message.Version, 1
+ data = {"_extractor": ExhentaiGalleryExtractor}
while True:
last = None
@@ -402,7 +403,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
if url == last:
continue
last = url
- yield Message.Queue, url, {}
+ yield Message.Queue, url, data
if 'class="ptdd">&gt;<' in page or ">No hits found</p>" in page:
return
diff --git a/gallery_dl/extractor/fallenangels.py b/gallery_dl/extractor/fallenangels.py
index a2d8c04..44863a9 100644
--- a/gallery_dl/extractor/fallenangels.py
+++ b/gallery_dl/extractor/fallenangels.py
@@ -66,9 +66,9 @@ class FallenangelsMangaExtractor(MangaExtractor):
category = "fallenangels"
pattern = r"(?:https?://)?((manga|truyen)\.fascans\.com/manga/[^/]+)/?$"
test = (
- ("http://manga.fascans.com/manga/trinity-seven", {
- "url": "293057f264de6c438b979bd1c3de4719568db452",
- "keyword": "50e0374dba60734230e4284b5ffdadef5104ae62",
+ ("https://manga.fascans.com/manga/chronos-ruler", {
+ "url": "eea07dd50f5bc4903aa09e2cc3e45c7241c9a9c2",
+ "keyword": "c414249525d4c74ad83498b3c59a813557e59d7e",
}),
("https://truyen.fascans.com/manga/rakudai-kishi-no-eiyuutan", {
"url": "51a731a6b82d5eb7a335fbae6b02d06aeb2ab07b",
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index 0ab42db..bf925b6 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -173,8 +173,7 @@ EXTRACTORS = {
),
"test-manga":
("https://sensescans.com/reader/series/yotsubato/", {
- "url": "305e6eb6160e3bb90c3de39ff5fb7c971e052087",
- "keyword": "562fb5a7362a4cb43d59d5c8a6ea8080fc65cf99",
+ "count": ">= 3",
}),
},
"_ckey": "chapterclass",
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 950a174..2a5ef6e 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -236,7 +236,9 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)"
test = (
("https://www.furaffinity.net/view/21835115/", {
- "url": "d80254eb4fba654597b4df8320d55916e11ba375",
+ "pattern": r"https://d\d*\.facdn\.net/(download/)?art/mirlinthloth"
+ r"/music/1488278723/1480267446.mirlinthloth_dj_fennmink"
+ r"_-_bude_s_4_ever\.mp3",
"keyword": {
"artist" : "mirlinthloth",
"artist_url" : "mirlinthloth",
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index ac1bca3..ba2fe5d 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -100,13 +100,13 @@ class GfycatImageExtractor(GfycatExtractor):
"gfyName": "GrayGenerousCowrie",
"gfyNumber": "755075459",
"title": "Bottom's up",
- "userName": "jackson3oh3",
+ "username": "jackson3oh3",
"createDate": 1495884169,
"md5": "a4796e05b0db9ba9ce5140145cd318aa",
"width": 400,
"height": 224,
- "frameRate": 23,
- "numFrames": 158,
+ "frameRate": 23.0,
+ "numFrames": 158.0,
"views": int,
},
}),
diff --git a/gallery_dl/extractor/hentaicafe.py b/gallery_dl/extractor/hentaicafe.py
index 1ab71d6..833135e 100644
--- a/gallery_dl/extractor/hentaicafe.py
+++ b/gallery_dl/extractor/hentaicafe.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2019 Mike Fährmann
+# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -50,17 +50,17 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor):
# single chapter
("https://hentai.cafe/hazuki-yuuto-summer-blues/", {
"url": "f8e24a07d6fbb7c6a6ec5ad8ad8faf2436f8751b",
- "keyword": "eb9f98544098c961bd8cf5dbe69e6da51c4fb2f6",
+ "keyword": "5af1c570bb5f533a32b3375f9cdaa17a0152ba67",
}),
# multi-chapter
("https://hentai.cafe/saitom-saitom-box/", {
"url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076",
- "keyword": "28271062d7b4a2f99a0e1a894f69af8c5581a6bb",
+ "keyword": "3c28517d356cac6acbd9895c9eeefae505304078",
}),
# new-style URL
("https://hentai.cafe/hc.fyi/2782", {
"url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076",
- "keyword": "28271062d7b4a2f99a0e1a894f69af8c5581a6bb",
+ "keyword": "3c28517d356cac6acbd9895c9eeefae505304078",
}),
# foolslide URL
("https://hentai.cafe/manga/series/saitom-box/", {
@@ -80,12 +80,14 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor):
chapters.reverse()
return chapters
- tags , pos = text.extract(page, "<p>Tags: ", "</br>")
+ url , pos = text.extract(page, '<link rel="canonical" href="', '"')
+ tags , pos = text.extract(page, "<p>Tags: ", "</br>", pos)
artist, pos = text.extract(page, "\nArtists: ", "</br>", pos)
manga , pos = text.extract(page, "/manga/read/", "/", pos)
data = {
- "tags" : text.split_html(tags)[::2],
- "artist": text.split_html(artist),
+ "manga_id": text.parse_int(url.rpartition("/")[2]),
+ "tags" : text.split_html(tags)[::2],
+ "artist" : text.split_html(artist),
}
HentaicafeChapterExtractor._data(manga).update(data)
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index 6e82091..5eb46b6 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -34,7 +34,7 @@ class HentaifoundryExtractor(Extractor):
yield Message.Directory, data
self.set_filters()
- for page_url in util.advance(self.get_image_pages(), self.start_post):
+ for page_url in util.advance(self._pagination(), self.start_post):
image = self.get_image_metadata(page_url)
image.update(data)
yield Message.Url, image["src"], image
@@ -50,13 +50,12 @@ class HentaifoundryExtractor(Extractor):
self.request(self.root + "/?enterAgree=1")
return {"user": self.user}
- def get_image_pages(self):
- """Yield urls of all relevant image pages"""
+ def _pagination(self, begin='thumbTitle"><a href="', end='"'):
num = self.start_page
while True:
page = self.request("{}/page/{}".format(self.page_url, num)).text
- yield from text.extract_iter(page, 'thumbTitle"><a href="', '"')
+ yield from text.extract_iter(page, begin, end)
if 'class="pager"' not in page or 'class="last hidden"' in page:
return
@@ -90,6 +89,33 @@ class HentaifoundryExtractor(Extractor):
return text.nameext_from_url(data["src"], data)
+ def get_story_metadata(self, html):
+ """Collect url and metadata for a story"""
+ extr = text.extract_from(html)
+ data = {
+ "user" : self.user,
+ "title" : text.unescape(extr(
+ "<div class='titlebar'>", "</a>").rpartition(">")[2]),
+ "author" : text.unescape(extr('alt="', '"')),
+ "date" : text.parse_datetime(extr(
+ ">Updated<", "</span>").rpartition(">")[2], "%B %d, %Y"),
+ "status" : extr("class='indent'>", "<"),
+ }
+
+ for c in ("Chapters", "Words", "Comments", "Views", "Rating"):
+ data[c.lower()] = text.parse_int(extr(
+ ">" + c + ":</span>", "<").replace(",", ""))
+
+ data["description"] = text.unescape(extr(
+ "class='storyDescript'>", "<div"))
+ path = extr('href="', '"')
+ data["src"] = self.root + path
+ data["index"] = text.parse_int(path.rsplit("/", 2)[1])
+ data["ratings"] = [text.unescape(r) for r in text.extract_iter(extr(
+ "class='ratings_box'", "</div>"), "title='", "'")]
+
+ return text.nameext_from_url(data["src"], data)
+
def set_filters(self):
"""Set site-internal filters to show all images"""
token = text.unquote(text.extract(
@@ -127,19 +153,41 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor):
"""Extractor for all images of a hentai-foundry-user"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
- r"/(?:pictures/user/([^/]+)(?:/page/(\d+))?/?$"
- r"|user/([^/]+)/profile)")
+ r"/user/([^/]+)/profile")
+ test = ("https://www.hentai-foundry.com/user/Tenpura/profile",)
+
+ def __init__(self, match):
+ HentaifoundryExtractor.__init__(self, match, match.group(1))
+
+ def items(self):
+ user = "/user/" + self.user
+ return self._dispatch_extractors((
+ (HentaifoundryPicturesExtractor ,
+ self.root + "/pictures" + user),
+ (HentaifoundryScrapsExtractor,
+ self.root + "/pictures" + user + "/scraps"),
+ (HentaifoundryStoriesExtractor,
+ self.root + "/stories" + user),
+ (HentaifoundryFavoriteExtractor,
+ self.root + user + "/faves/pictures"),
+ ), ("pictures",))
+
+
+class HentaifoundryPicturesExtractor(HentaifoundryExtractor):
+ """Extractor for all pictures of a hentaifoundry user"""
+ subcategory = "pictures"
+ pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
+ r"/pictures/user/([^/]+)(?:/page/(\d+))?/?$")
test = (
("https://www.hentai-foundry.com/pictures/user/Tenpura", {
"url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28",
}),
("https://www.hentai-foundry.com/pictures/user/Tenpura/page/3"),
- ("https://www.hentai-foundry.com/user/Tenpura/profile"),
)
def __init__(self, match):
HentaifoundryExtractor.__init__(
- self, match, match.group(1) or match.group(3), match.group(2))
+ self, match, match.group(1), match.group(2))
self.page_url = "{}/pictures/user/{}".format(self.root, self.user)
def get_job_metadata(self):
@@ -284,3 +332,68 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
def skip(self, _):
return 0
+
+
+class HentaifoundryStoriesExtractor(HentaifoundryExtractor):
+ """Extractor for stories of a hentai-foundry user"""
+ subcategory = "stories"
+ pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
+ r"/stories/user/([^/]+)(?:/page/(\d+))?/?$")
+ test = ("https://www.hentai-foundry.com/stories/user/SnowWolf35", {
+ "count": ">= 35",
+ "keyword": {
+ "author" : "SnowWolf35",
+ "chapters" : int,
+ "comments" : int,
+ "date" : "type:datetime",
+ "description": str,
+ "index" : int,
+ "rating" : int,
+ "ratings" : list,
+ "status" : "re:(Inc|C)omplete",
+ "title" : str,
+ "user" : "SnowWolf35",
+ "views" : int,
+ "words" : int,
+ },
+ })
+
+ def __init__(self, match):
+ HentaifoundryExtractor.__init__(self, match, match.group(1))
+ self.page_url = "{}/stories/user/{}".format(self.root, self.user)
+
+ def items(self):
+ self.get_job_metadata()
+ self.set_filters()
+ stories = self._pagination('<div class="storyRow">', '</tr></table>')
+ for story_html in util.advance(stories, self.start_post):
+ story = self.get_story_metadata(story_html)
+ yield Message.Directory, story
+ yield Message.Url, story["src"], story
+
+
+class HentaifoundryStoryExtractor(HentaifoundryExtractor):
+ """Extractor for a hentaifoundry story"""
+ subcategory = "story"
+ pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
+ r"/stories/user/([^/]+)/(\d+)")
+ test = (("https://www.hentai-foundry.com/stories/user/SnowWolf35"
+ "/26416/Overwatch-High-Chapter-Voting-Location"), {
+ "url": "5a67cfa8c3bf7634c8af8485dd07c1ea74ee0ae8",
+ "keyword": {"title": "Overwatch High Chapter Voting Location"},
+ })
+
+ def __init__(self, match):
+ HentaifoundryExtractor.__init__(self, match, match.group(1))
+ self.index = match.group(2)
+
+ def items(self):
+ story_url = "{}/stories/user/{}/{}/x?enterAgree=1".format(
+ self.root, self.user, self.index)
+ page = self.request(story_url).text
+ story = self.get_story_metadata(page)
+ yield Message.Directory, story
+ yield Message.Url, story["src"], story
+
+ def skip(self, _):
+ return 0
diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py
deleted file mode 100644
index 348453d..0000000
--- a/gallery_dl/extractor/kissmanga.py
+++ /dev/null
@@ -1,222 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2015-2020 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract manga-chapters and entire manga from https://kissmanga.com/"""
-
-from .common import ChapterExtractor, MangaExtractor, Extractor
-from .. import text, aes, exception
-from ..cache import cache
-import hashlib
-import ast
-import re
-
-
-class RedirectMixin():
- """Detect and handle redirects to CAPTCHA pages"""
-
- def request(self, url, **kwargs):
- while True:
- response = Extractor.request(self, url, **kwargs)
- if not response.history or "/AreYouHuman" not in response.url:
- return response
- if self.config("captcha", "stop") == "wait":
- self.log.warning(
- "Redirect to \n%s\nVisit this URL in your browser, solve "
- "the CAPTCHA, and press ENTER to continue", response.url)
- try:
- input()
- except (EOFError, OSError):
- pass
- else:
- raise exception.StopExtraction(
- "Redirect to \n%s\nVisit this URL in your browser and "
- "solve the CAPTCHA to continue", response.url)
-
-
-class KissmangaBase(RedirectMixin):
- """Base class for kissmanga extractors"""
- category = "kissmanga"
- archive_fmt = "{chapter_id}_{page}"
- root = "https://kissmanga.com"
-
- @staticmethod
- def parse_chapter_string(data):
- """Parse 'chapter_string' value contained in 'data'"""
- data["chapter_string"] = text.unescape(data["chapter_string"])
-
- match = re.match((
- r"(?:[Vv]ol\.0*(\d+) )?"
- r"(?:[Cc]h\.)?0*(\d+)"
- r"(?:[.:]0*(\d+))?"
- r"(?: *[:-]? *(.+))?"
- ), data["chapter_string"])
-
- if not match:
- match = re.match((
- r".+?(?: -)? ()"
- r"0*(\d+)(?:[Vv.]0*(\d+))?"
- r"(?: *[:-]? *(.+))?"
- ), data["chapter_string"])
-
- if match:
- volume, chapter, minor, title = match.groups()
- else:
- volume, chapter, minor, title = 0, 0, "", data["chapter_string"]
-
- data["volume"] = text.parse_int(volume)
- data["chapter"] = text.parse_int(chapter)
- data["chapter_minor"] = "." + minor if minor else ""
- data["title"] = title if title and title != "Read Online" else ""
- return data
-
-
-class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor):
- """Extractor for manga-chapters from kissmanga.com"""
- pattern = (r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com"
- r"(/Manga/[^/?&#]+/[^/?&#]+\?id=(\d+))")
- test = (
- ("https://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", {
- "url": "46e63fd63e9e16f19bc1e6c7a45dc060815642fd",
- "keyword": "1cd0b5214ac7ae4d53e2fd8fec40ceec84cd09bf",
- }),
- ("https://kissmanga.com/Manga/Urban-Tales/a?id=256717", {
- "url": "c26be8bf9c2abacee2076979d021634092cf38f1",
- "keyword": "e1d16780df8e04076ed2b5f0637c5b710ec2f2ea",
- }),
- ("https://kissmanga.com/Manga/Monster/Monster-79?id=7608", {
- "count": 23,
- "keyword": "f433a7a8fae840e17dace316a243fa27faab86de",
- }),
- ("https://kissmanga.com/Manga/Houseki-no-Kuni/Oneshot?id=404189", {
- "count": 49,
- "keyword": "cea131c9fe9c71309b3270cd86718d4d1198c31c",
- }),
- ("https://kissmanga.com/mAnGa/mOnStEr/Monster-79?id=7608"),
- )
-
- def __init__(self, match):
- ChapterExtractor.__init__(self, match)
- self.chapter_id = match.group(2)
- self.session.headers["Referer"] = self.root
-
- def metadata(self, page):
- title = text.extract(page, "<title>", "</title>")[0].strip()
- manga, cinfo = title.split("\n")[1:3]
- data = {
- "manga": manga.strip(),
- "chapter_string": cinfo.strip(),
- "chapter_id": text.parse_int(self.chapter_id),
- "lang": "en",
- "language": "English",
- }
- return self.parse_chapter_string(data)
-
- def images(self, page):
- self.session.headers["Referer"] = None
- try:
- key = self.build_aes_key(page)
- iv = (0xa5, 0xe8, 0xe2, 0xe9, 0xc2, 0x72, 0x1b, 0xe0,
- 0xa8, 0x4a, 0xd6, 0x60, 0xc4, 0x72, 0xc1, 0xf3)
- return [
- (aes.aes_cbc_decrypt_text(
- data, key, iv).partition("&")[0], None)
- for data in text.extract_iter(
- page, 'push(wrapKA("', '"'
- )
- ]
- except UnicodeDecodeError:
- self.log.error("Failed to decrypt image URLs")
- except (ValueError, IndexError):
- self.log.error("Failed to get AES key")
- return []
-
- def build_aes_key(self, page):
- chko = self._chko_from_external_script()
-
- for script in self._scripts(page):
- for stmt in [s.strip() for s in script.split(";")]:
-
- if stmt.startswith("var _"):
- name, _, value = stmt[4:].partition(" = ")
- name += "[0]"
- value = ast.literal_eval(value)[0]
-
- elif stmt.startswith("chko = "):
- stmt = stmt[7:]
- if stmt == name:
- chko = value
- elif stmt == "chko + " + name:
- chko = chko + value
- elif stmt == name + " + chko":
- chko = value + chko
- else:
- self.log.warning("unrecognized expression: '%s'", stmt)
-
- elif stmt.startswith("key = "):
- pass
-
- else:
- self.log.warning("unrecognized statement: '%s'", stmt)
-
- return list(hashlib.sha256(chko.encode("ascii")).digest())
-
- @staticmethod
- def _scripts(page):
- end = 0
- while True:
- pos = page.find("key = ", end)
- if pos == -1:
- return
- beg = page.rindex('<script type="text/javascript">', 0, pos) + 31
- end = page.index('</script>', pos)
- yield page[beg:end]
-
- @cache(maxage=3600)
- def _chko_from_external_script(self):
- script = self.request(self.root + "/Scripts/lo.js").text
-
- pos = script.index("var chko")
- var = text.extract(script, "=", "[", pos)[0].lstrip()
- idx = text.extract(script, "[", "]", pos)[0]
-
- pos = script.index(var)
- lst = text.extract(script, "=", ";", pos)[0]
- return ast.literal_eval(lst.strip())[int(idx)]
-
-
-class KissmangaMangaExtractor(KissmangaBase, MangaExtractor):
- """Extractor for manga from kissmanga.com"""
- chapterclass = KissmangaChapterExtractor
- pattern = (r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com"
- r"(/Manga/[^/?&#]+/?)$")
- test = (
- ("https://kissmanga.com/Manga/Dropout", {
- "url": "9e3a6f715b229aa3fafa42a1d5da5d65614cb532",
- "keyword": "32b09711c28b481845acc32e3bb6054cfc90224d",
- }),
- ("https://kissmanga.com/manga/feng-shen-ji"), # lowercase
- )
-
- def chapters(self, page):
- results = []
- manga, pos = text.extract(page, ' class="barTitle">', '\ninformation')
- page , pos = text.extract(page, ' class="listing">', '</table>', pos)
- manga = manga.strip()
- needle = '" title="Read ' + manga + ' '
- manga = text.unescape(manga)
-
- for item in text.extract_iter(page, '<a href="', ' online">'):
- url, _, chapter = item.partition(needle)
- data = {
- "manga": manga, "chapter_string": chapter,
- "chapter_id": text.parse_int(url.rpartition("=")[2]),
- "lang": "en", "language": "English",
- }
- self.parse_chapter_string(data)
- results.append((self.root + url, data))
- return results
diff --git a/gallery_dl/extractor/mangoxo.py b/gallery_dl/extractor/mangoxo.py
index 0e04f97..5743498 100644
--- a/gallery_dl/extractor/mangoxo.py
+++ b/gallery_dl/extractor/mangoxo.py
@@ -167,6 +167,8 @@ class MangoxoChannelExtractor(MangoxoExtractor):
self.login()
num = total = 1
url = "{}/channel/{}/album/".format(self.root, self.channel_id)
+ data = {"_extractor": MangoxoAlbumExtractor}
+
yield Message.Version, 1
while True:
@@ -174,7 +176,7 @@ class MangoxoChannelExtractor(MangoxoExtractor):
for album in text.extract_iter(
page, '<a class="link black" href="', '"'):
- yield Message.Queue, album, {}
+ yield Message.Queue, album, data
if num == 1:
total = self._total_pages(page)
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 19a2b92..f9dc886 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -19,8 +19,8 @@ class NewgroundsExtractor(Extractor):
"""Base class for newgrounds extractors"""
category = "newgrounds"
directory_fmt = ("{category}", "{artist[:10]:J, }")
- filename_fmt = "{category}_{index}_{title}.{extension}"
- archive_fmt = "{index}"
+ filename_fmt = "{category}_{_index}_{title}.{extension}"
+ archive_fmt = "{_index}"
root = "https://www.newgrounds.com"
cookiedomain = ".newgrounds.com"
cookienames = ("NG_GG_username", "vmk1du5I8m")
@@ -44,6 +44,13 @@ class NewgroundsExtractor(Extractor):
if url:
yield Message.Directory, post
yield Message.Url, url, text.nameext_from_url(url, post)
+
+ for num, url in enumerate(text.extract_iter(
+ post["_comment"], 'data-smartload-src="', '"'), 1):
+ post["num"] = num
+ post["_index"] = "{}_{:>02}".format(post["index"], num)
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
else:
self.log.warning(
"Unable to get download URL for '%s'", post_url)
@@ -97,8 +104,9 @@ class NewgroundsExtractor(Extractor):
else:
data = self._extract_media_data(extr, post_url)
- data["comment"] = text.unescape(text.remove_html(extr(
- 'id="author_comments">', '</div>'), "", ""))
+ data["_comment"] = extr('id="author_comments"', '</div>')
+ data["comment"] = text.unescape(text.remove_html(
+ data["_comment"].partition(">")[2], "", ""))
data["favorites"] = text.parse_int(extr(
'id="faves_load">', '<').replace(",", ""))
data["score"] = text.parse_float(extr('id="score_number">', '<'))
@@ -125,33 +133,54 @@ class NewgroundsExtractor(Extractor):
"width" : text.parse_int(full('width="', '"')),
"height" : text.parse_int(full('height="', '"')),
}
- data["index"] = text.parse_int(
- data["url"].rpartition("/")[2].partition("_")[0])
+ index = data["url"].rpartition("/")[2].partition("_")[0]
+ data["index"] = text.parse_int(index)
+ data["_index"] = index
return data
@staticmethod
def _extract_audio_data(extr, url):
+ index = url.split("/")[5]
return {
"title" : text.unescape(extr('"og:title" content="', '"')),
"description": text.unescape(extr(':description" content="', '"')),
"date" : text.parse_datetime(extr(
'itemprop="datePublished" content="', '"')),
"url" : extr('{"url":"', '"').replace("\\/", "/"),
- "index" : text.parse_int(url.split("/")[5]),
+ "index" : text.parse_int(index),
+ "_index" : index,
"rating" : "",
}
- @staticmethod
- def _extract_media_data(extr, url):
+ def _extract_media_data(self, extr, url):
+ index = url.split("/")[5]
+ title = extr('"og:title" content="', '"')
+ src = extr('{"url":"', '"')
+
+ if src:
+ src = src.replace("\\/", "/")
+ date = text.parse_datetime(extr(
+ 'itemprop="datePublished" content="', '"'))
+ else:
+ url = self.root + "/portal/video/" + index
+ headers = {
+ "Accept": "application/json, text/javascript, */*; q=0.01",
+ "X-Requested-With": "XMLHttpRequest",
+ "Referer": self.root,
+ }
+ data = self.request(url, headers=headers).json()
+ src = data["sources"]["360p"][0]["src"].replace(".360p.", ".")
+ date = text.parse_timestamp(src.rpartition("?")[2])
+
return {
- "title" : text.unescape(extr('"og:title" content="', '"')),
- "url" : extr('{"url":"', '"').replace("\\/", "/"),
- "date" : text.parse_datetime(extr(
- 'itemprop="datePublished" content="', '"')),
+ "title" : text.unescape(title),
+ "url" : src,
+ "date" : date,
"description": text.unescape(extr(
'itemprop="description" content="', '"')),
"rating" : extr('class="rated-', '"'),
- "index" : text.parse_int(url.split("/")[5]),
+ "index" : text.parse_int(index),
+ "_index" : index,
}
def _pagination(self, kind):
@@ -215,6 +244,10 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
("https://art.ngfiles.com/images/0/94_tomfulp_ryu-is-hawt.gif", {
"url": "57f182bcbbf2612690c3a54f16ffa1da5105245e",
}),
+ ("https://www.newgrounds.com/art/view/sailoryon/yon-dream-buster", {
+ "url": "84eec95e663041a80630df72719f231e157e5f5d",
+ "count": 2,
+ })
)
def __init__(self, match):
@@ -236,23 +269,21 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
r"(/(?:portal/view|audio/listen)/\d+)")
test = (
- ("https://www.newgrounds.com/portal/view/589549", {
- "url": "48d916d819c99139e6a3acbbf659a78a867d363e",
- "content": "ceb865426727ec887177d99e0d20bb021e8606ae",
+ ("https://www.newgrounds.com/portal/view/595355", {
+ "pattern": r"https://uploads\.ungrounded\.net/alternate/564000"
+ r"/564957_alternate_31\.mp4\?1359712249",
"keyword": {
- "artist" : ["psychogoldfish", "tomfulp"],
- "comment" : "re:People have been asking me how I like the ",
- "date" : "dt:2012-02-08 21:40:56",
- "description": "re:People have been asking how I like the ",
+ "artist" : ["kickinthehead", "danpaladin", "tomfulp"],
+ "comment" : "re:My fan trailer for Alien Hominid HD!",
+ "date" : "dt:2013-02-01 09:50:49",
"favorites" : int,
- "filename" : "527818_alternate_1896",
- "index" : 589549,
- "rating" : "t",
+ "filename" : "564957_alternate_31",
+ "index" : 595355,
+ "rating" : "e",
"score" : float,
- "tags" : ["newgrounds", "psychogoldfish",
- "rage", "redesign-2012"],
- "title" : "Redesign Rage",
- "user" : "psychogoldfish",
+ "tags" : ["alienhominid", "trailer"],
+ "title" : "Alien Hominid Fan Trailer",
+ "user" : "kickinthehead",
},
}),
("https://www.newgrounds.com/audio/listen/609768", {
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index aae17a3..2394acf 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -127,9 +127,25 @@ class NijieExtractor(AsynchronousMixin, Extractor):
class NijieUserExtractor(NijieExtractor):
- """Extractor for works of a nijie-user"""
+ """Extractor for nijie user profiles"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/members(?:_illust)?\.php\?id=(\d+)"
+ cookiedomain = None
+ pattern = BASE_PATTERN + r"/members\.php\?id=(\d+)"
+ test = ("https://nijie.info/members.php?id=44",)
+
+ def items(self):
+ base = "{}/{{}}.php?id={}".format(self.root, self.user_id)
+ return self._dispatch_extractors((
+ (NijieIllustrationExtractor, base.format("members_illust")),
+ (NijieDoujinExtractor , base.format("members_dojin")),
+ (NijieFavoriteExtractor , base.format("user_like_illust_view")),
+ ), ("illustration", "doujin"))
+
+
+class NijieIllustrationExtractor(NijieExtractor):
+ """Extractor for all illustrations of a nijie-user"""
+ subcategory = "illustration"
+ pattern = BASE_PATTERN + r"/members_illust\.php\?id=(\d+)"
test = (
("https://nijie.info/members_illust.php?id=44", {
"url": "66c4ff94c6e77c0765dd88f2d8c663055fda573e",
@@ -152,7 +168,6 @@ class NijieUserExtractor(NijieExtractor):
("https://nijie.info/members_illust.php?id=43", {
"exception": exception.NotFoundError,
}),
- ("https://nijie.info/members.php?id=44"),
)
def image_ids(self):
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index abf88cd..5e7e387 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -106,7 +106,7 @@ class NozomiPostExtractor(NozomiExtractor):
# multiple images per post
("https://nozomi.la/post/25588032.html", {
"url": "6aa3b7db385abcc9d374bdffd19187bccbf8f228",
- "keyword": "0aa99cbaaeada2984a1fbf912274409c6ba106d4",
+ "keyword": "8c3a2561ccc9ad429be9850d1383a952d0b4a8ab",
"count": 7,
}),
)
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index c07c4b7..6d7b27a 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -180,16 +180,11 @@ class OAuthBase(Extractor):
self.send(msg)
def _generate_message(self, names, values):
- if len(names) == 1:
- _vh = "This value has"
- _is = "is"
- _it = "it"
- _va = "this value"
- else:
- _vh = "These values have"
- _is = "are"
- _it = "them"
- _va = "these values"
+ _vh, _va, _is, _it = (
+ ("This value has", "this value", "is", "it")
+ if len(names) == 1 else
+ ("These values have", "these values", "are", "them")
+ )
msg = "\nYour {} {}\n\n{}\n\n".format(
" and ".join("'" + n + "'" for n in names),
@@ -197,23 +192,21 @@ class OAuthBase(Extractor):
"\n".join(values),
)
- if self.cache:
- opt = self.oauth_config(names[0])
- if opt is None or opt == "cache":
- msg += _vh + " been cached and will automatically be used."
- else:
- msg += (
- "Set 'extractor.{}.{}' to \"cache\" to use {}.".format(
- self.subcategory, names[0], _it,
- )
- )
+ opt = self.oauth_config(names[0])
+ if self.cache and (opt is None or opt == "cache"):
+ msg += _vh + " been cached and will automatically be used."
else:
msg += "Put " + _va + " into your configuration file as \n"
msg += " and\n".join(
"'extractor." + self.subcategory + "." + n + "'"
for n in names
)
- msg += "."
+ if self.cache:
+ msg += (
+ "\nor set\n'extractor.{}.{}' to \"cache\""
+ .format(self.subcategory, names[0])
+ )
+ msg += "\nto use {}.".format(_it)
return msg
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 8f2d633..f08055c 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -95,8 +95,8 @@ class PahealPostExtractor(PahealExtractor):
pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net"
r"/post/view/(\d+)")
test = ("https://rule34.paheal.net/post/view/481609", {
- "url": "d3fd0f82762716fe3fb03c9c923e61c13ce22204",
- "keyword": "35748081bfeaab48f909f4b097a4d79b2be12538",
+ "url": "a91d579be030753282f55b8cb4eeaa89c45a9116",
+ "keyword": "44154bdac3d6cf289d0d9739a566acd8b7839e50",
"content": "7b924bcf150b352ac75c9d281d061e174c851a11",
})
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index eaf97fd..ee8f9bb 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -105,7 +105,7 @@ class PixivUserExtractor(PixivExtractor):
# avatar (#595, 623)
("https://www.pixiv.net/en/users/173530", {
"options": (("avatar", True),),
- "content": "22af450d4dbaf4973d370f164f66f48c7382a6de",
+ "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
"range": "1",
}),
# deleted account
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py
index bbbc709..6b36cdd 100644
--- a/gallery_dl/extractor/pornhub.py
+++ b/gallery_dl/extractor/pornhub.py
@@ -29,9 +29,9 @@ class PornhubGalleryExtractor(PornhubExtractor):
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/album/(\d+)"
test = (
- ("https://www.pornhub.com/album/1708982", {
+ ("https://www.pornhub.com/album/17218841", {
"pattern": r"https://\w+.phncdn.com/pics/albums/\d+/\d+/\d+/\d+/",
- "count": 93,
+ "count": 81,
"keyword": {
"id": int,
"num": int,
@@ -40,11 +40,11 @@ class PornhubGalleryExtractor(PornhubExtractor):
"caption": str,
"user": "Unknown",
"gallery": {
- "id" : 1708982,
+ "id" : 17218841,
"score": int,
"views": int,
"tags" : list,
- "title": "Random Hentai",
+ "title": "Hentai/Ecchi 41",
},
},
}),
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 8290d2d..e5b4b44 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -16,7 +16,7 @@ import time
import json
-BASE_PATTERN = r"(?:https?://)?([^/.]+\.reactor\.cc)"
+BASE_PATTERN = r"(?:https?://)?((?:[^/.]+\.)?reactor\.cc)"
class ReactorExtractor(SharedConfigMixin, Extractor):
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index dda4809..7030c81 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -1,20 +1,19 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2019 Mike Fährmann
+# Copyright 2016-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract comic-issues and entire comics from https://readcomiconline.to/"""
+"""Extractors for https://readcomiconline.to/"""
-from .common import ChapterExtractor, MangaExtractor
-from .kissmanga import RedirectMixin
-from .. import text
+from .common import Extractor, ChapterExtractor, MangaExtractor
+from .. import text, exception
import re
-class ReadcomiconlineBase(RedirectMixin):
+class ReadcomiconlineBase():
"""Base class for readcomiconline extractors"""
category = "readcomiconline"
directory_fmt = ("{category}", "{comic}", "{issue:>03}")
@@ -22,6 +21,25 @@ class ReadcomiconlineBase(RedirectMixin):
archive_fmt = "{issue_id}_{page}"
root = "https://readcomiconline.to"
+ def request(self, url, **kwargs):
+ """Detect and handle redirects to CAPTCHA pages"""
+ while True:
+ response = Extractor.request(self, url, **kwargs)
+ if not response.history or "/AreYouHuman" not in response.url:
+ return response
+ if self.config("captcha", "stop") == "wait":
+ self.log.warning(
+ "Redirect to \n%s\nVisit this URL in your browser, solve "
+ "the CAPTCHA, and press ENTER to continue", response.url)
+ try:
+ input()
+ except (EOFError, OSError):
+ pass
+ else:
+ raise exception.StopExtraction(
+ "Redirect to \n%s\nVisit this URL in your browser and "
+ "solve the CAPTCHA to continue", response.url)
+
class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
"""Extractor for comic-issues from readcomiconline.to"""
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index b07d024..a9252f5 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -152,7 +152,7 @@ class SankakuTagExtractor(SankakuExtractor):
test = (
("https://chan.sankakucomplex.com/?tags=bonocho", {
"count": 5,
- "pattern": r"https://cs\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
+ "pattern": r"https://c?s\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
}),
# respect 'page' query parameter
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 236a001..c98a300 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -110,16 +110,17 @@ class TwitterExtractor(Extractor):
twitpics = []
for url in tweet["entities"].get("urls", ()):
url = url["expanded_url"]
- if "//twitpic.com/" in url:
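+            # '/photos/<name>' links point to twitpic user pages, not images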
+ if "//twitpic.com/" in url and "/photos/" not in url:
response = self.request(url, fatal=False)
if response.status_code >= 400:
continue
url = text.extract(
response.text, 'name="twitter:image" value="', '"')[0]
- twitpics.append({
- "original_info": {},
- "media_url" : url,
- })
+ if url:
+ twitpics.append({
+ "original_info": {},
+ "media_url" : url,
+ })
if twitpics:
if "extended_entities" in tweet:
tweet["extended_entities"]["media"].extend(twitpics)
@@ -312,6 +313,7 @@ class TwitterSearchExtractor(TwitterExtractor):
test = ("https://twitter.com/search?q=nature", {
"range": "1-40",
"count": 40,
+ "archive": False,
})
def metadata(self):
@@ -378,6 +380,15 @@ class TwitterTweetExtractor(TwitterExtractor):
"url": "0f6a841e23948e4320af7ae41125e0c5b3cadc98",
"content": "f29501e44d88437fe460f5c927b7543fda0f6e34",
}),
+ # original retweets (#1026)
+ ("https://twitter.com/jessica_3978/status/1296304589591810048", {
+ "options": (("retweets", "original"),),
+ "count": 2,
+ "keyword": {
+ "tweet_id": 1296296016002547713,
+ "date" : "dt:2020-08-20 04:00:28",
+ },
+ }),
)
def __init__(self, match):
@@ -451,7 +462,8 @@ class TwitterAPI():
endpoint = "2/timeline/conversation/{}.json".format(tweet_id)
tweets = []
for tweet in self._pagination(endpoint):
- if tweet["id_str"] == tweet_id:
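+            # '_retweet_id_str' is set by _pagination() when 'retweets' is
+            # "original", so a requested retweet ID matches its original tweet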
+ if tweet["id_str"] == tweet_id or \
+ tweet.get("_retweet_id_str") == tweet_id:
tweets.append(tweet)
if "quoted_status_id_str" in tweet:
tweet_id = tweet["quoted_status_id_str"]
@@ -536,6 +548,7 @@ class TwitterAPI():
entry_tweet="tweet-", entry_cursor="cursor-bottom-"):
if params is None:
params = self.params.copy()
+ original_retweets = (self.extractor.retweets == "original")
while True:
cursor = tweet = None
@@ -558,12 +571,17 @@ class TwitterAPI():
"Skipping %s (deleted)",
entry["entryId"][len(entry_tweet):])
continue
- tweet["user"] = users[tweet["user_id_str"]]
if "retweeted_status_id_str" in tweet:
retweet = tweets.get(tweet["retweeted_status_id_str"])
- if retweet:
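+                        # yield the original tweet in place of the retweet,
+                        # but remember the retweet's ID for tweet()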
+ if original_retweets:
+ if not retweet:
+ continue
+ retweet["_retweet_id_str"] = tweet["id_str"]
+ tweet = retweet
+ elif retweet:
tweet["author"] = users[retweet["user_id_str"]]
+ tweet["user"] = users[tweet["user_id_str"]]
yield tweet
if "quoted_status_id_str" in tweet:
diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py
new file mode 100644
index 0000000..a39fbf1
--- /dev/null
+++ b/gallery_dl/extractor/weasyl.py
@@ -0,0 +1,236 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.weasyl.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https://)?(?:www\.)?weasyl\.com/"
+
+
+class WeasylExtractor(Extractor):
+ category = "weasyl"
+ directory_fmt = ("{category}", "{owner_login}")
+ filename_fmt = "{submitid} {title}.{extension}"
+ archive_fmt = "{submitid}"
+ root = "https://www.weasyl.com"
+
+ @staticmethod
+ def populate_submission(data):
+ # Some submissions don't have content and can be skipped
+ if "submission" in data["media"]:
+ data["url"] = data["media"]["submission"][0]["url"]
+ data["date"] = text.parse_datetime(
+ data["posted_at"][:19], "%Y-%m-%dT%H:%M:%S")
+ text.nameext_from_url(data["url"], data)
+ return True
+ return False
+
+ def request_submission(self, submitid):
+ return self.request(
+ "{}/api/submissions/{}/view".format(self.root, submitid)).json()
+
+ def retrieve_journal(self, journalid):
+ data = self.request(
+ "{}/api/journals/{}/view".format(self.root, journalid)).json()
+ data["extension"] = "html"
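+        # the 'text:' prefix makes gallery-dl's text downloader write the
+        # journal's HTML content itself instead of fetching a URL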
+ data["html"] = "text:" + data["content"]
+ data["date"] = text.parse_datetime(data["posted_at"])
+ return data
+
+ def submissions(self, owner_login, folderid=None):
+ url = "{}/api/users/{}/gallery".format(self.root, owner_login)
+ params = {
+ "nextid" : None,
+ "folderid": folderid,
+ }
+
+ while True:
+ data = self.request(url, params=params).json()
+ for submission in data["submissions"]:
+ if self.populate_submission(submission):
+ submission["folderid"] = folderid
+                    # TODO: do any submissions have more than one URL? If so,
+                    # a Message.Urllist of the 'submission' media URLs would work.
+ yield Message.Url, submission["url"], submission
+ if not data["nextid"]:
+ return
+ params["nextid"] = data["nextid"]
+
+
+class WeasylSubmissionExtractor(WeasylExtractor):
+ subcategory = "submission"
+ pattern = BASE_PATTERN + r"(?:~[\w-]+/submissions|submission)/(\d+)"
+ test = (
+ ("https://www.weasyl.com/~fiz/submissions/2031/a-wesley", {
+ "pattern": "https://cdn.weasyl.com/~fiz/submissions/2031/41ebc1c29"
+ "40be928532785dfbf35c37622664d2fbb8114c3b063df969562fc5"
+ "1/fiz-a-wesley.png",
+ "keyword": {
+ "comments" : int,
+ "date" : "dt:2012-04-20 00:38:04",
+ "description" : "<p>(flex)</p>",
+ "favorites" : int,
+ "folder_name" : "Wesley Stuff",
+ "folderid" : 2081,
+ "friends_only": False,
+ "owner" : "Fiz",
+ "owner_login" : "fiz",
+ "rating" : "general",
+ "submitid" : 2031,
+ "subtype" : "visual",
+ "tags" : list,
+ "title" : "A Wesley!",
+ "type" : "submission",
+ "views" : int,
+ },
+ }),
+ ("https://www.weasyl.com/submission/2031/a-wesley"),
+ )
+
+ def __init__(self, match):
+ WeasylExtractor.__init__(self, match)
+ self.submitid = match.group(1)
+
+ def items(self):
+ data = self.request_submission(self.submitid)
+ if self.populate_submission(data):
+ yield Message.Directory, data
+ yield Message.Url, data["url"], data
+
+
+class WeasylSubmissionsExtractor(WeasylExtractor):
+ subcategory = "submissions"
+ pattern = BASE_PATTERN + r"(?:~|submissions/)([\w-]+)/?$"
+ test = (
+ ("https://www.weasyl.com/~tanidareal", {
+ "count": ">= 200"
+ }),
+ ("https://www.weasyl.com/submissions/tanidareal"),
+ )
+
+ def __init__(self, match):
+ WeasylExtractor.__init__(self, match)
+ self.owner_login = match.group(1)
+
+ def items(self):
+ yield Message.Version, 1
+ yield Message.Directory, {"owner_login": self.owner_login}
+ yield from self.submissions(self.owner_login)
+
+
+class WeasylFolderExtractor(WeasylExtractor):
+ subcategory = "folder"
+ directory_fmt = ("{category}", "{owner_login}", "{folder_name}")
+ pattern = BASE_PATTERN + r"submissions/([\w-]+)\?folderid=(\d+)"
+ test = ("https://www.weasyl.com/submissions/tanidareal?folderid=7403", {
+ "count": ">= 12"
+ })
+
+ def __init__(self, match):
+ WeasylExtractor.__init__(self, match)
+ self.owner_login, self.folderid = match.groups()
+
+ def items(self):
+ yield Message.Version, 1
+        submissions = self.submissions(self.owner_login, self.folderid)
+        # folder names are only available from the single-submission API call
+        msg, url, data = next(submissions)
+        details = self.request_submission(data["submitid"])
+        yield Message.Directory, details
+        yield msg, url, data
+        yield from submissions
+
+
+class WeasylJournalExtractor(WeasylExtractor):
+ subcategory = "journal"
+ filename_fmt = "{journalid} {title}.{extension}"
+ archive_fmt = "{journalid}"
+ pattern = BASE_PATTERN + r"journal/(\d+)"
+ test = ("https://www.weasyl.com/journal/17647/bbcode", {
+ "keyword": {
+ "title" : "BBCode",
+ "date" : "dt:2013-09-19 23:11:23",
+ "content": "<p><a>javascript:alert(42);</a></p>"
+ "<p>No more of that!</p>",
+ },
+ })
+
+ def __init__(self, match):
+ WeasylExtractor.__init__(self, match)
+ self.journalid = match.group(1)
+
+ def items(self):
+ data = self.retrieve_journal(self.journalid)
+ yield Message.Version, 1
+ yield Message.Directory, data
+ yield Message.Url, data["html"], data
+
+
+class WeasylJournalsExtractor(WeasylExtractor):
+ subcategory = "journals"
+ filename_fmt = "{journalid} {title}.{extension}"
+ archive_fmt = "{journalid}"
+ pattern = BASE_PATTERN + r"journals/([\w-]+)"
+ test = ("https://www.weasyl.com/journals/charmander", {
+ "count": ">= 2",
+ })
+
+ def __init__(self, match):
+ WeasylExtractor.__init__(self, match)
+ self.owner_login = match.group(1)
+
+ def items(self):
+ yield Message.Version, 1
+ yield Message.Directory, {"owner_login": self.owner_login}
+
+ url = "{}/journals/{}".format(self.root, self.owner_login)
+ page = self.request(url).text
+ for journalid in text.extract_iter(page, 'href="/journal/', '/'):
+ data = self.retrieve_journal(journalid)
+ yield Message.Url, data["html"], data
+
+
+class WeasylFavoriteExtractor(WeasylExtractor):
+ subcategory = "favorite"
+ directory_fmt = ("{category}", "{owner_login}", "Favorites")
+ pattern = BASE_PATTERN + r"favorites\?userid=(\d+)&feature=submit"
+ test = ("https://www.weasyl.com/favorites?userid=184616&feature=submit", {
+ "count": ">= 5",
+ })
+
+ def __init__(self, match):
+ WeasylExtractor.__init__(self, match)
+ self.userid = match.group(1)
+
+ def items(self):
+ owner_login = lastid = None
+ url = self.root + "/favorites"
+ params = {
+ "userid" : self.userid,
+ "feature": "submit",
+ }
+
+ while True:
+ page = self.request(url, params=params).text
+ pos = page.index('id="favorites-content"')
+
+ if not owner_login:
+ owner_login = text.extract(page, '<a href="/~', '"')[0]
+ yield Message.Directory, {"owner_login": owner_login}
+
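+            # the same submission ID appears multiple times in a row;
+            # compare against the previous one to skip the duplicates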
+ for submitid in text.extract_iter(page, "/submissions/", "/", pos):
+ if submitid == lastid:
+ continue
+ lastid = submitid
+ submission = self.request_submission(submitid)
+ if self.populate_submission(submission):
+ yield Message.Url, submission["url"], submission
+
+ if "&amp;nextid=" not in page:
+ return
+ params["nextid"] = submitid
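For reference, the new extractors can be exercised with URLs like those
from the tests above:

    $ gallery-dl "https://www.weasyl.com/~fiz/submissions/2031/a-wesley"
    $ gallery-dl "https://www.weasyl.com/submissions/tanidareal?folderid=7403"
    $ gallery-dl "https://www.weasyl.com/journals/charmander"
    $ gallery-dl "https://www.weasyl.com/favorites?userid=184616&feature=submit"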
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 0b1b2d9..a325f87 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -47,21 +47,31 @@ class WeiboExtractor(Extractor):
file["num"] = num
yield Message.Url, file["url"], file
+ def statuses(self):
+ """Returns an iterable containing all relevant 'status' objects"""
+
+ def _status_by_id(self, status_id):
+ url = "{}/detail/{}".format(self.root, status_id)
+ page = self.request(url, fatal=False).text
+ data = text.extract(page, "var $render_data = [", "][0] || {};")[0]
+ return json.loads(data)["status"] if data else None
+
def _files_from_status(self, status):
- images = status.pop("pics", ())
page_info = status.pop("page_info", ())
-
- for image in images:
- pid = image["pid"]
- if "large" in image:
- image = image["large"]
- geo = image.get("geo") or {}
- yield text.nameext_from_url(image["url"], {
- "url" : image["url"],
- "pid" : pid,
- "width" : text.parse_int(geo.get("width")),
- "height": text.parse_int(geo.get("height")),
- })
+ if "pics" in status:
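+            # timeline results contain at most 9 images per status; if
+            # 'pic_num' reports more, refetch the status from its detail
+            # page to get the complete list (#926)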
+ if len(status["pics"]) < status["pic_num"]:
+ status = self._status_by_id(status["id"]) or status
+ for image in status.pop("pics"):
+ pid = image["pid"]
+ if "large" in image:
+ image = image["large"]
+ geo = image.get("geo") or {}
+ yield text.nameext_from_url(image["url"], {
+ "url" : image["url"],
+ "pid" : pid,
+ "width" : text.parse_int(geo.get("width")),
+ "height": text.parse_int(geo.get("height")),
+ })
if self.videos and "media_info" in page_info:
info = page_info["media_info"]
@@ -79,9 +89,6 @@ class WeiboExtractor(Extractor):
data["_ytdl_extra"] = {"protocol": "m3u8_native"}
yield data
- def statuses(self):
- """Returns an iterable containing all relevant 'status' objects"""
-
class WeiboUserExtractor(WeiboExtractor):
"""Extractor for all images of a user on weibo.cn"""
@@ -107,13 +114,13 @@ class WeiboUserExtractor(WeiboExtractor):
while True:
data = self.request(url, params=params).json()
+ cards = data["data"]["cards"]
- for card in data["data"]["cards"]:
+ if not cards:
+ return
+ for card in cards:
if "mblog" in card:
yield card["mblog"]
-
- if not data["data"]["cards"]:
- return
params["page"] += 1
@@ -145,9 +152,7 @@ class WeiboStatusExtractor(WeiboExtractor):
self.status_id = match.group(1)
def statuses(self):
- url = "{}/detail/{}".format(self.root, self.status_id)
- page = self.request(url, notfound="status").text
- data = text.extract(page, "var $render_data = [", "][0] || {};")[0]
- if not data:
+ status = self._status_by_id(self.status_id)
+ if not status:
raise exception.NotFoundError("status")
- return (json.loads(data)["status"],)
+ return (status,)
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index 2548ead..b7d116a 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -59,13 +59,13 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
def metadata(self, page):
extr = text.extract_from(page)
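+        # text.extract_from() consumes the page sequentially; '"title"'
+        # appears before the user fields and must be extracted first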
+ title = extr('"title":"', '"')
user = {
"id" : text.parse_int(extr('"id_user":', ',')),
"display": extr('"display":"', '"'),
"sex" : extr('"sex":"', '"'),
"name" : self.user,
}
- title = extr('"title":"', '"')
user["description"] = extr(
'<small class="mobile-hide">', '</small>').strip()
tags = extr('<em>Tagged:</em>', '<').strip()
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 7d08b86..b62240b 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -228,7 +228,7 @@ class DownloadJob(Job):
for pp in postprocessors:
pp.prepare(pathfmt)
- if archive and kwdict in archive:
+ if archive and archive.check(kwdict):
pathfmt.fix_extension()
self.handle_skip()
return
@@ -385,8 +385,23 @@ class DownloadJob(Job):
self.sleep = config("sleep")
if not config("download", True):
+ # monkey-patch method to do nothing and always return True
self.download = pathfmt.fix_extension
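+        # open the download archive before the 'skip' option is handled,
+        # so that 'skip=false' can disable archive checks as well (#1023)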
+ archive = config("archive")
+ if archive:
+ path = util.expand_path(archive)
+ try:
+ if "{" in path:
+ path = util.Formatter(path).format_map(kwdict)
+ self.archive = util.DownloadArchive(path, self.extractor)
+ except Exception as exc:
+ self.extractor.log.warning(
+ "Failed to open download archive at '%s' ('%s: %s')",
+ path, exc.__class__.__name__, exc)
+ else:
+ self.extractor.log.debug("Using download archive '%s'", path)
+
skip = config("skip", True)
if skip:
self._skipexc = None
@@ -401,21 +416,10 @@ class DownloadJob(Job):
self._skipcnt = 0
self._skipmax = text.parse_int(smax)
else:
+ # monkey-patch methods to always return False
pathfmt.exists = lambda x=None: False
-
- archive = config("archive")
- if archive:
- path = util.expand_path(archive)
- try:
- if "{" in path:
- path = util.Formatter(path).format_map(kwdict)
- self.archive = util.DownloadArchive(path, self.extractor)
- except Exception as exc:
- self.extractor.log.warning(
- "Failed to open download archive at '%s' ('%s: %s')",
- path, exc.__class__.__name__, exc)
- else:
- self.extractor.log.debug("Using download archive '%s'", path)
+ if self.archive:
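+                # pathfmt.exists() now always returns False; reusing it
+                # disables archive checks as well (#1023)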
+ self.archive.check = pathfmt.exists
postprocessors = self.extractor.config_accumulate("postprocessors")
if postprocessors:
@@ -449,7 +453,7 @@ class DownloadJob(Job):
def _build_blacklist(self):
wlist = self.extractor.config("whitelist")
- if wlist:
+ if wlist is not None:
if isinstance(wlist, str):
wlist = wlist.split(",")
blist = {e.category for e in extractor._list_classes()}
@@ -457,7 +461,7 @@ class DownloadJob(Job):
return blist
blist = self.extractor.config("blacklist")
- if blist:
+ if blist is not None:
if isinstance(blist, str):
blist = blist.split(",")
blist = set(blist)
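Comparing against None lets an explicitly configured empty value take
effect instead of being treated as unset (#1051, #1056). A hypothetical
config line to illustrate:

    "blacklist": []

now disables blacklisting entirely, whereas it previously fell through
to the default behavior.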
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index dbebfce..3e91405 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -941,7 +941,7 @@ class DownloadArchive():
"archive-format", extractor.archive_fmt)
).format_map
- def __contains__(self, kwdict):
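+    # a regular method instead of __contains__: the 'in' operator looks up
+    # special methods on the type, so the per-instance override done by
+    # DownloadJob for 'skip=false' would never be called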
+ def check(self, kwdict):
"""Return True if the item described by 'kwdict' exists in archive"""
key = kwdict["_archive_key"] = self.keygen(kwdict)
self.cursor.execute(
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d7e2737..81976c2 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.15.0"
+__version__ = "1.15.1"
diff --git a/test/test_results.py b/test/test_results.py
index 1380f31..a594032 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -22,19 +22,16 @@ from gallery_dl import extractor, util, job, config, exception # noqa E402
# these don't work on Travis CI
TRAVIS_SKIP = {
- "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie", "bobx",
+ "exhentai", "mangafox", "dynastyscans", "nijie", "instagram", "ngomik",
"archivedmoe", "archiveofsins", "thebarchive", "fireden", "4plebs",
- "sankaku", "idolcomplex", "mangahere", "readcomiconline", "mangadex",
- "sankakucomplex", "warosu", "fuskator", "patreon", "komikcast",
- "instagram", "ngomik",
+ "sankaku", "idolcomplex", "mangahere", "mangadex", "sankakucomplex",
+ "warosu", "fuskator", "patreon", "komikcast",
}
# temporary issues, etc.
BROKEN = {
- "8kun",
- "dynastyscans",
- "fallenangels",
"imagevenue",
+ "ngomik",
"photobucket",
}