summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG.md31
-rw-r--r--PKG-INFO130
-rw-r--r--README.rst128
-rw-r--r--data/completion/_gallery-dl12
-rw-r--r--data/completion/gallery-dl.fish12
-rw-r--r--data/man/gallery-dl.118
-rw-r--r--data/man/gallery-dl.conf.5623
-rw-r--r--docs/gallery-dl.conf2
-rw-r--r--gallery_dl.egg-info/PKG-INFO130
-rw-r--r--gallery_dl.egg-info/SOURCES.txt2
-rw-r--r--gallery_dl/cookies.py3
-rw-r--r--gallery_dl/extractor/__init__.py2
-rw-r--r--gallery_dl/extractor/behance.py5
-rw-r--r--gallery_dl/extractor/bunkr.py6
-rw-r--r--gallery_dl/extractor/common.py1
-rw-r--r--gallery_dl/extractor/danbooru.py4
-rw-r--r--gallery_dl/extractor/deviantart.py57
-rw-r--r--gallery_dl/extractor/fanbox.py20
-rw-r--r--gallery_dl/extractor/fanleaks.py127
-rw-r--r--gallery_dl/extractor/gelbooru.py4
-rw-r--r--gallery_dl/extractor/imagefap.py191
-rw-r--r--gallery_dl/extractor/kemonoparty.py12
-rw-r--r--gallery_dl/extractor/lynxchan.py16
-rw-r--r--gallery_dl/extractor/myhentaigallery.py5
-rw-r--r--gallery_dl/extractor/nitter.py4
-rw-r--r--gallery_dl/extractor/pinterest.py96
-rw-r--r--gallery_dl/extractor/pixiv.py25
-rw-r--r--gallery_dl/extractor/poipiku.py22
-rw-r--r--gallery_dl/extractor/tcbscans.py106
-rw-r--r--gallery_dl/extractor/telegraph.py25
-rw-r--r--gallery_dl/extractor/twitter.py74
-rw-r--r--gallery_dl/extractor/vk.py9
-rw-r--r--gallery_dl/extractor/zerochan.py12
-rw-r--r--gallery_dl/formatter.py13
-rw-r--r--gallery_dl/option.py19
-rw-r--r--gallery_dl/util.py89
-rw-r--r--gallery_dl/version.py2
-rw-r--r--gallery_dl/ytdl.py30
-rw-r--r--test/test_extractor.py18
-rw-r--r--test/test_util.py87
-rw-r--r--test/test_ytdl.py20
41 files changed, 1622 insertions, 570 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 700efb1..a62a8ab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,36 @@
# Changelog
+## 1.24.3 - 2023-01-10
+### Additions
+- [danbooru] extract `uploader` metadata ([#3457](https://github.com/mikf/gallery-dl/issues/3457))
+- [deviantart] initial implementation of username & password login for `scraps` ([#1029](https://github.com/mikf/gallery-dl/issues/1029))
+- [fanleaks] add `post` and `model` extractors ([#3468](https://github.com/mikf/gallery-dl/issues/3468), [#3474](https://github.com/mikf/gallery-dl/issues/3474))
+- [imagefap] add `folder` extractor ([#3504](https://github.com/mikf/gallery-dl/issues/3504))
+- [lynxchan] support `bbw-chan.nl` ([#3456](https://github.com/mikf/gallery-dl/issues/3456), [#3463](https://github.com/mikf/gallery-dl/issues/3463))
+- [pinterest] support `All Pins` boards ([#2855](https://github.com/mikf/gallery-dl/issues/2855), [#3484](https://github.com/mikf/gallery-dl/issues/3484))
+- [pinterest] add `domain` option ([#3484](https://github.com/mikf/gallery-dl/issues/3484))
+- [pixiv] implement `metadata-bookmark` option ([#3417](https://github.com/mikf/gallery-dl/issues/3417))
+- [tcbscans] add `chapter` and `manga` extractors ([#3189](https://github.com/mikf/gallery-dl/issues/3189))
+- [twitter] implement `syndication=extended` ([#3483](https://github.com/mikf/gallery-dl/issues/3483))
+- implement slice notation for `range` options ([#918](https://github.com/mikf/gallery-dl/issues/918), [#2865](https://github.com/mikf/gallery-dl/issues/2865))
+- allow `filter` options to be a list of expressions
+### Fixes
+- [behance] use delay between requests ([#2507](https://github.com/mikf/gallery-dl/issues/2507))
+- [bunkr] fix URLs returned by API ([#3481](https://github.com/mikf/gallery-dl/issues/3481))
+- [fanbox] return `imageMap` files in order ([#2718](https://github.com/mikf/gallery-dl/issues/2718))
+- [imagefap] use delay between requests ([#1140](https://github.com/mikf/gallery-dl/issues/1140))
+- [imagefap] warn about redirects to `/human-verification` ([#1140](https://github.com/mikf/gallery-dl/issues/1140))
+- [kemonoparty] reject invalid/empty files ([#3510](https://github.com/mikf/gallery-dl/issues/3510))
+- [myhentaigallery] handle whitespace before title tag ([#3503](https://github.com/mikf/gallery-dl/issues/3503))
+- [poipiku] fix extraction for a different warning button style ([#3493](https://github.com/mikf/gallery-dl/issues/3493), [#3460](https://github.com/mikf/gallery-dl/issues/3460))
+- [poipiku] warn about login requirements
+- [telegraph] fix file URLs ([#3506](https://github.com/mikf/gallery-dl/issues/3506))
+- [twitter] fix crash when using `expand` and `syndication` ([#3473](https://github.com/mikf/gallery-dl/issues/3473))
+- [twitter] apply tweet type checks before uniqueness check ([#3439](https://github.com/mikf/gallery-dl/issues/3439), [#3455](https://github.com/mikf/gallery-dl/issues/3455))
+- [twitter] force `https://` for TwitPic URLs ([#3449](https://github.com/mikf/gallery-dl/issues/3449))
+- [ytdl] adapt to yt-dlp changes
+- update and improve documentation ([#3453](https://github.com/mikf/gallery-dl/issues/3453), [#3462](https://github.com/mikf/gallery-dl/issues/3462), [#3496](https://github.com/mikf/gallery-dl/issues/3496))
+
## 1.24.2 - 2022-12-18
### Additions
- [2chen] support `.club` URLs ([#3406](https://github.com/mikf/gallery-dl/issues/3406))
diff --git a/PKG-INFO b/PKG-INFO
index 68af9dd..508ba02 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.24.2
+Version: 1.24.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -39,13 +39,16 @@ License-File: LICENSE
gallery-dl
==========
-*gallery-dl* is a command-line program to download image galleries and
-collections from several image hosting sites (see `Supported Sites`_).
-It is a cross-platform tool with many configuration options
-and powerful `filenaming capabilities <Formatting_>`_.
+*gallery-dl* is a command-line program
+to download image galleries and collections
+from several image hosting sites
+(see `Supported Sites <docs/supportedsites.md>`__).
+It is a cross-platform tool
+with many `configuration options <docs/configuration.rst>`__
+and powerful `filenaming capabilities <docs/formatting.md>`__.
-|pypi| |build| |gitter|
+|pypi| |build|
.. contents::
@@ -59,7 +62,7 @@ Dependencies
Optional
--------
-- FFmpeg_: Pixiv Ugoira to WebM conversion
+- FFmpeg_: Pixiv Ugoira conversion
- yt-dlp_ or youtube-dl_: Video downloads
- PySocks_: SOCKS proxy support
- brotli_ or brotlicffi_: Brotli compression support
@@ -103,9 +106,13 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.3/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.3/gallery-dl.bin>`__
+
+
+Nightly Builds
+--------------
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -141,6 +148,16 @@ Scoop
scoop install gallery-dl
+Homebrew
+--------
+
+For macOS or Linux users using Homebrew:
+
+.. code:: bash
+
+ brew install gallery-dl
+
+
Usage
=====
@@ -149,9 +166,10 @@ from:
.. code:: bash
- gallery-dl [OPTION]... URL...
+ gallery-dl [OPTIONS]... URLS...
-See also :code:`gallery-dl --help`.
+Use :code:`gallery-dl --help` or see `<docs/options.md>`__
+for a full list of all command-line options.
Examples
@@ -199,13 +217,22 @@ Configuration
Configuration files for *gallery-dl* use a JSON-based file format.
-| For a (more or less) complete example with options set to their default values,
- see gallery-dl.conf_.
-| For a configuration file example with more involved settings and options,
- see gallery-dl-example.conf_.
-| A list of all available configuration options and their
- descriptions can be found in configuration.rst_.
-|
+
+Documentation
+-------------
+
+A list of all available configuration options and their descriptions
+can be found in `<docs/configuration.rst>`__.
+
+| For a default configuration file with available options set to their
+ default values, see `<docs/gallery-dl.conf>`__.
+
+| For a commented example with more involved settings and option usage,
+ see `<docs/gallery-dl-example.conf>`__.
+
+
+Locations
+---------
*gallery-dl* searches for configuration files in the following places:
@@ -214,7 +241,7 @@ Windows:
* ``%USERPROFILE%\gallery-dl\config.json``
* ``%USERPROFILE%\gallery-dl.conf``
- (``%USERPROFILE%`` usually refers to the user's home directory,
+ (``%USERPROFILE%`` usually refers to a user's home directory,
i.e. ``C:\Users\<username>\``)
Linux, macOS, etc.:
@@ -223,12 +250,13 @@ Linux, macOS, etc.:
* ``${HOME}/.config/gallery-dl/config.json``
* ``${HOME}/.gallery-dl.conf``
-Values in later configuration files will override previous ones.
+When run as `executable <Standalone Executable_>`__,
+*gallery-dl* will also look for a ``gallery-dl.conf`` file
+in the same directory as said executable.
-Command line options will override all related settings in the configuration file(s),
-e.g. using ``--write-metadata`` will enable writing metadata using the default values
-for all ``postprocessors.metadata.*`` settings, overriding any specific settings in
-configuration files.
+It is possible to use more than one configuration file at a time.
+In this case, any values from files after the first will get merged
+into the already loaded settings and potentially override previous ones.
Authentication
@@ -258,8 +286,8 @@ and optional for
``twitter``,
and ``zerochan``.
-You can set the necessary information in your configuration file
-(cf. gallery-dl.conf_)
+You can set the necessary information in your
+`configuration file <Configuration_>`__
.. code:: json
@@ -278,8 +306,8 @@ or you can provide them directly via the
.. code:: bash
- gallery-dl -u <username> -p <password> URL
- gallery-dl -o username=<username> -o password=<password> URL
+ gallery-dl -u "<username>" -p "<password>" "URL"
+ gallery-dl -o "username=<username>" -o "password=<password>" "URL"
Cookies
@@ -290,7 +318,7 @@ CAPTCHA or similar, or has not been implemented yet, you can use the
cookies from a browser login session and input them into *gallery-dl*.
This can be done via the
-`cookies <https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorcookies>`__
+`cookies <docs/configuration.rst#extractorcookies>`__
option in your configuration file by specifying
- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
@@ -301,6 +329,9 @@ option in your configuration file by specifying
| (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__,
in `Firefox <https://developer.mozilla.org/en-US/docs/Tools/Storage_Inspector>`__)
+- | the name of a browser to extract cookies from
+ | (supported browsers are Chromium-based ones, Firefox, and Safari)
+
For example:
.. code:: json
@@ -314,30 +345,43 @@ For example:
"cookies": {
"session_id": "K1T57EKu19TR49C51CDjOJoXNQLF7VbdVOiBrC9ye0a"
}
+ },
+ "twitter": {
+ "cookies": ["firefox"]
}
}
}
-You can also specify a cookies.txt file with
-the :code:`--cookies` command-line option:
+| You can also specify a cookies.txt file with
+ the :code:`--cookies` command-line option
+| or a browser to extract cookies from with :code:`--cookies-from-browser`:
.. code:: bash
- gallery-dl --cookies "$HOME/path/to/cookies.txt" URL
+ gallery-dl --cookies "$HOME/path/to/cookies.txt" "URL"
+ gallery-dl --cookies-from-browser firefox "URL"
OAuth
-----
-*gallery-dl* supports user authentication via OAuth_ for
-``deviantart``, ``flickr``, ``reddit``, ``smugmug``, ``tumblr``,
+*gallery-dl* supports user authentication via OAuth_ for some extractors.
+This is necessary for
+``pixiv``
+and optional for
+``deviantart``,
+``flickr``,
+``reddit``,
+``smugmug``,
+``tumblr``,
and ``mastodon`` instances.
-This is mostly optional, but grants *gallery-dl* the ability
-to issue requests on your account's behalf and enables it to access resources
-which would otherwise be unavailable to a public user.
-To link your account to *gallery-dl*, start by invoking it with
-``oauth:<sitename>`` as an argument. For example:
+Linking your account to *gallery-dl* grants it the ability to issue requests
+on your account's behalf and enables it to access resources which would
+otherwise be unavailable to a public user.
+
+To do so, start by invoking it with ``oauth:<sitename>`` as an argument.
+For example:
.. code:: bash
@@ -356,13 +400,6 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
gallery-dl oauth:mastodon:https://mastodon.social/
-
-.. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf
-.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
-.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
-.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md
-.. _Formatting: https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md
-
.. _Python: https://www.python.org/downloads/
.. _PyPI: https://pypi.org/
.. _pip: https://pip.pypa.io/en/stable/
@@ -373,7 +410,6 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _PySocks: https://pypi.org/project/PySocks/
.. _brotli: https://github.com/google/brotli
.. _brotlicffi: https://github.com/python-hyper/brotlicffi
-.. _pyOpenSSL: https://pyopenssl.org/
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
.. _Chocolatey: https://chocolatey.org/install
diff --git a/README.rst b/README.rst
index b21d948..56641a4 100644
--- a/README.rst
+++ b/README.rst
@@ -2,13 +2,16 @@
gallery-dl
==========
-*gallery-dl* is a command-line program to download image galleries and
-collections from several image hosting sites (see `Supported Sites`_).
-It is a cross-platform tool with many configuration options
-and powerful `filenaming capabilities <Formatting_>`_.
+*gallery-dl* is a command-line program
+to download image galleries and collections
+from several image hosting sites
+(see `Supported Sites <docs/supportedsites.md>`__).
+It is a cross-platform tool
+with many `configuration options <docs/configuration.rst>`__
+and powerful `filenaming capabilities <docs/formatting.md>`__.
-|pypi| |build| |gitter|
+|pypi| |build|
.. contents::
@@ -22,7 +25,7 @@ Dependencies
Optional
--------
-- FFmpeg_: Pixiv Ugoira to WebM conversion
+- FFmpeg_: Pixiv Ugoira conversion
- yt-dlp_ or youtube-dl_: Video downloads
- PySocks_: SOCKS proxy support
- brotli_ or brotlicffi_: Brotli compression support
@@ -66,9 +69,13 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.3/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.3/gallery-dl.bin>`__
+
+
+Nightly Builds
+--------------
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -104,6 +111,16 @@ Scoop
scoop install gallery-dl
+Homebrew
+--------
+
+For macOS or Linux users using Homebrew:
+
+.. code:: bash
+
+ brew install gallery-dl
+
+
Usage
=====
@@ -112,9 +129,10 @@ from:
.. code:: bash
- gallery-dl [OPTION]... URL...
+ gallery-dl [OPTIONS]... URLS...
-See also :code:`gallery-dl --help`.
+Use :code:`gallery-dl --help` or see `<docs/options.md>`__
+for a full list of all command-line options.
Examples
@@ -162,13 +180,22 @@ Configuration
Configuration files for *gallery-dl* use a JSON-based file format.
-| For a (more or less) complete example with options set to their default values,
- see gallery-dl.conf_.
-| For a configuration file example with more involved settings and options,
- see gallery-dl-example.conf_.
-| A list of all available configuration options and their
- descriptions can be found in configuration.rst_.
-|
+
+Documentation
+-------------
+
+A list of all available configuration options and their descriptions
+can be found in `<docs/configuration.rst>`__.
+
+| For a default configuration file with available options set to their
+ default values, see `<docs/gallery-dl.conf>`__.
+
+| For a commented example with more involved settings and option usage,
+ see `<docs/gallery-dl-example.conf>`__.
+
+
+Locations
+---------
*gallery-dl* searches for configuration files in the following places:
@@ -177,7 +204,7 @@ Windows:
* ``%USERPROFILE%\gallery-dl\config.json``
* ``%USERPROFILE%\gallery-dl.conf``
- (``%USERPROFILE%`` usually refers to the user's home directory,
+ (``%USERPROFILE%`` usually refers to a user's home directory,
i.e. ``C:\Users\<username>\``)
Linux, macOS, etc.:
@@ -186,12 +213,13 @@ Linux, macOS, etc.:
* ``${HOME}/.config/gallery-dl/config.json``
* ``${HOME}/.gallery-dl.conf``
-Values in later configuration files will override previous ones.
+When run as `executable <Standalone Executable_>`__,
+*gallery-dl* will also look for a ``gallery-dl.conf`` file
+in the same directory as said executable.
-Command line options will override all related settings in the configuration file(s),
-e.g. using ``--write-metadata`` will enable writing metadata using the default values
-for all ``postprocessors.metadata.*`` settings, overriding any specific settings in
-configuration files.
+It is possible to use more than one configuration file at a time.
+In this case, any values from files after the first will get merged
+into the already loaded settings and potentially override previous ones.
Authentication
@@ -221,8 +249,8 @@ and optional for
``twitter``,
and ``zerochan``.
-You can set the necessary information in your configuration file
-(cf. gallery-dl.conf_)
+You can set the necessary information in your
+`configuration file <Configuration_>`__
.. code:: json
@@ -241,8 +269,8 @@ or you can provide them directly via the
.. code:: bash
- gallery-dl -u <username> -p <password> URL
- gallery-dl -o username=<username> -o password=<password> URL
+ gallery-dl -u "<username>" -p "<password>" "URL"
+ gallery-dl -o "username=<username>" -o "password=<password>" "URL"
Cookies
@@ -253,7 +281,7 @@ CAPTCHA or similar, or has not been implemented yet, you can use the
cookies from a browser login session and input them into *gallery-dl*.
This can be done via the
-`cookies <https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorcookies>`__
+`cookies <docs/configuration.rst#extractorcookies>`__
option in your configuration file by specifying
- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
@@ -264,6 +292,9 @@ option in your configuration file by specifying
| (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__,
in `Firefox <https://developer.mozilla.org/en-US/docs/Tools/Storage_Inspector>`__)
+- | the name of a browser to extract cookies from
+ | (supported browsers are Chromium-based ones, Firefox, and Safari)
+
For example:
.. code:: json
@@ -277,30 +308,43 @@ For example:
"cookies": {
"session_id": "K1T57EKu19TR49C51CDjOJoXNQLF7VbdVOiBrC9ye0a"
}
+ },
+ "twitter": {
+ "cookies": ["firefox"]
}
}
}
-You can also specify a cookies.txt file with
-the :code:`--cookies` command-line option:
+| You can also specify a cookies.txt file with
+ the :code:`--cookies` command-line option
+| or a browser to extract cookies from with :code:`--cookies-from-browser`:
.. code:: bash
- gallery-dl --cookies "$HOME/path/to/cookies.txt" URL
+ gallery-dl --cookies "$HOME/path/to/cookies.txt" "URL"
+ gallery-dl --cookies-from-browser firefox "URL"
OAuth
-----
-*gallery-dl* supports user authentication via OAuth_ for
-``deviantart``, ``flickr``, ``reddit``, ``smugmug``, ``tumblr``,
+*gallery-dl* supports user authentication via OAuth_ for some extractors.
+This is necessary for
+``pixiv``
+and optional for
+``deviantart``,
+``flickr``,
+``reddit``,
+``smugmug``,
+``tumblr``,
and ``mastodon`` instances.
-This is mostly optional, but grants *gallery-dl* the ability
-to issue requests on your account's behalf and enables it to access resources
-which would otherwise be unavailable to a public user.
-To link your account to *gallery-dl*, start by invoking it with
-``oauth:<sitename>`` as an argument. For example:
+Linking your account to *gallery-dl* grants it the ability to issue requests
+on your account's behalf and enables it to access resources which would
+otherwise be unavailable to a public user.
+
+To do so, start by invoking it with ``oauth:<sitename>`` as an argument.
+For example:
.. code:: bash
@@ -319,13 +363,6 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
gallery-dl oauth:mastodon:https://mastodon.social/
-
-.. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf
-.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
-.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
-.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md
-.. _Formatting: https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md
-
.. _Python: https://www.python.org/downloads/
.. _PyPI: https://pypi.org/
.. _pip: https://pip.pypa.io/en/stable/
@@ -336,7 +373,6 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _PySocks: https://pypi.org/project/PySocks/
.. _brotli: https://github.com/google/brotli
.. _brotlicffi: https://github.com/python-hyper/brotlicffi
-.. _pyOpenSSL: https://pyopenssl.org/
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
.. _Chocolatey: https://chocolatey.org/install
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 13ee2ea..1125b36 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -48,17 +48,17 @@ _arguments -C -S \
{-c,--config}'[Additional configuration files]':'<file>':_files \
--config-yaml'[==SUPPRESS==]':'<file>':_files \
{-o,--option}'[Additional "<key>=<value>" option values]':'<opt>' \
---ignore-config'[Do not read the default configuration files]' \
+--ignore-config'[Do not read default configuration files]' \
{-u,--username}'[Username to login with]':'<user>' \
{-p,--password}'[Password belonging to the given username]':'<pass>' \
--netrc'[Enable .netrc authentication data]' \
---download-archive'[Record all downloaded files in the archive file and skip downloading any file already in it]':'<file>':_files \
+--download-archive'[Record all downloaded or skipped files in FILE and skip downloading any file already in it]':'<file>':_files \
{-A,--abort}'[Stop current extractor run after N consecutive file downloads were skipped]':'<n>' \
{-T,--terminate}'[Stop current and parent extractor runs after N consecutive file downloads were skipped]':'<n>' \
---range'[Index-range(s) specifying which images to download. For example "5-10" or "1,3-5,10-"]':'<range>' \
---chapter-range'[Like "--range", but applies to manga-chapters and other delegated URLs]':'<range>' \
---filter'[Python expression controlling which images to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". Example: --filter "image_width >= 1000 and rating in ("s", "q")"]':'<expr>' \
---chapter-filter'[Like "--filter", but applies to manga-chapters and other delegated URLs]':'<expr>' \
+--range'[Index range(s) specifying which files to download. These can be either a constant value, range, or slice (e.g. "5", "8-20", or "1:24:3")]':'<range>' \
+--chapter-range'[Like "--range", but applies to manga chapters and other delegated URLs]':'<range>' \
+--filter'[Python expression controlling which files to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". Example: --filter "image_width >= 1000 and rating in ("s", "q")"]':'<expr>' \
+--chapter-filter'[Like "--filter", but applies to manga chapters and other delegated URLs]':'<expr>' \
--zip'[Store downloaded files in a ZIP archive]' \
--ugoira-conv'[Convert Pixiv Ugoira to WebM (requires FFmpeg)]' \
--ugoira-conv-lossless'[Convert Pixiv Ugoira to WebM in VP9 lossless mode]' \
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index 50ad132..986d9df 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -42,17 +42,17 @@ complete -c gallery-dl -l 'no-check-certificate' -d 'Disable HTTPS certificate v
complete -c gallery-dl -r -F -s 'c' -l 'config' -d 'Additional configuration files'
complete -c gallery-dl -r -F -l 'config-yaml' -d '==SUPPRESS=='
complete -c gallery-dl -x -s 'o' -l 'option' -d 'Additional "<key>=<value>" option values'
-complete -c gallery-dl -l 'ignore-config' -d 'Do not read the default configuration files'
+complete -c gallery-dl -l 'ignore-config' -d 'Do not read default configuration files'
complete -c gallery-dl -x -s 'u' -l 'username' -d 'Username to login with'
complete -c gallery-dl -x -s 'p' -l 'password' -d 'Password belonging to the given username'
complete -c gallery-dl -l 'netrc' -d 'Enable .netrc authentication data'
-complete -c gallery-dl -r -F -l 'download-archive' -d 'Record all downloaded files in the archive file and skip downloading any file already in it'
+complete -c gallery-dl -r -F -l 'download-archive' -d 'Record all downloaded or skipped files in FILE and skip downloading any file already in it'
complete -c gallery-dl -x -s 'A' -l 'abort' -d 'Stop current extractor run after N consecutive file downloads were skipped'
complete -c gallery-dl -x -s 'T' -l 'terminate' -d 'Stop current and parent extractor runs after N consecutive file downloads were skipped'
-complete -c gallery-dl -x -l 'range' -d 'Index-range(s) specifying which images to download. For example "5-10" or "1,3-5,10-"'
-complete -c gallery-dl -x -l 'chapter-range' -d 'Like "--range", but applies to manga-chapters and other delegated URLs'
-complete -c gallery-dl -x -l 'filter' -d 'Python expression controlling which images to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". Example: --filter "image_width >= 1000 and rating in ("s", "q")"'
-complete -c gallery-dl -x -l 'chapter-filter' -d 'Like "--filter", but applies to manga-chapters and other delegated URLs'
+complete -c gallery-dl -x -l 'range' -d 'Index range(s) specifying which files to download. These can be either a constant value, range, or slice (e.g. "5", "8-20", or "1:24:3")'
+complete -c gallery-dl -x -l 'chapter-range' -d 'Like "--range", but applies to manga chapters and other delegated URLs'
+complete -c gallery-dl -x -l 'filter' -d 'Python expression controlling which files to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". Example: --filter "image_width >= 1000 and rating in ("s", "q")"'
+complete -c gallery-dl -x -l 'chapter-filter' -d 'Like "--filter", but applies to manga chapters and other delegated URLs'
complete -c gallery-dl -l 'zip' -d 'Store downloaded files in a ZIP archive'
complete -c gallery-dl -l 'ugoira-conv' -d 'Convert Pixiv Ugoira to WebM (requires FFmpeg)'
complete -c gallery-dl -l 'ugoira-conv-lossless' -d 'Convert Pixiv Ugoira to WebM in VP9 lossless mode'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index d85b1c9..e88dd4f 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2022-12-18" "1.24.2" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-01-10" "1.24.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -144,7 +144,7 @@ Additional configuration files
Additional '<key>=<value>' option values
.TP
.B "\-\-ignore\-config"
-Do not read the default configuration files
+Do not read default configuration files
.TP
.B "\-u, \-\-username" \f[I]USER\f[]
Username to login with
@@ -156,7 +156,7 @@ Password belonging to the given username
Enable .netrc authentication data
.TP
.B "\-\-download\-archive" \f[I]FILE\f[]
-Record all downloaded files in the archive file and skip downloading any file already in it
+Record all downloaded or skipped files in FILE and skip downloading any file already in it
.TP
.B "\-A, \-\-abort" \f[I]N\f[]
Stop current extractor run after N consecutive file downloads were skipped
@@ -165,16 +165,16 @@ Stop current extractor run after N consecutive file downloads were skipped
Stop current and parent extractor runs after N consecutive file downloads were skipped
.TP
.B "\-\-range" \f[I]RANGE\f[]
-Index-range(s) specifying which images to download. For example '5-10' or '1,3-5,10-'
+Index range(s) specifying which files to download. These can be either a constant value, range, or slice (e.g. '5', '8-20', or '1:24:3')
.TP
.B "\-\-chapter\-range" \f[I]RANGE\f[]
-Like '--range', but applies to manga-chapters and other delegated URLs
+Like '--range', but applies to manga chapters and other delegated URLs
.TP
.B "\-\-filter" \f[I]EXPR\f[]
-Python expression controlling which images to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by '-K'. Example: --filter "image_width >= 1000 and rating in ('s', 'q')"
+Python expression controlling which files to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by '-K'. Example: --filter "image_width >= 1000 and rating in ('s', 'q')"
.TP
.B "\-\-chapter\-filter" \f[I]EXPR\f[]
-Like '--filter', but applies to manga-chapters and other delegated URLs
+Like '--filter', but applies to manga chapters and other delegated URLs
.TP
.B "\-\-zip"
Store downloaded files in a ZIP archive
@@ -201,10 +201,10 @@ Write image tags to separate text files
Set file modification times according to 'date' metadata
.TP
.B "\-\-exec" \f[I]CMD\f[]
-Execute CMD for each downloaded file. Example: --exec 'convert {} {}.png && rm {}'
+Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"
.TP
.B "\-\-exec\-after" \f[I]CMD\f[]
-Execute CMD after all files were downloaded successfully. Example: --exec-after 'cd {} && convert * ../doc.pdf'
+Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"
.TP
.B "\-P, \-\-postprocessor" \f[I]NAME\f[]
Activate the specified post processor
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 36b2c84..ff0067b 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2022-12-18" "1.24.2" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-01-10" "1.24.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -75,16 +75,17 @@ those as makeshift comments by settings their values to arbitrary strings.
.SH EXTRACTOR OPTIONS
.SS extractor.*.filename
.IP "Type:" 6
-\f[I]string\f[] or \f[I]object\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]object\f[] (condition -> \f[I]format string\f[])
.IP "Example:" 4
-.br
-* .. code:: json
+.. code:: json
"{manga}_c{chapter}_{page:>03}.{extension}"
-.br
-* .. code:: json
+.. code:: json
{
"extension == 'mp4'": "{id}_video.{extension}",
@@ -135,16 +136,17 @@ a valid filename extension.
.SS extractor.*.directory
.IP "Type:" 6
-\f[I]list\f[] of \f[I]strings\f[] or \f[I]object\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]object\f[] (condition -> \f[I]format strings\f[])
.IP "Example:" 4
-.br
-* .. code:: json
+.. code:: json
["{category}", "{manga}", "c{chapter} - {title}"]
-.br
-* .. code:: json
+.. code:: json
{
"'nature' in content": ["Nature Pictures"],
@@ -190,7 +192,10 @@ for any spawned child extractors.
.SS extractor.*.parent-metadata
.IP "Type:" 6
-\f[I]bool\f[] or \f[I]string\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]false\f[]
@@ -226,7 +231,10 @@ Share number of skipped downloads between parent and child extractors.
.SS extractor.*.path-restrict
.IP "Type:" 6
-\f[I]string\f[] or \f[I]object\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]object\f[] (character -> replacement character(s))
.IP "Default:" 9
\f[I]"auto"\f[]
@@ -324,7 +332,7 @@ prefixed with \f[I]\\\\?\\\f[] to work around the 260 characters path length lim
.SS extractor.*.extension-map
.IP "Type:" 6
-\f[I]object\f[]
+\f[I]object\f[] (extension -> replacement)
.IP "Default:" 9
.. code:: json
@@ -343,7 +351,10 @@ A JSON \f[I]object\f[] mapping filename extensions to their replacements.
.SS extractor.*.skip
.IP "Type:" 6
-\f[I]bool\f[] or \f[I]string\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
@@ -435,8 +446,12 @@ Specifying a username and password is required for
and optional for
.br
+* \f[I]aibooru\f[] (*)
+.br
* \f[I]aryion\f[]
.br
+* \f[I]atfbooru\f[] (*)
+.br
* \f[I]danbooru\f[] (*)
.br
* \f[I]e621\f[] (*)
@@ -475,7 +490,7 @@ These values can also be specified via the
\f[I]-u/--username\f[] and \f[I]-p/--password\f[] command-line options or
by using a \f[I].netrc\f[] file. (see Authentication_)
-(*) The password value for \f[I]danbooru\f[] and \f[I]e621\f[] should be
+(*) The password value for these sites should be
the API key found in your user profile, not the actual account password.
@@ -492,10 +507,12 @@ Enable the use of \f[I].netrc\f[] authentication data.
.SS extractor.*.cookies
.IP "Type:" 6
-\f[I]Path\f[] or \f[I]object\f[] or \f[I]list\f[]
-
-.IP "Default:" 9
-\f[I]null\f[]
+.br
+* \f[I]Path\f[]
+.br
+* \f[I]object\f[] (name -> value)
+.br
+* \f[I]list\f[]
.IP "Description:" 4
Source to read additional cookies from. This can be
@@ -545,14 +562,17 @@ Source to read additional cookies from. This can be
\f[I]true\f[]
.IP "Description:" 4
-If \f[I]extractor.*.cookies\f[] specifies the \f[I]Path\f[] to a cookies.txt
+If \f[I]extractor.*.cookies\f[] specifies the \f[I]Path\f[] of a cookies.txt
file and it can be opened and parsed without errors,
update its contents with cookies received during data extraction.
.SS extractor.*.proxy
.IP "Type:" 6
-\f[I]string\f[] or \f[I]object\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]object\f[] (scheme -> proxy)
.IP "Default:" 9
\f[I]null\f[]
@@ -580,8 +600,8 @@ Example:
"http://10.20.1.128": "http://10.10.1.10:5323"
}
-Note: All proxy URLs should include a scheme,
-otherwise \f[I]http://\f[] is assumed.
+Note: If a proxy URL does not include a scheme,
+\f[I]http://\f[] is assumed.
.SS extractor.*.source-address
@@ -619,8 +639,9 @@ User-Agent header value to be used for HTTP requests.
Setting this value to \f[I]"browser"\f[] will try to automatically detect
and use the User-Agent used by the system's default browser.
-Note: This option has no effect on pixiv extractors,
-as these need specific values to function correctly.
+Note: This option has no effect on
+pixiv, e621, and mangadex
+extractors, as these need specific values to function correctly.
.SS extractor.*.browser
@@ -650,13 +671,13 @@ browser would use HTTP/2.
.SS extractor.*.keywords
.IP "Type:" 6
-\f[I]object\f[]
+\f[I]object\f[] (name -> value)
.IP "Example:" 4
{"type": "Pixel Art", "type_id": 123}
.IP "Description:" 4
-Additional key-value pairs to be added to each metadata dictionary.
+Additional name-value pairs to be added to each metadata dictionary.
.SS extractor.*.keywords-default
@@ -696,11 +717,12 @@ with a \f[I]metadata\f[] post processor, etc.
\f[I]null\f[]
.IP "Description:" 4
-Insert a reference to the current \f[I]PathFormat\f[]
+Insert a reference to the current
+\f[I]PathFormat\f[]
data structure into metadata dictionaries as the given name.
For example, setting this option to \f[I]"gdl_path"\f[] would make it possible
-to access the current file's filename as \f[I]"[gdl_path.filename}"\f[].
+to access the current file's filename as \f[I]"{gdl_path.filename}"\f[].
.SS extractor.*.http-metadata
@@ -904,7 +926,10 @@ This value gets internally used as the \f[I]timeout\f[] parameter for the
.SS extractor.*.verify
.IP "Type:" 6
-\f[I]bool\f[] or \f[I]string\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
@@ -947,18 +972,40 @@ Use fallback download URLs when a download fails.
.SS extractor.*.image-range
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
-.IP "Example:" 4
+.IP "Examples:" 4
.br
-* "10-20"
+* \f[I]"10-20"\f[]
.br
-* "-5, 10, 30-50, 100-"
+* \f[I]"-5, 10, 30-50, 100-"\f[]
+.br
+* \f[I]"10:21, 30:51:2, :5, 100:"\f[]
+.br
+* \f[I]["-5", "10", "30-50", "100-"]\f[]
.IP "Description:" 4
-Index-range(s) specifying which images to download.
+Index range(s) selecting which files to download.
-Note: The index of the first image is \f[I]1\f[].
+These can be specified as
+
+.br
+* index: \f[I]3\f[] (file number 3)
+.br
+* range: \f[I]2-4\f[] (files 2, 3, and 4)
+.br
+* \f[I]slice\f[]: \f[I]3:8:2\f[] (files 3, 5, and 7)
+
+Arguments for range and slice notation are optional
+.br
+and will default to begin (\f[I]1\f[]) or end (\f[I]sys.maxsize\f[]) if omitted.
+For example \f[I]5-\f[], \f[I]5:\f[], and \f[I]5::\f[] all mean "Start at file number 5".
+.br
+
+Note: The index of the first file is \f[I]1\f[].
.SS extractor.*.chapter-range
@@ -967,41 +1014,46 @@ Note: The index of the first image is \f[I]1\f[].
.IP "Description:" 4
Like \f[I]image-range\f[],
-but applies to delegated URLs like manga-chapters, etc.
+but applies to delegated URLs like manga chapters, etc.
.SS extractor.*.image-filter
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
-.IP "Example:" 4
+.IP "Examples:" 4
.br
-* "width >= 1200 and width/height > 1.2"
+* \f[I]"re.search(r'foo(bar)+', description)"\f[]
.br
-* "re.search(r'foo(bar)+', description)"
+* \f[I]["width >= 1200", "width/height > 1.2"]\f[]
.IP "Description:" 4
Python expression controlling which files to download.
-Files for which the expression evaluates to \f[I]False\f[] are ignored.
-.br
-Available keys are the filename-specific ones listed by \f[I]-K\f[] or \f[I]-j\f[].
-.br
+A file only gets downloaded when *all* of the given expressions evaluate to \f[I]True\f[].
+
+Available values are the filename-specific ones listed by \f[I]-K\f[] or \f[I]-j\f[].
.SS extractor.*.chapter-filter
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
-.IP "Example:" 4
+.IP "Examples:" 4
.br
-* "lang == 'en'"
+* \f[I]"lang == 'en'"\f[]
.br
-* "language == 'French' and 10 <= chapter < 20"
+* \f[I]["language == 'French'", "10 <= chapter < 20"]\f[]
.IP "Description:" 4
Like \f[I]image-filter\f[],
-but applies to delegated URLs like manga-chapters, etc.
+but applies to delegated URLs like manga chapters, etc.
.SS extractor.*.image-unique
@@ -1025,7 +1077,7 @@ current extractor run.
.IP "Description:" 4
Like \f[I]image-unique\f[],
-but applies to delegated URLs like manga-chapters, etc.
+but applies to delegated URLs like manga chapters, etc.
.SS extractor.*.date-format
@@ -1041,6 +1093,12 @@ date-min and date-max.
See \f[I]strptime\f[] for a list of formatting directives.
+Note: Despite its name, this option does **not** control how
+\f[I]{date}\f[] metadata fields are formatted.
+To use a different formatting for those values other than the default
+\f[I]%Y-%m-%d %H:%M:%S\f[], put \f[I]strptime\f[] formatting directives
+after a colon \f[I]:\f[], for example \f[I]{date:%Y%m%d}\f[].
+
.SH EXTRACTOR-SPECIFIC OPTIONS
.SS extractor.artstation.external
@@ -1096,7 +1154,7 @@ descend into subfolders
.SS extractor.bbc.width
.IP "Type:" 6
-\f[I]int\f[]
+\f[I]integer\f[]
.IP "Default:" 9
\f[I]1920\f[]
@@ -1156,14 +1214,18 @@ follow the \f[I]source\f[] and download from there if possible.
\f[I]false\f[]
.IP "Description:" 4
-Extract additional metadata (notes, artist commentary, parent, children)
+Extract additional metadata
+(notes, artist commentary, parent, children, uploader)
-Note: This requires 1 additional HTTP request for each post.
+Note: This requires 1 additional HTTP request per post.
.SS extractor.danbooru.threshold
.IP "Type:" 6
-\f[I]string\f[] or \f[I]int\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]integer\f[]
.IP "Default:" 9
\f[I]"auto"\f[]
@@ -1324,13 +1386,19 @@ belongs to a group or a regular user.
.SS extractor.deviantart.include
.IP "Type:" 6
-\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]"gallery"\f[]
.IP "Example:" 4
-"favorite,journal,scraps" or ["favorite", "journal", "scraps"]
+.br
+* "favorite,journal,scraps"
+.br
+* ["favorite", "journal", "scraps"]
.IP "Description:" 4
A (comma-separated) list of subcategories to include
@@ -1339,7 +1407,7 @@ when processing a user profile.
Possible values are
\f[I]"gallery"\f[], \f[I]"scraps"\f[], \f[I]"journal"\f[], \f[I]"favorite"\f[].
-You can use \f[I]"all"\f[] instead of listing all values separately.
+It is possible to use \f[I]"all"\f[] instead of listing all values separately.
.SS extractor.deviantart.journals
@@ -1389,7 +1457,10 @@ Request extended metadata for deviation objects to additionally provide
.SS extractor.deviantart.original
.IP "Type:" 6
-\f[I]bool\f[] or \f[I]string\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
@@ -1519,7 +1590,10 @@ Selects an alternative source to download files from.
.SS extractor.fanbox.embeds
.IP "Type:" 6
-\f[I]bool\f[] or \f[I]string\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
@@ -1562,7 +1636,10 @@ Extract and download videos.
.SS extractor.flickr.size-max
.IP "Type:" 6
-\f[I]integer\f[] or \f[I]string\f[]
+.br
+* \f[I]integer\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]null\f[]
@@ -1608,13 +1685,19 @@ Follow external URLs linked in descriptions.
.SS extractor.furaffinity.include
.IP "Type:" 6
-\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]"gallery"\f[]
.IP "Example:" 4
-"scraps,favorite" or ["scraps", "favorite"]
+.br
+* "scraps,favorite"
+.br
+* ["scraps", "favorite"]
.IP "Description:" 4
A (comma-separated) list of subcategories to include
@@ -1623,7 +1706,7 @@ when processing a user profile.
Possible values are
\f[I]"gallery"\f[], \f[I]"scraps"\f[], \f[I]"favorite"\f[].
-You can use \f[I]"all"\f[] instead of listing all values separately.
+It is possible to use \f[I]"all"\f[] instead of listing all values separately.
.SS extractor.furaffinity.layout
@@ -1672,9 +1755,9 @@ even ones without a \f[I]generic:\f[] prefix.
.SS extractor.gfycat.format
.IP "Type:" 6
.br
-* \f[I]list\f[] of \f[I]strings\f[]
-.br
* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]["mp4", "webm", "mobile", "gif"]\f[]
@@ -1733,13 +1816,19 @@ Recursively download files from subfolders.
.SS extractor.hentaifoundry.include
.IP "Type:" 6
-\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]"pictures"\f[]
.IP "Example:" 4
-"scraps,stories" or ["scraps", "stories"]
+.br
+* "scraps,stories"
+.br
+* ["scraps", "stories"]
.IP "Description:" 4
A (comma-separated) list of subcategories to include
@@ -1748,7 +1837,7 @@ when processing a user profile.
Possible values are
\f[I]"pictures"\f[], \f[I]"scraps"\f[], \f[I]"stories"\f[], \f[I]"favorite"\f[].
-You can use \f[I]"all"\f[] instead of listing all values separately.
+It is possible to use \f[I]"all"\f[] instead of listing all values separately.
.SS extractor.hitomi.format
@@ -1769,7 +1858,10 @@ but is most likely going to fail with \f[I]403 Forbidden\f[] errors.
.SS extractor.imgur.mp4
.IP "Type:" 6
-\f[I]bool\f[] or \f[I]string\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
@@ -1818,13 +1910,19 @@ Selects which API endpoints to use.
.SS extractor.instagram.include
.IP "Type:" 6
-\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]"posts"\f[]
.IP "Example:" 4
-"stories,highlights,posts" or ["stories", "highlights", "posts"]
+.br
+* "stories,highlights,posts"
+.br
+* ["stories", "highlights", "posts"]
.IP "Description:" 4
A (comma-separated) list of subcategories to include
@@ -1838,7 +1936,7 @@ Possible values are
\f[I]"highlights"\f[],
\f[I]"avatar"\f[].
-You can use \f[I]"all"\f[] instead of listing all values separately.
+It is possible to use \f[I]"all"\f[] instead of listing all values separately.
.SS extractor.instagram.previews
@@ -1884,6 +1982,8 @@ Download video files.
.IP "Description:" 4
Extract \f[I]comments\f[] metadata.
+Note: This requires 1 additional HTTP request per post.
+
.SS extractor.kemonoparty.duplicates
.IP "Type:" 6
@@ -2019,7 +2119,7 @@ The server to use for API requests.
.SS extractor.mangadex.api-parameters
.IP "Type:" 6
-\f[I]object\f[]
+\f[I]object\f[] (name -> value)
.IP "Example:" 4
{"order[updatedAt]": "desc"}
@@ -2054,7 +2154,24 @@ to filter chapters by.
List of acceptable content ratings for returned chapters.
-.SS extractor.mastodon.reblogs
+.SS extractor.[mastodon].access-token
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+The \f[I]access-token\f[] value you get from \f[I]linking your account to
+gallery-dl\f[].
+
+Note: gallery-dl comes with built-in tokens for \f[I]mastodon.social\f[],
+\f[I]pawoo\f[] and \f[I]baraag\f[]. For other instances, you need to obtain an
+\f[I]access-token\f[] in order to use usernames in place of numerical
+user IDs.
+
+
+.SS extractor.[mastodon].reblogs
.IP "Type:" 6
\f[I]bool\f[]
@@ -2065,7 +2182,7 @@ List of acceptable content ratings for returned chapters.
Fetch media from reblogged posts.
-.SS extractor.mastodon.replies
+.SS extractor.[mastodon].replies
.IP "Type:" 6
\f[I]bool\f[]
@@ -2076,7 +2193,7 @@ Fetch media from reblogged posts.
Fetch media from replies to other posts.
-.SS extractor.mastodon.text-posts
+.SS extractor.[mastodon].text-posts
.IP "Type:" 6
\f[I]bool\f[]
@@ -2129,13 +2246,19 @@ the next smaller one gets chosen.
.SS extractor.newgrounds.include
.IP "Type:" 6
-\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]"art"\f[]
.IP "Example:" 4
-"movies,audio" or ["movies", "audio"]
+.br
+* "movies,audio"
+.br
+* ["movies", "audio"]
.IP "Description:" 4
A (comma-separated) list of subcategories to include
@@ -2144,12 +2267,15 @@ when processing a user profile.
Possible values are
\f[I]"art"\f[], \f[I]"audio"\f[], \f[I]"games"\f[], \f[I]"movies"\f[].
-You can use \f[I]"all"\f[] instead of listing all values separately.
+It is possible to use \f[I]"all"\f[] instead of listing all values separately.
.SS extractor.nijie.include
.IP "Type:" 6
-\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]"illustration,doujin"\f[]
@@ -2161,7 +2287,7 @@ when processing a user profile.
Possible values are
\f[I]"illustration"\f[], \f[I]"doujin"\f[], \f[I]"favorite"\f[], \f[I]"nuita"\f[].
-You can use \f[I]"all"\f[] instead of listing all values separately.
+It is possible to use \f[I]"all"\f[] instead of listing all values separately.
.SS extractor.nitter.quoted
@@ -2188,7 +2314,10 @@ Fetch media from Retweets.
.SS extractor.nitter.videos
.IP "Type:" 6
-\f[I]bool\f[] or \f[I]string\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
@@ -2254,7 +2383,7 @@ Host name / IP address to bind to during OAuth authorization.
.IP "Description:" 4
Port number to listen on during OAuth authorization.
-Note: All redirects will go to http://localhost:6414/, regardless
+Note: All redirects will go to port \f[I]6414\f[], regardless
of the port specified here. You'll have to manually adjust the
port number in your browser's address bar when using a different
port than the default.
@@ -2331,6 +2460,20 @@ Extract inline images.
Extract media from reblogged posts.
+.SS extractor.pinterest.domain
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"auto"\f[]
+
+.IP "Description:" 4
+Specifies the domain used by \f[I]pinterest\f[] extractors.
+
+Setting this option to \f[I]"auto"\f[]
+uses the same domain as a given input URL.
+
+
.SS extractor.pinterest.sections
.IP "Type:" 6
\f[I]bool\f[]
@@ -2379,6 +2522,17 @@ Possible values are
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
+.SS extractor.pixiv.refresh-token
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Description:" 4
+The \f[I]refresh-token\f[] value you get
+from running \f[I]gallery-dl oauth:pixiv\f[] (see OAuth_) or
+by using a third-party tool like
+\f[I]gppt\f[].
+
+
.SS extractor.pixiv.metadata
.IP "Type:" 6
\f[I]bool\f[]
@@ -2390,6 +2544,21 @@ It is possible to use \f[I]"all"\f[] instead of listing all values separately.
Fetch extended \f[I]user\f[] metadata.
+.SS extractor.pixiv.metadata-bookmark
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+For works bookmarked by
+\f[I]your own account\f[],
+fetch bookmark tags as \f[I]tags_bookmark\f[] metadata.
+
+Note: This requires 1 additional API call per bookmarked post.
+
+
.SS extractor.pixiv.work.related
.IP "Type:" 6
\f[I]bool\f[]
@@ -2534,7 +2703,7 @@ time required when scanning a subreddit.
Retrieve additional comments by resolving the \f[I]more\f[] comment
stubs in the base comment tree.
-This requires 1 additional API call for every 100 extra comments.
+Note: This requires 1 additional API call for every 100 extra comments.
.SS extractor.reddit.date-min & .date-max
@@ -2599,7 +2768,10 @@ at 600 requests every 10 minutes/600 seconds.
.SS extractor.reddit.videos
.IP "Type:" 6
-\f[I]bool\f[] or \f[I]string\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
@@ -2620,9 +2792,9 @@ video extraction and download
.SS extractor.redgifs.format
.IP "Type:" 6
.br
-* \f[I]list\f[] of \f[I]strings\f[]
-.br
* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]["hd", "sd", "gif"]\f[]
@@ -2707,7 +2879,10 @@ Download thumbnails.
.SS extractor.skeb.search.filters
.IP "Type:" 6
-\f[I]list\f[] or \f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]["genre:art", "genre:voice", "genre:novel", "genre:video", "genre:music", "genre:correction"]\f[]
@@ -2822,7 +2997,10 @@ Selects how to handle exceeding the daily API rate limit.
.SS extractor.tumblr.reblogs
.IP "Type:" 6
-\f[I]bool\f[] or \f[I]string\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
@@ -2839,13 +3017,19 @@ is from the same blog
.SS extractor.tumblr.posts
.IP "Type:" 6
-\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]"all"\f[]
.IP "Example:" 4
-"video,audio,link" or ["video", "audio", "link"]
+.br
+* "video,audio,link"
+.br
+* ["video", "audio", "link"]
.IP "Description:" 4
A (comma-separated) list of post types to extract images, etc. from.
@@ -2853,7 +3037,7 @@ A (comma-separated) list of post types to extract images, etc. from.
Possible types are \f[I]text\f[], \f[I]quote\f[], \f[I]link\f[], \f[I]answer\f[],
\f[I]video\f[], \f[I]audio\f[], \f[I]photo\f[], \f[I]chat\f[].
-You can use \f[I]"all"\f[] instead of listing all types separately.
+It is possible to use \f[I]"all"\f[] instead of listing all types separately.
.SS extractor.tumblr.fallback-delay
@@ -2909,7 +3093,10 @@ See \f[I]Filters\f[] for details.
.SS extractor.twitter.cards
.IP "Type:" 6
-\f[I]bool\f[] or \f[I]string\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]false\f[]
@@ -2993,6 +3180,8 @@ with enabled \f[I]conversations\f[] option
for each Tweet in said timeline.
Note: This requires at least 1 additional API call per initial Tweet.
+Age-restricted replies cannot be expanded when using the
+\f[I]syndication\f[] API.
.SS extractor.twitter.size
@@ -3013,13 +3202,32 @@ Known available sizes are
.SS extractor.twitter.syndication
.IP "Type:" 6
-\f[I]bool\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]false\f[]
.IP "Description:" 4
-Retrieve age-restricted content using Twitter's syndication API.
+Controls how to retrieve age-restricted content when not logged in.
+
+.br
+* \f[I]false\f[]: Skip age-restricted Tweets.
+.br
+* \f[I]true\f[]: Download using Twitter's syndication API.
+.br
+* \f[I]"extended"\f[]: Try to fetch Tweet metadata using the normal API
+in addition to the syndication API. This requires additional HTTP
+requests in some cases (e.g. when \f[I]retweets\f[]
+are enabled).
+
+Note: This does not apply to search results (including
+\f[I]timeline strategies\f[]).
+To retrieve such content from search results, you must log in and
+disable "Hide sensitive content" in your \f[I]search settings
+<https://twitter.com/settings/search>\f[].
.SS extractor.twitter.logout
@@ -3054,6 +3262,9 @@ Fetch media from pinned Tweets.
.IP "Description:" 4
Fetch media from quoted Tweets.
+If this option is enabled, gallery-dl will try to fetch
+a quoted (original) Tweet when it sees the Tweet which quotes it.
+
.SS extractor.twitter.replies
.IP "Type:" 6
@@ -3068,6 +3279,13 @@ Fetch media from replies to other Tweets.
If this value is \f[I]"self"\f[], only consider replies where
reply and original Tweet are from the same user.
+Note: Twitter will automatically expand conversations if you
+use the \f[I]/with_replies\f[] timeline while logged in. For example,
+media from Tweets which the user replied to will also be downloaded.
+
+It is possible to exclude unwanted Tweets using \f[I]image-filter
+<extractor.*.image-filter_>\f[].
+
.SS extractor.twitter.retweets
.IP "Type:" 6
@@ -3174,7 +3392,10 @@ for \f[I]twitter\f[] to a non-default value, e.g. an empty string \f[I]""\f[].
.SS extractor.twitter.videos
.IP "Type:" 6
-\f[I]bool\f[] or \f[I]string\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
@@ -3265,7 +3486,7 @@ It is possible to use \f[I]"all"\f[] instead of listing all values separately.
.IP "Description:" 4
Extract additional metadata (tags, uploader)
-Note: This requires 1 additional HTTP request for each post.
+Note: This requires 1 additional HTTP request per post.
.SS extractor.weasyl.api-key
@@ -3312,7 +3533,12 @@ A (comma-separated) list of subcategories to include
when processing a user profile.
Possible values are
-\f[I]"home"\f[], \f[I]"feed"\f[], \f[I]"videos"\f[], \f[I]"newvideo"\f[], \f[I]"article"\f[], \f[I]"album"\f[].
+\f[I]"home"\f[],
+\f[I]"feed"\f[],
+\f[I]"videos"\f[],
+\f[I]"newvideo"\f[],
+\f[I]"article"\f[],
+\f[I]"album"\f[].
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
@@ -3422,7 +3648,7 @@ followed by \f[I]"youtube_dl"\f[] as fallback.
.SS extractor.ytdl.raw-options
.IP "Type:" 6
-\f[I]object\f[]
+\f[I]object\f[] (name -> value)
.IP "Example:" 4
.. code:: json
@@ -3479,7 +3705,7 @@ Location of a youtube-dl configuration file to load options from.
.IP "Description:" 4
Extract additional metadata (date, md5, tags, ...)
-Note: This requires 1-2 additional HTTP request for each post.
+Note: This requires 1-2 additional HTTP requests per post.
.SS extractor.[booru].tags
@@ -3493,7 +3719,7 @@ Note: This requires 1-2 additional HTTP request for each post.
Categorize tags by their respective types
and provide them as \f[I]tags_<type>\f[] metadata fields.
-Note: This requires 1 additional HTTP request for each post.
+Note: This requires 1 additional HTTP request per post.
.SS extractor.[booru].notes
@@ -3506,7 +3732,7 @@ Note: This requires 1 additional HTTP request for each post.
.IP "Description:" 4
Extract overlay notes (position and text).
-Note: This requires 1 additional HTTP request for each post.
+Note: This requires 1 additional HTTP request per post.
.SS extractor.[manga-extractor].chapter-reverse
@@ -3659,7 +3885,7 @@ or \f[I]-1\f[] for infinite retries.
.SS downloader.*.timeout
.IP "Type:" 6
-\f[I]float\f[] or \f[I]null\f[]
+\f[I]float\f[]
.IP "Default:" 9
\f[I]extractor.*.timeout\f[]
@@ -3670,7 +3896,10 @@ Connection timeout during file downloads.
.SS downloader.*.verify
.IP "Type:" 6
-\f[I]bool\f[] or \f[I]string\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]extractor.*.verify\f[]
@@ -3681,16 +3910,19 @@ Certificate validation during file downloads.
.SS downloader.*.proxy
.IP "Type:" 6
-\f[I]string\f[] or \f[I]object\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]object\f[] (scheme -> proxy)
.IP "Default:" 9
\f[I]extractor.*.proxy\f[]
.IP "Description:" 4
Proxy server used for file downloads.
-.br
-Disable the use of a proxy by explicitly setting this option to \f[I]null\f[].
-.br
+
+Disable the use of a proxy for file downloads
+by explicitly setting this option to \f[I]null\f[].
.SS downloader.http.adjust-extensions
@@ -3711,7 +3943,10 @@ contains JPEG/JFIF data.
.SS downloader.http.chunk-size
.IP "Type:" 6
-\f[I]integer\f[] or \f[I]string\f[]
+.br
+* \f[I]integer\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]32768\f[]
@@ -3729,7 +3964,7 @@ These suffixes are case-insensitive.
.SS downloader.http.headers
.IP "Type:" 6
-\f[I]object\f[]
+\f[I]object\f[] (name -> value)
.IP "Example:" 4
{"Accept": "image/webp,*/*", "Referer": "https://example.org/"}
@@ -3835,7 +4070,7 @@ cause unexpected results in combination with other options
.SS downloader.ytdl.raw-options
.IP "Type:" 6
-\f[I]object\f[]
+\f[I]object\f[] (name -> value)
.IP "Example:" 4
.. code:: json
@@ -3885,7 +4120,10 @@ Location of a youtube-dl configuration file to load options from.
.SH OUTPUT OPTIONS
.SS output.mode
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]object\f[] (key -> format string)
.IP "Default:" 9
\f[I]"auto"\f[]
@@ -3902,7 +4140,54 @@ Controls the output string format and status indicators.
.br
* \f[I]"color"\f[]: Suitable for terminals that understand ANSI escape codes and colors
.br
-* \f[I]"auto"\f[]: Automatically choose the best suitable output mode
+* \f[I]"auto"\f[]: \f[I]"terminal"\f[] on Windows with \f[I]output.ansi\f[] disabled,
+\f[I]"color"\f[] otherwise.
+
+It is possible to use custom output format strings
+.br
+by setting this option to an \f[I]object\f[] and specifying
+\f[I]start\f[], \f[I]success\f[], \f[I]skip\f[], \f[I]progress\f[], and \f[I]progress-total\f[].
+.br
+
+For example, the following will replicate the same output as \f[I]mode: color\f[]:
+
+.. code:: json
+
+{
+"start" : "{}",
+"success": "\\r\\u001b[1;32m{}\\u001b[0m\\n",
+"skip" : "\\u001b[2m{}\\u001b[0m\\n",
+"progress" : "\\r{0:>7}B {1:>7}B/s ",
+"progress-total": "\\r{3:>3}% {0:>7}B {1:>7}B/s "
+}
+
+\f[I]start\f[], \f[I]success\f[], and \f[I]skip\f[] are used to output the current
+filename, where \f[I]{}\f[] or \f[I]{0}\f[] is replaced with said filename.
+If a given format string contains printable characters other than that,
+their number needs to be specified as \f[I][<number>, <format string>]\f[]
+to get the correct results for \f[I]output.shorten\f[]. For example
+
+.. code:: json
+
+"start" : [12, "Downloading {}"]
+
+\f[I]progress\f[] and \f[I]progress-total\f[] are used when displaying the
+.br
+\f[I]download progress indicator\f[],
+\f[I]progress\f[] when the total number of bytes to download is unknown,
+.br
+\f[I]progress-total\f[] otherwise.
+
+For these format strings
+
+.br
+* \f[I]{0}\f[] is number of bytes downloaded
+.br
+* \f[I]{1}\f[] is number of downloaded bytes per second
+.br
+* \f[I]{2}\f[] is total number of bytes
+.br
+* \f[I]{3}\f[] is the percentage of total bytes downloaded
.SS output.shorten
@@ -3922,7 +4207,7 @@ with a display width greater than 1.
.SS output.colors
.IP "Type:" 6
-\f[I]object\f[]
+\f[I]object\f[] (key -> ANSI color)
.IP "Default:" 9
\f[I]{"success": "1;32", "skip": "2"}\f[]
@@ -3983,7 +4268,10 @@ in the output of \f[I]-K/--list-keywords\f[] and \f[I]-j/--dump-json\f[].
.SS output.progress
.IP "Type:" 6
-\f[I]bool\f[] or \f[I]string\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
@@ -4005,13 +4293,16 @@ as a custom \f[I]format string\f[]. Possible replacement keys are
.SS output.log
.IP "Type:" 6
-\f[I]string\f[] or \f[I]Logging Configuration\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]Logging Configuration\f[]
.IP "Default:" 9
\f[I]"[{name}][{levelname}] {message}"\f[]
.IP "Description:" 4
-Configuration for standard logging output to stderr.
+Configuration for logging output to stderr.
If this is a simple \f[I]string\f[], it specifies
the format string for logging messages.
@@ -4019,10 +4310,10 @@ the format string for logging messages.
.SS output.logfile
.IP "Type:" 6
-\f[I]Path\f[] or \f[I]Logging Configuration\f[]
-
-.IP "Default:" 9
-\f[I]null\f[]
+.br
+* \f[I]Path\f[]
+.br
+* \f[I]Logging Configuration\f[]
.IP "Description:" 4
File to write logging output to.
@@ -4030,10 +4321,10 @@ File to write logging output to.
.SS output.unsupportedfile
.IP "Type:" 6
-\f[I]Path\f[] or \f[I]Logging Configuration\f[]
-
-.IP "Default:" 9
-\f[I]null\f[]
+.br
+* \f[I]Path\f[]
+.br
+* \f[I]Logging Configuration\f[]
.IP "Description:" 4
File to write external URLs unsupported by *gallery-dl* to.
@@ -4056,7 +4347,7 @@ before outputting them as JSON.
.SH POSTPROCESSOR OPTIONS
.SS classify.mapping
.IP "Type:" 6
-\f[I]object\f[]
+\f[I]object\f[] (directory -> extensions)
.IP "Default:" 9
.. code:: json
@@ -4144,7 +4435,10 @@ or to let it run asynchronously.
.SS exec.command
.IP "Type:" 6
-\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Example:" 4
.br
@@ -4193,8 +4487,7 @@ See \f[I]metadata.event\f[] for a list of available events.
Selects how to process metadata.
.br
-* \f[I]"json"\f[]: write metadata using \f[I]json.dump()
-<https://docs.python.org/3/library/json.html#json.dump>\f[]
+* \f[I]"json"\f[]: write metadata using \f[I]json.dump()\f[]
.br
* \f[I]"jsonl"\f[]: write metadata in \f[I]JSON Lines
<https://jsonlines.org/>\f[] format
@@ -4314,13 +4607,11 @@ After downloading all files of a post
* \f[I]object\f[] (field name -> \f[I]format string\f[])
.IP "Example:" 4
-.br
-* .. code:: json
+.. code:: json
["blocked", "watching", "status[creator][name]"]
-.br
-* .. code:: json
+.. code:: json
{
"blocked" : "***",
@@ -4341,7 +4632,10 @@ whose result is assigned to said field name.
.SS metadata.content-format
.IP "Type:" 6
-\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Example:" 4
.br
@@ -4355,6 +4649,24 @@ Custom format string to build the content of metadata files with.
Note: Only applies for \f[I]"mode": "custom"\f[].
+.SS metadata.indent
+.IP "Type:" 6
+.br
+* \f[I]integer\f[]
+.br
+* \f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]4\f[]
+
+.IP "Description:" 4
+Indentation level of JSON output.
+
+See the \f[I]indent\f[] argument of \f[I]json.dump()\f[] for further details.
+
+Note: Only applies for \f[I]"mode": "json"\f[].
+
+
.SS metadata.open
.IP "Type:" 6
\f[I]string\f[]
@@ -4369,19 +4681,7 @@ For example,
use \f[I]"a"\f[] to append to a file's content
or \f[I]"w"\f[] to truncate it.
-See the \f[I]mode\f[] parameter of \f[I]open()\f[] for further details.
-
-
-.SS metadata.private
-.IP "Type:" 6
-\f[I]bool\f[]
-
-.IP "Default:" 9
-\f[I]false\f[]
-
-.IP "Description:" 4
-Include private fields,
-i.e. fields whose name starts with an underscore.
+See the \f[I]mode\f[] argument of \f[I]open()\f[] for further details.
.SS metadata.encoding
@@ -4394,7 +4694,19 @@ i.e. fields whose name starts with an underscore.
.IP "Description:" 4
Name of the encoding used to encode a file's content.
-See the \f[I]encoding\f[] parameter of \f[I]open()\f[] for further details.
+See the \f[I]encoding\f[] argument of \f[I]open()\f[] for further details.
+
+
+.SS metadata.private
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Include private fields,
+i.e. fields whose name starts with an underscore.
.SS metadata.archive
@@ -4834,8 +5146,8 @@ in Flickr's \f[I]App Garden\f[]
* fill out the form with a random name and description
and click "SUBMIT"
.br
-* copy \f[I]Key\f[] and \f[I]Secret\f[] and put them in your configuration
-file
+* copy \f[I]Key\f[] and \f[I]Secret\f[] and put them in your configuration file
+as \f[I]"api-key"\f[] and \f[I]"api-secret"\f[]
.SS extractor.reddit.client-id & .user-agent
@@ -4855,9 +5167,10 @@ section of your account's preferences
.br
* copy the client id (third line, under your application's name and
"installed app") and put it in your configuration file
+as \f[I]"client-id"\f[]
.br
* use "\f[I]Python:<application name>:v1.0 (by /u/<username>)\f[]" as
-user-agent and replace \f[I]<application name>\f[] and \f[I]<username>\f[]
+\f[I]user-agent\f[] and replace \f[I]<application name>\f[] and \f[I]<username>\f[]
accordingly (see Reddit's
\f[I]API access rules\f[])
@@ -4878,6 +5191,7 @@ and "Use" to "Non-Commercial"
.br
* copy \f[I]API Key\f[] and \f[I]API Secret\f[]
and put them in your configuration file
+as \f[I]"api-key"\f[] and \f[I]"api-secret"\f[]
.SS extractor.tumblr.api-key & .api-secret
@@ -4901,6 +5215,7 @@ callback URL"
.br
* copy your \f[I]OAuth Consumer Key\f[] and \f[I]Secret Key\f[]
and put them in your configuration file
+as \f[I]"api-key"\f[] and \f[I]"api-secret"\f[]
.SH CUSTOM TYPES
@@ -4952,7 +5267,7 @@ A \f[I]Duration\f[] represents a span of time in seconds.
* If given as a single \f[I]float\f[], it will be used as that exact value.
.br
* If given as a \f[I]list\f[] with 2 floating-point numbers \f[I]a\f[] & \f[I]b\f[],
-it will be randomly chosen with uniform distribution such that \f[I]a <= N <=b\f[].
+it will be randomly chosen with uniform distribution such that \f[I]a <= N <= b\f[].
(see \f[I]random.uniform()\f[])
.br
* If given as a \f[I]string\f[], it can either represent a single \f[I]float\f[]
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 98974e9..2d2adbb 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -228,6 +228,7 @@
},
"pinterest":
{
+ "domain": "auto",
"sections": true,
"videos": true
},
@@ -236,6 +237,7 @@
"refresh-token": null,
"include": "artworks",
"metadata": false,
+ "metadata-bookmark": false,
"tags": "japanese",
"ugoira": true
},
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 03c1930..17442cc 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.24.2
+Version: 1.24.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -39,13 +39,16 @@ License-File: LICENSE
gallery-dl
==========
-*gallery-dl* is a command-line program to download image galleries and
-collections from several image hosting sites (see `Supported Sites`_).
-It is a cross-platform tool with many configuration options
-and powerful `filenaming capabilities <Formatting_>`_.
+*gallery-dl* is a command-line program
+to download image galleries and collections
+from several image hosting sites
+(see `Supported Sites <docs/supportedsites.md>`__).
+It is a cross-platform tool
+with many `configuration options <docs/configuration.rst>`__
+and powerful `filenaming capabilities <docs/formatting.md>`__.
-|pypi| |build| |gitter|
+|pypi| |build|
.. contents::
@@ -59,7 +62,7 @@ Dependencies
Optional
--------
-- FFmpeg_: Pixiv Ugoira to WebM conversion
+- FFmpeg_: Pixiv Ugoira conversion
- yt-dlp_ or youtube-dl_: Video downloads
- PySocks_: SOCKS proxy support
- brotli_ or brotlicffi_: Brotli compression support
@@ -103,9 +106,13 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.3/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.3/gallery-dl.bin>`__
+
+
+Nightly Builds
+--------------
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -141,6 +148,16 @@ Scoop
scoop install gallery-dl
+Homebrew
+--------
+
+For macOS or Linux users using Homebrew:
+
+.. code:: bash
+
+ brew install gallery-dl
+
+
Usage
=====
@@ -149,9 +166,10 @@ from:
.. code:: bash
- gallery-dl [OPTION]... URL...
+ gallery-dl [OPTIONS]... URLS...
-See also :code:`gallery-dl --help`.
+Use :code:`gallery-dl --help` or see `<docs/options.md>`__
+for a full list of all command-line options.
Examples
@@ -199,13 +217,22 @@ Configuration
Configuration files for *gallery-dl* use a JSON-based file format.
-| For a (more or less) complete example with options set to their default values,
- see gallery-dl.conf_.
-| For a configuration file example with more involved settings and options,
- see gallery-dl-example.conf_.
-| A list of all available configuration options and their
- descriptions can be found in configuration.rst_.
-|
+
+Documentation
+-------------
+
+A list of all available configuration options and their descriptions
+can be found in `<docs/configuration.rst>`__.
+
+| For a default configuration file with available options set to their
+ default values, see `<docs/gallery-dl.conf>`__.
+
+| For a commented example with more involved settings and option usage,
+ see `<docs/gallery-dl-example.conf>`__.
+
+
+Locations
+---------
*gallery-dl* searches for configuration files in the following places:
@@ -214,7 +241,7 @@ Windows:
* ``%USERPROFILE%\gallery-dl\config.json``
* ``%USERPROFILE%\gallery-dl.conf``
- (``%USERPROFILE%`` usually refers to the user's home directory,
+ (``%USERPROFILE%`` usually refers to a user's home directory,
i.e. ``C:\Users\<username>\``)
Linux, macOS, etc.:
@@ -223,12 +250,13 @@ Linux, macOS, etc.:
* ``${HOME}/.config/gallery-dl/config.json``
* ``${HOME}/.gallery-dl.conf``
-Values in later configuration files will override previous ones.
+When run as `executable <Standalone Executable_>`__,
+*gallery-dl* will also look for a ``gallery-dl.conf`` file
+in the same directory as said executable.
-Command line options will override all related settings in the configuration file(s),
-e.g. using ``--write-metadata`` will enable writing metadata using the default values
-for all ``postprocessors.metadata.*`` settings, overriding any specific settings in
-configuration files.
+It is possible to use more than one configuration file at a time.
+In this case, any values from files after the first will get merged
+into the already loaded settings and potentially override previous ones.
Authentication
@@ -258,8 +286,8 @@ and optional for
``twitter``,
and ``zerochan``.
-You can set the necessary information in your configuration file
-(cf. gallery-dl.conf_)
+You can set the necessary information in your
+`configuration file <Configuration_>`__
.. code:: json
@@ -278,8 +306,8 @@ or you can provide them directly via the
.. code:: bash
- gallery-dl -u <username> -p <password> URL
- gallery-dl -o username=<username> -o password=<password> URL
+ gallery-dl -u "<username>" -p "<password>" "URL"
+ gallery-dl -o "username=<username>" -o "password=<password>" "URL"
Cookies
@@ -290,7 +318,7 @@ CAPTCHA or similar, or has not been implemented yet, you can use the
cookies from a browser login session and input them into *gallery-dl*.
This can be done via the
-`cookies <https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorcookies>`__
+`cookies <docs/configuration.rst#extractorcookies>`__
option in your configuration file by specifying
- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
@@ -301,6 +329,9 @@ option in your configuration file by specifying
| (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__,
in `Firefox <https://developer.mozilla.org/en-US/docs/Tools/Storage_Inspector>`__)
+- | the name of a browser to extract cookies from
+ | (supported browsers are Chromium-based ones, Firefox, and Safari)
+
For example:
.. code:: json
@@ -314,30 +345,43 @@ For example:
"cookies": {
"session_id": "K1T57EKu19TR49C51CDjOJoXNQLF7VbdVOiBrC9ye0a"
}
+ },
+ "twitter": {
+ "cookies": ["firefox"]
}
}
}
-You can also specify a cookies.txt file with
-the :code:`--cookies` command-line option:
+| You can also specify a cookies.txt file with
+ the :code:`--cookies` command-line option
+| or a browser to extract cookies from with :code:`--cookies-from-browser`:
.. code:: bash
- gallery-dl --cookies "$HOME/path/to/cookies.txt" URL
+ gallery-dl --cookies "$HOME/path/to/cookies.txt" "URL"
+ gallery-dl --cookies-from-browser firefox "URL"
OAuth
-----
-*gallery-dl* supports user authentication via OAuth_ for
-``deviantart``, ``flickr``, ``reddit``, ``smugmug``, ``tumblr``,
+*gallery-dl* supports user authentication via OAuth_ for some extractors.
+This is necessary for
+``pixiv``
+and optional for
+``deviantart``,
+``flickr``,
+``reddit``,
+``smugmug``,
+``tumblr``,
and ``mastodon`` instances.
-This is mostly optional, but grants *gallery-dl* the ability
-to issue requests on your account's behalf and enables it to access resources
-which would otherwise be unavailable to a public user.
-To link your account to *gallery-dl*, start by invoking it with
-``oauth:<sitename>`` as an argument. For example:
+Linking your account to *gallery-dl* grants it the ability to issue requests
+on your account's behalf and enables it to access resources which would
+otherwise be unavailable to a public user.
+
+To do so, start by invoking it with ``oauth:<sitename>`` as an argument.
+For example:
.. code:: bash
@@ -356,13 +400,6 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
gallery-dl oauth:mastodon:https://mastodon.social/
-
-.. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf
-.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
-.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
-.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md
-.. _Formatting: https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md
-
.. _Python: https://www.python.org/downloads/
.. _PyPI: https://pypi.org/
.. _pip: https://pip.pypa.io/en/stable/
@@ -373,7 +410,6 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _PySocks: https://pypi.org/project/PySocks/
.. _brotli: https://github.com/google/brotli
.. _brotlicffi: https://github.com/python-hyper/brotlicffi
-.. _pyOpenSSL: https://pyopenssl.org/
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
.. _Chocolatey: https://chocolatey.org/install
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 556dc49..599a828 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -72,6 +72,7 @@ gallery_dl/extractor/erome.py
gallery_dl/extractor/exhentai.py
gallery_dl/extractor/fallenangels.py
gallery_dl/extractor/fanbox.py
+gallery_dl/extractor/fanleaks.py
gallery_dl/extractor/fantia.py
gallery_dl/extractor/fapachi.py
gallery_dl/extractor/fapello.py
@@ -180,6 +181,7 @@ gallery_dl/extractor/soundgasm.py
gallery_dl/extractor/speakerdeck.py
gallery_dl/extractor/subscribestar.py
gallery_dl/extractor/tapas.py
+gallery_dl/extractor/tcbscans.py
gallery_dl/extractor/telegraph.py
gallery_dl/extractor/test.py
gallery_dl/extractor/toyhouse.py
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index ee00bf7..f18cc47 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -980,6 +980,7 @@ def _is_path(value):
def _parse_browser_specification(
browser, profile=None, keyring=None, container=None):
+ browser = browser.lower()
if browser not in SUPPORTED_BROWSERS:
raise ValueError("unsupported browser '{}'".format(browser))
if keyring and keyring not in SUPPORTED_KEYRINGS:
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 444075c..f26f6a9 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -38,6 +38,7 @@ modules = [
"exhentai",
"fallenangels",
"fanbox",
+ "fanleaks",
"fantia",
"fapello",
"fapachi",
@@ -135,6 +136,7 @@ modules = [
"speakerdeck",
"subscribestar",
"tapas",
+ "tcbscans",
"telegraph",
"toyhouse",
"tsumino",
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index cf332ac..6da6175 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://www.behance.net/"""
+"""Extractors for https://www.behance.net/"""
from .common import Extractor, Message
from .. import text
@@ -17,6 +17,7 @@ class BehanceExtractor(Extractor):
"""Base class for behance extractors"""
category = "behance"
root = "https://www.behance.net"
+ request_interval = (2.0, 4.0)
def items(self):
for gallery in self.galleries():
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 882c2b3..8283fbc 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -56,8 +56,12 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
files = album["files"]
except Exception as exc:
self.log.debug("%s: %s", exc.__class__.__name__, exc)
+ self.log.debug("Falling back to lolisafe API")
self.root = root.replace("://", "://app.", 1)
files, data = LolisafeAlbumExtractor.fetch_album(self, album_id)
+ # fix file URLs (bunkr..ru -> bunkr.ru) (#3481)
+ for file in files:
+ file["file"] = file["file"].replace("bunkr..", "bunkr.", 1)
else:
for file in files:
file["file"] = file["cdn"] + "/" + file["name"]
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 4352aa7..ad766da 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -327,6 +327,7 @@ class Extractor():
except Exception as exc:
self.log.warning("cookies: %s", exc)
else:
+ self.log.debug("Loading cookies from '%s'", cookies)
self._cookiefile = cookiefile
elif isinstance(cookies, (list, tuple)):
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index ef17176..4c93604 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -101,8 +101,8 @@ class DanbooruExtractor(BaseExtractor):
if self.extended_metadata:
template = (
- "{}/posts/{}.json"
- "?only=artist_commentary,children,notes,parent"
+ "{}/posts/{}.json?only=artist_commentary,children,notes,"
+ "parent,uploader"
)
resp = self.request(template.format(self.root, post["id"]))
post.update(resp.json())
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index aa78cfb..aeb2d0a 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -987,13 +987,9 @@ class DeviantartScrapsExtractor(DeviantartExtractor):
_warning = True
def deviations(self):
- eclipse_api = DeviantartEclipseAPI(self)
- if self._warning:
- DeviantartScrapsExtractor._warning = False
- if not self._check_cookies(self.cookienames):
- self.log.warning(
- "No session cookies set: Unable to fetch mature scraps.")
+ self.login()
+ eclipse_api = DeviantartEclipseAPI(self)
for obj in eclipse_api.gallery_scraps(self.user, self.offset):
deviation = obj["deviation"]
deviation_uuid = eclipse_api.deviation_extended_fetch(
@@ -1004,6 +1000,17 @@ class DeviantartScrapsExtractor(DeviantartExtractor):
yield self.api.deviation(deviation_uuid)
+ def login(self):
+ """Login and obtain session cookies"""
+ if not self._check_cookies(self.cookienames):
+ username, password = self._get_auth_info()
+ if username:
+ self._update_cookies(_login_impl(self, username, password))
+ elif self._warning:
+ self.log.warning(
+ "No session cookies set: Unable to fetch mature scraps.")
+ DeviantartScrapsExtractor._warning = False
+
class DeviantartFollowingExtractor(DeviantartExtractor):
"""Extractor for user's watched users"""
@@ -1513,13 +1520,47 @@ class DeviantartEclipseAPI():
return token
-@cache(maxage=100*365*24*3600, keyarg=0)
+@cache(maxage=100*365*86400, keyarg=0)
def _refresh_token_cache(token):
if token and token[0] == "#":
return None
return token
+@cache(maxage=28*86400, keyarg=1)
+def _login_impl(extr, username, password):
+ extr.log.info("Logging in as %s", username)
+
+ url = "https://www.deviantart.com/users/login"
+ page = extr.request(url).text
+
+ data = {}
+ for item in text.extract_iter(page, '<input type="hidden" name="', '"/>'):
+ name, _, value = item.partition('" value="')
+ data[name] = value
+
+ challenge = data.get("challenge")
+ if challenge and challenge != "0":
+ extr.log.warning("Login requires solving a CAPTCHA")
+ extr.log.debug(challenge)
+
+ data["username"] = username
+ data["password"] = password
+ data["remember"] = "on"
+
+ extr.sleep(2.0, "login")
+ url = "https://www.deviantart.com/_sisu/do/signin"
+ response = extr.request(url, method="POST", data=data)
+
+ if not response.history:
+ raise exception.AuthenticationError()
+
+ return {
+ cookie.name: cookie.value
+ for cookie in extr.session.cookies
+ }
+
+
###############################################################################
# Journal Formats #############################################################
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index f692a90..41431dc 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -69,14 +69,28 @@ class FanboxExtractor(Extractor):
if post["type"] == "article":
post["articleBody"] = content_body.copy()
if "blocks" in content_body:
- content = []
+ content = [] # text content
+ images = [] # image IDs in 'body' order
+
append = content.append
+ append_img = images.append
for block in content_body["blocks"]:
if "text" in block:
append(block["text"])
if "links" in block:
for link in block["links"]:
append(link["url"])
+ if "imageId" in block:
+ append_img(block["imageId"])
+
+ if images and "imageMap" in content_body:
+ # reorder 'imageMap' (#2718)
+ image_map = content_body["imageMap"]
+ content_body["imageMap"] = {
+ image_id: image_map[image_id]
+ for image_id in images
+ }
+
post["content"] = "\n".join(content)
post["date"] = text.parse_datetime(post["publishedDatetime"])
@@ -294,6 +308,10 @@ class FanboxPostExtractor(FanboxExtractor):
r"Thank you for your continued support of FANBOX.$",
},
}),
+ # imageMap file order (#2718)
+ ("https://mochirong.fanbox.cc/posts/3746116", {
+ "url": "c92ddd06f2efc4a5fe30ec67e21544f79a5c4062",
+ }),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/fanleaks.py b/gallery_dl/extractor/fanleaks.py
new file mode 100644
index 0000000..466bb8c
--- /dev/null
+++ b/gallery_dl/extractor/fanleaks.py
@@ -0,0 +1,127 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://fanleaks.club/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+
+class FanleaksExtractor(Extractor):
+ """Base class for Fanleaks extractors"""
+ category = "fanleaks"
+ directory_fmt = ("{category}", "{model}")
+ filename_fmt = "{model_id}_{id}.{extension}"
+ archive_fmt = "{model_id}_{id}"
+ root = "https://fanleaks.club"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.model_id = match.group(1)
+
+ def extract_post(self, url):
+ extr = text.extract_from(self.request(url, notfound="post").text)
+ data = {
+ "model_id": self.model_id,
+ "model" : text.unescape(extr('text-lg">', "</a>")),
+ "id" : text.parse_int(self.id),
+ "type" : extr('type="', '"')[:5] or "photo",
+ }
+ url = extr('src="', '"')
+ yield Message.Directory, data
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
+
+class FanleaksPostExtractor(FanleaksExtractor):
+ """Extractor for individual posts on fanleaks.club"""
+ subcategory = "post"
+ pattern = r"(?:https?://)?(?:www\.)?fanleaks\.club/([^/?#]+)/(\d+)"
+ test = (
+ ("https://fanleaks.club/selti/880", {
+ "pattern": (r"https://fanleaks\.club//models"
+ r"/selti/images/selti_0880\.jpg"),
+ "keyword": {
+ "model_id": "selti",
+ "model" : "Selti",
+ "id" : 880,
+ "type" : "photo",
+ },
+ }),
+ ("https://fanleaks.club/daisy-keech/1038", {
+ "pattern": (r"https://fanleaks\.club//models"
+ r"/daisy-keech/videos/daisy-keech_1038\.mp4"),
+ "keyword": {
+ "model_id": "daisy-keech",
+ "model" : "Daisy Keech",
+ "id" : 1038,
+ "type" : "video",
+ },
+ }),
+ ("https://fanleaks.club/hannahowo/000", {
+ "exception": exception.NotFoundError,
+ }),
+ )
+
+ def __init__(self, match):
+ FanleaksExtractor.__init__(self, match)
+ self.id = match.group(2)
+
+ def items(self):
+ url = "{}/{}/{}".format(self.root, self.model_id, self.id)
+ return self.extract_post(url)
+
+
+class FanleaksModelExtractor(FanleaksExtractor):
+ """Extractor for all posts from a fanleaks model"""
+ subcategory = "model"
+ pattern = (r"(?:https?://)?(?:www\.)?fanleaks\.club"
+ r"/(?!latest/?$)([^/?#]+)/?$")
+ test = (
+ ("https://fanleaks.club/hannahowo", {
+ "pattern": (r"https://fanleaks\.club//models"
+ r"/hannahowo/(images|videos)/hannahowo_\d+\.\w+"),
+ "range" : "1-100",
+ "count" : 100,
+ }),
+ ("https://fanleaks.club/belle-delphine", {
+ "pattern": (r"https://fanleaks\.club//models"
+ r"/belle-delphine/(images|videos)"
+ r"/belle-delphine_\d+\.\w+"),
+ "range" : "1-100",
+ "count" : 100,
+ }),
+ ("https://fanleaks.club/daisy-keech"),
+ )
+
+ def items(self):
+ page_num = 1
+ page = self.request(
+ self.root + "/" + self.model_id, notfound="model").text
+ data = {
+ "model_id": self.model_id,
+ "model" : text.unescape(
+ text.extr(page, 'mt-4">', "</h1>")),
+ "type" : "photo",
+ }
+ page_url = text.extr(page, "url: '", "'")
+ while True:
+ page = self.request("{}{}".format(page_url, page_num)).text
+ if not page:
+ return
+
+ for item in text.extract_iter(page, '<a href="/', "</a>"):
+ self.id = id = text.extr(item, "/", '"')
+ if "/icon-play.svg" in item:
+ url = "{}/{}/{}".format(self.root, self.model_id, id)
+ yield from self.extract_post(url)
+ continue
+
+ data["id"] = text.parse_int(id)
+ url = text.extr(item, 'src="', '"').replace(
+ "/thumbs/", "/", 1)
+ yield Message.Directory, data
+ yield Message.Url, url, text.nameext_from_url(url, data)
+ page_num += 1
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index d8109e1..8d73949 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -195,7 +195,7 @@ class GelbooruPostExtractor(GelbooruBase,
# notes
("https://gelbooru.com/index.php?page=post&s=view&id=5997331", {
"options": (("notes", True),),
- "keywords": {
+ "keyword": {
"notes": [
{
"body": "Look over this way when you talk~",
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index 56bd048..1efbbf0 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,25 +9,37 @@
"""Extractors for https://www.imagefap.com/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
import json
-
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com"
class ImagefapExtractor(Extractor):
"""Base class for imagefap extractors"""
category = "imagefap"
+ root = "https://www.imagefap.com"
directory_fmt = ("{category}", "{gallery_id} {title}")
filename_fmt = "{category}_{gallery_id}_{filename}.{extension}"
archive_fmt = "{gallery_id}_{image_id}"
- root = "https://www.imagefap.com"
+ request_interval = (2.0, 4.0)
def __init__(self, match):
Extractor.__init__(self, match)
self.session.headers["Referer"] = self.root
+ def request(self, url, **kwargs):
+ response = Extractor.request(self, url, **kwargs)
+
+ if response.history and response.url.endswith("/human-verification"):
+ msg = text.extr(response.text, '<div class="mt-4', '<')
+ if msg:
+ msg = " ".join(msg.partition(">")[2].split())
+ raise exception.StopExtraction("'%s'", msg)
+ self.log.warning("HTTP redirect to %s", response.url)
+
+ return response
+
class ImagefapGalleryExtractor(ImagefapExtractor):
"""Extractor for image galleries from imagefap.com"""
@@ -41,12 +53,20 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
"keyword": "2ba96e84c2952c4750e9fa94a3f2b1f965cec2f3",
"content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab",
}),
- ("https://www.imagefap.com/gallery/5486966", {
+ ("https://www.imagefap.com/gallery/7876223", {
"pattern": r"https://cdnh?\.imagefap\.com"
r"/images/full/\d+/\d+/\d+\.jpg",
- "keyword": "8d2e562df7a0bc9e8eecb9d1bb68d32b4086bf98",
- "archive": False,
- "count": 62,
+ "keyword": {
+ "count": 44,
+ "gallery_id": 7876223,
+ "image_id": int,
+ "num": int,
+ "tags": ["big ass", "panties", "horny",
+ "pussy", "exposed", "outdoor"],
+ "title": "Kelsi Monroe in lingerie",
+ "uploader": "BdRachel",
+ },
+ "count": 44,
}),
("https://www.imagefap.com/gallery.php?gid=7102714"),
("https://beta.imagefap.com/gallery.php?gid=7102714"),
@@ -118,12 +138,20 @@ class ImagefapImageExtractor(ImagefapExtractor):
subcategory = "image"
pattern = BASE_PATTERN + r"/photo/(\d+)"
test = (
- ("https://www.imagefap.com/photo/1369341772/", {
+ ("https://www.imagefap.com/photo/1962981893", {
"pattern": r"https://cdnh?\.imagefap\.com"
- r"/images/full/\d+/\d+/\d+\.jpg",
- "keyword": "8894e45f7262020d8d66ce59917315def1fc475b",
+ r"/images/full/65/196/1962981893\.jpg",
+ "keyword": {
+ "date": "21/08/2014",
+ "gallery_id": 7876223,
+ "height": 1600,
+ "image_id": 1962981893,
+ "title": "Kelsi Monroe in lingerie",
+ "uploader": "BdRachel",
+ "width": 1066,
+ },
}),
- ("https://beta.imagefap.com/photo/1369341772/"),
+ ("https://beta.imagefap.com/photo/1962981893"),
)
def __init__(self, match):
@@ -159,61 +187,70 @@ class ImagefapImageExtractor(ImagefapExtractor):
})
-class ImagefapUserExtractor(ImagefapExtractor):
- """Extractor for all galleries from a user at imagefap.com"""
- subcategory = "user"
- categorytransfer = True
- pattern = (BASE_PATTERN +
- r"/(?:profile(?:\.php\?user=|/)([^/?#]+)"
- r"|usergallery\.php\?userid=(\d+))")
+class ImagefapFolderExtractor(ImagefapExtractor):
+ """Extractor for imagefap user folders"""
+ subcategory = "folder"
+ pattern = (BASE_PATTERN + r"/(?:organizer/|"
+ r"(?:usergallery\.php\?user(id)?=([^&#]+)&"
+ r"|profile/([^/?#]+)/galleries\?)folderid=)(\d+|-1)")
test = (
- ("https://www.imagefap.com/profile/LucyRae/galleries", {
- "url": "822cb6cbb6f474ca2d0f58d1d6d253bc2338937a",
+ ("https://www.imagefap.com/organizer/409758", {
+ "pattern": r"https://www\.imagefap\.com/gallery/7876223",
+ "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
+ "count": 1,
}),
- ("https://www.imagefap.com/usergallery.php?userid=1862791", {
- "url": "822cb6cbb6f474ca2d0f58d1d6d253bc2338937a",
+ (("https://www.imagefap.com/usergallery.php"
+ "?userid=1981976&folderid=409758"), {
+ "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
+ }),
+ (("https://www.imagefap.com/usergallery.php"
+ "?user=BdRachel&folderid=409758"), {
+ "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
+ }),
+ ("https://www.imagefap.com/profile/BdRachel/galleries?folderid=-1", {
+ "pattern": ImagefapGalleryExtractor.pattern,
+ "range": "1-40",
+ }),
+ (("https://www.imagefap.com/usergallery.php"
+ "?userid=1981976&folderid=-1"), {
+ "pattern": ImagefapGalleryExtractor.pattern,
+ "range": "1-40",
+ }),
+ (("https://www.imagefap.com/usergallery.php"
+ "?user=BdRachel&folderid=-1"), {
+ "pattern": ImagefapGalleryExtractor.pattern,
+ "range": "1-40",
}),
- ("https://www.imagefap.com/profile.php?user=LucyRae"),
- ("https://beta.imagefap.com/profile.php?user=LucyRae"),
)
def __init__(self, match):
ImagefapExtractor.__init__(self, match)
- self.user, self.user_id = match.groups()
+ self._id, user, profile, self.folder_id = match.groups()
+ self.user = user or profile
def items(self):
- for folder_id in self.folders():
- for gallery_id, name in self.galleries(folder_id):
- url = "{}/gallery/{}".format(self.root, gallery_id)
- data = {
- "gallery_id": text.parse_int(gallery_id),
- "title" : text.unescape(name),
- "_extractor": ImagefapGalleryExtractor,
- }
- yield Message.Queue, url, data
-
- def folders(self):
- """Return a list of folder_ids of a specific user"""
- if self.user:
- url = "{}/profile/{}/galleries".format(self.root, self.user)
- else:
- url = "{}/usergallery.php?userid={}".format(
- self.root, self.user_id)
-
- response = self.request(url)
- self.user = response.url.split("/")[-2]
- folders = text.extr(response.text, ' id="tgl_all" value="', '"')
- return folders.rstrip("|").split("|")
+ for gallery_id, name in self.galleries(self.folder_id):
+ url = "{}/gallery/{}".format(self.root, gallery_id)
+ data = {
+ "gallery_id": gallery_id,
+ "title" : text.unescape(name),
+ "_extractor": ImagefapGalleryExtractor,
+ }
+ yield Message.Queue, url, data
def galleries(self, folder_id):
- """Yield gallery_ids of a folder"""
+ """Yield gallery IDs and titles of a folder"""
if folder_id == "-1":
- url = "{}/profile/{}/galleries?folderid=-1".format(
- self.root, self.user)
+ if self._id:
+ url = "{}/usergallery.php?userid={}&folderid=-1".format(
+ self.root, self.user)
+ else:
+ url = "{}/profile/{}/galleries?folderid=-1".format(
+ self.root, self.user)
else:
url = "{}/organizer/{}/".format(self.root, folder_id)
- params = {"page": 0}
+ params = {"page": 0}
while True:
extr = text.extract_from(self.request(url, params=params).text)
cnt = 0
@@ -228,3 +265,53 @@ class ImagefapUserExtractor(ImagefapExtractor):
if cnt < 25:
break
params["page"] += 1
+
+
+class ImagefapUserExtractor(ImagefapExtractor):
+ """Extractor for an imagefap user profile"""
+ subcategory = "user"
+ pattern = (BASE_PATTERN +
+ r"/(?:profile(?:\.php\?user=|/)([^/?#]+)(?:/galleries)?"
+ r"|usergallery\.php\?userid=(\d+))(?:$|#)")
+ test = (
+ ("https://www.imagefap.com/profile/BdRachel", {
+ "pattern": ImagefapFolderExtractor.pattern,
+ "count": ">= 18",
+ }),
+ ("https://www.imagefap.com/usergallery.php?userid=1862791", {
+ "pattern": r"https://www\.imagefap\.com"
+ r"/profile/LucyRae/galleries\?folderid=-1",
+ "count": 1,
+ }),
+ ("https://www.imagefap.com/profile/BdRachel/galleries"),
+ ("https://www.imagefap.com/profile.php?user=BdRachel"),
+ ("https://beta.imagefap.com/profile.php?user=BdRachel"),
+ )
+
+ def __init__(self, match):
+ ImagefapExtractor.__init__(self, match)
+ self.user, self.user_id = match.groups()
+
+ def items(self):
+ data = {"_extractor": ImagefapFolderExtractor}
+
+ for folder_id in self.folders():
+ if folder_id == "-1":
+ url = "{}/profile/{}/galleries?folderid=-1".format(
+ self.root, self.user)
+ else:
+ url = "{}/organizer/{}/".format(self.root, folder_id)
+ yield Message.Queue, url, data
+
+ def folders(self):
+ """Return a list of folder IDs of a user"""
+ if self.user:
+ url = "{}/profile/{}/galleries".format(self.root, self.user)
+ else:
+ url = "{}/usergallery.php?userid={}".format(
+ self.root, self.user_id)
+
+ response = self.request(url)
+ self.user = response.url.split("/")[-2]
+ folders = text.extr(response.text, ' id="tgl_all" value="', '"')
+ return folders.rstrip("|").split("|")
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 8a61728..541e427 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -67,6 +67,7 @@ class KemonopartyExtractor(Extractor):
headers["Referer"] = "{}/{}/user/{}/post/{}".format(
self.root, post["service"], post["user"], post["id"])
post["_http_headers"] = headers
+ post["_http_validate"] = _validate
post["date"] = text.parse_datetime(
post["published"] or post["added"],
"%a, %d %b %Y %H:%M:%S %Z")
@@ -197,6 +198,11 @@ class KemonopartyExtractor(Extractor):
return dms
+def _validate(response):
+ return (response.headers["content-length"] != "9" and
+ response.content != b"not found")
+
+
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
subcategory = "user"
@@ -309,6 +315,12 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
"pattern": r"https://coomer\.party/data/7d/3f/7d3fd9804583dc224968"
r"c0591163ec91794552b04f00a6c2f42a15b68231d5a8\.jpg",
}),
+ # invalid file (#3510)
+ ("https://kemono.party/patreon/user/19623797/post/29035449", {
+ "pattern": r"907ba78b4545338d3539683e63ecb51c"
+ r"f51c10adc9dabd86e92bd52339f298b9\.txt",
+ "content": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
+ }),
("https://kemono.party/subscribestar/user/alcorart/post/184330"),
("https://www.kemono.party/subscribestar/user/alcorart/post/184330"),
("https://beta.kemono.party/subscribestar/user/alcorart/post/184330"),
diff --git a/gallery_dl/extractor/lynxchan.py b/gallery_dl/extractor/lynxchan.py
index bbcf9c0..85e8bb1 100644
--- a/gallery_dl/extractor/lynxchan.py
+++ b/gallery_dl/extractor/lynxchan.py
@@ -17,9 +17,13 @@ class LynxchanExtractor(BaseExtractor):
BASE_PATTERN = LynxchanExtractor.update({
+ "bbw-chan": {
+ "root": "https://bbw-chan.nl",
+ "pattern": r"bbw-chan\.nl",
+ },
"kohlchan": {
"root": "https://kohlchan.net",
- "pattern": r"kohlchan\.net"
+ "pattern": r"kohlchan\.net",
},
"endchan": {
"root": None,
@@ -37,6 +41,11 @@ class LynxchanThreadExtractor(LynxchanExtractor):
archive_fmt = "{boardUri}_{postId}_{num}"
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
test = (
+ ("https://bbw-chan.nl/bbwdraw/res/499.html", {
+ "pattern": r"https://bbw-chan\.nl/\.media/[0-9a-f]{64}(\.\w+)?$",
+ "count": ">= 352",
+ }),
+ ("https://bbw-chan.nl/bbwdraw/res/489.html"),
("https://kohlchan.net/a/res/4594.html", {
"pattern": r"https://kohlchan\.net/\.media/[0-9a-f]{64}(\.\w+)?$",
"count": ">= 80",
@@ -78,6 +87,11 @@ class LynxchanBoardExtractor(LynxchanExtractor):
subcategory = "board"
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
test = (
+ ("https://bbw-chan.nl/bbwdraw/", {
+ "pattern": LynxchanThreadExtractor.pattern,
+ "count": ">= 148",
+ }),
+ ("https://bbw-chan.nl/bbwdraw/2.html"),
("https://kohlchan.net/a/", {
"pattern": LynxchanThreadExtractor.pattern,
"count": ">= 100",
diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py
index 3dbd5fc..5dc4cb6 100644
--- a/gallery_dl/extractor/myhentaigallery.py
+++ b/gallery_dl/extractor/myhentaigallery.py
@@ -44,7 +44,10 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor):
extr = text.extract_from(page)
split = text.split_html
- title = extr('<div class="comic-description">\n<h1>', '</h1>')
+ title = extr('<div class="comic-description">\n', '</h1>').lstrip()
+ if title.startswith("<h1>"):
+ title = title[len("<h1>"):]
+
if not title:
raise exception.NotFoundError("gallery")
diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py
index dfe78ae..f9c6abf 100644
--- a/gallery_dl/extractor/nitter.py
+++ b/gallery_dl/extractor/nitter.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -451,7 +451,7 @@ class NitterTweetExtractor(NitterExtractor):
}),
# age-restricted (#2354)
("https://nitter.unixfox.eu/mightbecurse/status/1492954264909479936", {
- "keywords": {"date": "dt:2022-02-13 20:10:09"},
+ "keyword": {"date": "dt:2022-02-13 20:10:00"},
"count": 1,
}),
)
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index f786be6..63b16ce 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -26,6 +26,13 @@ class PinterestExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
+
+ domain = self.config("domain")
+ if not domain or domain == "auto" :
+ self.root = text.root_from_url(match.group(0))
+ else:
+ self.root = text.ensure_http_scheme(domain)
+
self.api = PinterestAPI(self)
def items(self):
@@ -142,7 +149,7 @@ class PinterestBoardExtractor(PinterestExtractor):
directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
archive_fmt = "{board[id]}_{id}"
pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#&]+)"
- "/(?!_saved|_created)([^/?#&]+)/?$")
+ "/(?!_saved|_created|pins/)([^/?#&]+)/?$")
test = (
("https://www.pinterest.com/g1952849/test-/", {
"pattern": r"https://i\.pinimg\.com/originals/",
@@ -151,7 +158,7 @@ class PinterestBoardExtractor(PinterestExtractor):
# board with sections (#835)
("https://www.pinterest.com/g1952849/stuff/", {
"options": (("sections", True),),
- "count": 5,
+ "count": 4,
}),
# secret board (#1055)
("https://www.pinterest.de/g1952849/secret/", {
@@ -194,11 +201,11 @@ class PinterestUserExtractor(PinterestExtractor):
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)(?:/_saved)?/?$"
test = (
- ("https://www.pinterest.de/g1952849/", {
+ ("https://www.pinterest.com/g1952849/", {
"pattern": PinterestBoardExtractor.pattern,
"count": ">= 2",
}),
- ("https://www.pinterest.de/g1952849/_saved/"),
+ ("https://www.pinterest.com/g1952849/_saved/"),
)
def __init__(self, match):
@@ -213,15 +220,38 @@ class PinterestUserExtractor(PinterestExtractor):
yield Message.Queue, self.root + url, board
+class PinterestAllpinsExtractor(PinterestExtractor):
+ """Extractor for a user's 'All Pins' feed"""
+ subcategory = "allpins"
+ directory_fmt = ("{category}", "{user}")
+ pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/pins/?$"
+ test = ("https://www.pinterest.com/g1952849/pins/", {
+ "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}"
+ r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w{3}",
+ "count": 7,
+ })
+
+ def __init__(self, match):
+ PinterestExtractor.__init__(self, match)
+ self.user = text.unquote(match.group(1))
+
+ def metadata(self):
+ return {"user": self.user}
+
+ def pins(self):
+ return self.api.user_pins(self.user)
+
+
class PinterestCreatedExtractor(PinterestExtractor):
"""Extractor for a user's created pins"""
subcategory = "created"
directory_fmt = ("{category}", "{user}")
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/_created/?$"
- test = ("https://www.pinterest.com/amazon/_created", {
+ test = ("https://www.pinterest.de/digitalmomblog/_created/", {
"pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}"
r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg",
"count": 10,
+ "range": "1-10",
})
def __init__(self, match):
@@ -272,7 +302,7 @@ class PinterestSearchExtractor(PinterestExtractor):
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
pattern = BASE_PATTERN + r"/search/pins/?\?q=([^&#]+)"
- test = ("https://www.pinterest.de/search/pins/?q=nature", {
+ test = ("https://www.pinterest.com/search/pins/?q=nature", {
"range": "1-50",
"count": ">= 50",
})
@@ -357,26 +387,23 @@ class PinterestAPI():
- https://github.com/seregazhuk/php-pinterest-bot
"""
- BASE_URL = "https://www.pinterest.com"
- HEADERS = {
- "Accept" : "application/json, text/javascript, "
- "*/*, q=0.01",
- "Accept-Language" : "en-US,en;q=0.5",
- "Referer" : BASE_URL + "/",
- "X-Requested-With" : "XMLHttpRequest",
- "X-APP-VERSION" : "31461e0",
- "X-CSRFToken" : None,
- "X-Pinterest-AppState": "active",
- "Origin" : BASE_URL,
- }
-
def __init__(self, extractor):
- self.extractor = extractor
-
csrf_token = util.generate_token()
- self.headers = self.HEADERS.copy()
- self.headers["X-CSRFToken"] = csrf_token
+
+ self.extractor = extractor
+ self.root = extractor.root
self.cookies = {"csrftoken": csrf_token}
+ self.headers = {
+ "Accept" : "application/json, text/javascript, "
+ "*/*, q=0.01",
+ "Accept-Language" : "en-US,en;q=0.5",
+ "Referer" : self.root + "/",
+ "X-Requested-With" : "XMLHttpRequest",
+ "X-APP-VERSION" : "0c4af40",
+ "X-CSRFToken" : csrf_token,
+ "X-Pinterest-AppState": "active",
+ "Origin" : self.root,
+ }
def pin(self, pin_id):
"""Query information about a pin"""
@@ -437,6 +464,16 @@ class PinterestAPI():
options = {"board_id": board_id, "add_vase": True}
return self._pagination("BoardRelatedPixieFeed", options)
+ def user_pins(self, user):
+ """Yield all pins from 'user'"""
+ options = {
+ "is_own_profile_pins": False,
+ "username" : user,
+ "field_set_key" : "grid_item",
+ "pin_filter" : None,
+ }
+ return self._pagination("UserPins", options)
+
def user_activity_pins(self, user):
"""Yield pins created by 'user'"""
options = {
@@ -462,7 +499,7 @@ class PinterestAPI():
def _login_impl(self, username, password):
self.extractor.log.info("Logging in as %s", username)
- url = self.BASE_URL + "/resource/UserSessionResource/create/"
+ url = self.root + "/resource/UserSessionResource/create/"
options = {
"username_or_email": username,
"password" : password,
@@ -485,7 +522,7 @@ class PinterestAPI():
}
def _call(self, resource, options):
- url = "{}/resource/{}Resource/get/".format(self.BASE_URL, resource)
+ url = "{}/resource/{}Resource/get/".format(self.root, resource)
params = {"data": json.dumps({"options": options}), "source_url": ""}
response = self.extractor.request(
@@ -497,10 +534,11 @@ class PinterestAPI():
except ValueError:
data = {}
- if response.status_code < 400 and not response.history:
+ if response.history:
+ self.root = text.root_from_url(response.url)
+ if response.status_code < 400:
return data
-
- if response.status_code == 404 or response.history:
+ if response.status_code == 404:
resource = self.extractor.subcategory.rpartition("-")[2]
raise exception.NotFoundError(resource)
self.extractor.log.debug("Server response: %s", response.text)
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 134361d..a17518f 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -45,7 +45,8 @@ class PixivExtractor(Extractor):
work["tags"] = [tag["name"] for tag in work["tags"]]
ratings = {0: "General", 1: "R-18", 2: "R-18G"}
- userdata = self.config("metadata")
+ meta_user = self.config("metadata")
+ meta_bookmark = self.config("metadata-bookmark")
metadata = self.metadata()
works = self.works()
@@ -61,8 +62,12 @@ class PixivExtractor(Extractor):
del work["image_urls"]
del work["meta_pages"]
- if userdata:
+ if meta_user:
work.update(self.api.user_detail(work["user"]["id"]))
+ if meta_bookmark and work["is_bookmarked"]:
+ detail = self.api.illust_bookmark_detail(work["id"])
+ work["tags_bookmark"] = [tag["name"] for tag in detail["tags"]
+ if tag["is_registered"]]
if transform_tags:
transform_tags(work)
work["num"] = 0
@@ -398,6 +403,8 @@ class PixivFavoriteExtractor(PixivExtractor):
# own bookmarks
("https://www.pixiv.net/bookmark.php", {
"url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
+ "keyword": {"tags_bookmark": ["47", "hitman"]},
+ "options": (("metadata-bookmark", True),),
}),
# own bookmarks with tag (#596)
("https://www.pixiv.net/bookmark.php?tag=foobar", {
@@ -880,6 +887,11 @@ class PixivAppAPI():
params = {"illust_id": illust_id}
return self._call("/v1/illust/detail", params)["illust"]
+ def illust_bookmark_detail(self, illust_id):
+ params = {"illust_id": illust_id}
+ return self._call(
+ "/v2/illust/bookmark/detail", params)["bookmark_detail"]
+
def illust_follow(self, restrict="all"):
params = {"restrict": restrict}
return self._pagination("/v2/illust/follow", params)
@@ -900,9 +912,16 @@ class PixivAppAPI():
return self._pagination("/v1/search/illust", params)
def user_bookmarks_illust(self, user_id, tag=None, restrict="public"):
+ """Return illusts bookmarked by a user"""
params = {"user_id": user_id, "tag": tag, "restrict": restrict}
return self._pagination("/v1/user/bookmarks/illust", params)
+ def user_bookmark_tags_illust(self, user_id, restrict="public"):
+ """Return bookmark tags defined by a user"""
+ params = {"user_id": user_id, "restrict": restrict}
+ return self._pagination(
+ "/v1/user/bookmark-tags/illust", params, "bookmark_tags")
+
@memcache(keyarg=1)
def user_detail(self, user_id):
params = {"user_id": user_id}
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index 4283081..c35ee74 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -59,7 +59,7 @@ class PoipikuExtractor(Extractor):
"//img.", "//img-org.", 1)
yield Message.Url, url, text.nameext_from_url(url, post)
- if not extr('> show all', '<'):
+ if not extr(' show all(+', '<'):
continue
url = self.root + "/f/ShowAppendFileF.jsp"
@@ -79,6 +79,9 @@ class PoipikuExtractor(Extractor):
page = self.request(
url, method="POST", headers=headers, data=data).json()["html"]
+ if page.startswith("You need to"):
+ self.log.warning("'%s'", page)
+
for thumb in text.extract_iter(
page, 'class="IllustItemThumbImg" src="', '"'):
post["num"] += 1
@@ -162,6 +165,21 @@ class PoipikuPostExtractor(PoipikuExtractor):
"user_name": "wadahito",
},
}),
+ # different warning button style
+ ("https://poipiku.com/3572553/5776587.html", {
+ "pattern": r"https://img-org\.poipiku.com/user_img\d+/003572553"
+ r"/005776587_(\d+_)?\w+\.jpeg$",
+ "count": 3,
+ "keyword": {
+ "count": "3",
+ "description": "ORANGE OASISボスネタバレ",
+ "num": int,
+ "post_category": "SPOILER",
+ "post_id": "5776587",
+ "user_id": "3572553",
+ "user_name": "nagakun",
+ },
+ }),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/tcbscans.py b/gallery_dl/extractor/tcbscans.py
new file mode 100644
index 0000000..cac5a54
--- /dev/null
+++ b/gallery_dl/extractor/tcbscans.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://onepiecechapters.com/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+
+
+class TcbscansChapterExtractor(ChapterExtractor):
+ category = "tcbscans"
+ pattern = (r"(?:https?://)?onepiecechapters\.com"
+ r"(/chapters/\d+/[^/?#]+)")
+ root = "https://onepiecechapters.com"
+ test = (
+ (("https://onepiecechapters.com"
+ "/chapters/4708/chainsaw-man-chapter-108"), {
+ "pattern": (r"https://cdn\.[^/]+"
+ r"/(file|attachments/[^/]+)/[^/]+/[^.]+\.\w+"),
+ "count" : 17,
+ "keyword": {
+ "manga": "Chainsaw Man",
+ "chapter": 108,
+ "chapter_minor": "",
+ "lang": "en",
+ "language": "English",
+ },
+ }),
+ ("https://onepiecechapters.com/chapters/4716/one-piece-chapter-1065", {
+ "pattern": (r"https://cdn\.[^/]+"
+ r"/(file|attachments/[^/]+)/[^/]+/[^.]+\.\w+"),
+ "count" : 18,
+ "keyword": {
+ "manga": "One Piece",
+ "chapter": 1065,
+ "chapter_minor": "",
+ "lang": "en",
+ "language": "English",
+ },
+ }),
+ (("https://onepiecechapters.com/"
+ "chapters/44/ace-novel-manga-adaptation-chapter-1")),
+ )
+
+ def images(self, page):
+ return [
+ (url, None)
+ for url in text.extract_iter(
+ page, '<img class="fixed-ratio-content" src="', '"')
+ ]
+
+ def metadata(self, page):
+ manga, _, chapter = text.extr(
+ page, 'font-bold mt-8">', "</h1>").rpartition(" - Chapter ")
+ chapter, sep, minor = chapter.partition(".")
+ return {
+ "manga": text.unescape(manga),
+ "chapter": text.parse_int(chapter),
+ "chapter_minor": sep + minor,
+ "lang": "en", "language": "English",
+ }
+
+
+class TcbscansMangaExtractor(MangaExtractor):
+ category = "tcbscans"
+ chapterclass = TcbscansChapterExtractor
+ pattern = (r"(?:https?://)?onepiecechapters\.com"
+ r"(/mangas/\d+/[^/?#]+)")
+ root = "https://onepiecechapters.com"
+ test = (
+ ("https://onepiecechapters.com/mangas/13/chainsaw-man", {
+ "pattern": TcbscansChapterExtractor.pattern,
+ "range" : "1-50",
+ "count" : 50,
+ }),
+ ("https://onepiecechapters.com/mangas/4/jujutsu-kaisen", {
+ "pattern": TcbscansChapterExtractor.pattern,
+ "range" : "1-50",
+ "count" : 50,
+ }),
+ ("https://onepiecechapters.com/mangas/15/hunter-x-hunter"),
+ )
+
+ def chapters(self, page):
+ data = {
+ "manga": text.unescape(text.extr(
+ page, 'class="my-3 font-bold text-3xl">', "</h1>")),
+ "lang": "en", "language": "English",
+ }
+
+ results = []
+ page = text.extr(page, 'class="col-span-2"', 'class="order-1')
+ for chapter in text.extract_iter(page, "<a", "</a>"):
+ url = text.extr(chapter, 'href="', '"')
+ data["title"] = text.unescape(text.extr(
+ chapter, 'text-gray-500">', "</div>"))
+ chapter = text.extr(
+ chapter, 'font-bold">', "</div>").rpartition(" Chapter ")[2]
+ chapter, sep, minor = chapter.partition(".")
+ data["chapter"] = text.parse_int(chapter)
+ data["chapter_minor"] = sep + minor
+ results.append((self.root + url, data.copy()))
+ return results
diff --git a/gallery_dl/extractor/telegraph.py b/gallery_dl/extractor/telegraph.py
index 8e9bf2c..5996268 100644
--- a/gallery_dl/extractor/telegraph.py
+++ b/gallery_dl/extractor/telegraph.py
@@ -12,7 +12,6 @@ from .. import text
class TelegraphGalleryExtractor(GalleryExtractor):
"""Extractor for articles from telegra.ph"""
-
category = "telegraph"
root = "https://telegra.ph"
directory_fmt = ("{category}", "{slug}")
@@ -52,6 +51,23 @@ class TelegraphGalleryExtractor(GalleryExtractor):
"url": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg",
},
}),
+ ("https://telegra.ph/Vsyo-o-druzyah-moej-sestricy-05-27", {
+ "url": "c1f3048e5d94bee53af30a8c27f70b0d3b15438e",
+ "pattern": r"^https://pith1\.ru/uploads"
+ r"/posts/2019-12/\d+_\d+\.jpg$",
+ "keyword": {
+ "author": "Shotacon - заходи сюда",
+ "caption": "",
+ "count": 19,
+ "date": "dt:2022-05-27 16:17:27",
+ "description": "",
+ "num_formatted": r"re:^\d{2}$",
+ "post_url": "https://telegra.ph"
+ "/Vsyo-o-druzyah-moej-sestricy-05-27",
+ "slug": "Vsyo-o-druzyah-moej-sestricy-05-27",
+ "title": "Всё о друзьях моей сестрицы",
+ },
+ }),
)
def metadata(self, page):
@@ -79,11 +95,12 @@ class TelegraphGalleryExtractor(GalleryExtractor):
result = []
for figure in figures:
- src, pos = text.extract(figure, 'src="', '"')
- if src.startswith("/embed/"):
+ url, pos = text.extract(figure, 'src="', '"')
+ if url.startswith("/embed/"):
continue
+ elif url.startswith("/"):
+ url = self.root + url
caption, pos = text.extract(figure, "<figcaption>", "<", pos)
- url = self.root + src
num += 1
result.append((url, {
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 22aa78e..c2d8247 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -39,6 +39,7 @@ class TwitterExtractor(Extractor):
self.videos = self.config("videos", True)
self.cards = self.config("cards", False)
self.cards_blacklist = self.config("cards-blacklist")
+ self.syndication = self.config("syndication")
self._user = self._user_obj = None
self._user_cache = {}
self._init_sizes()
@@ -75,11 +76,6 @@ class TwitterExtractor(Extractor):
else:
data = tweet
- if seen_tweets is not None:
- if data["id_str"] in seen_tweets:
- continue
- seen_tweets.add(data["id_str"])
-
if not self.retweets and "retweeted_status_id_str" in data:
self.log.debug("Skipping %s (retweet)", data["id_str"])
continue
@@ -97,6 +93,13 @@ class TwitterExtractor(Extractor):
self.log.debug("Skipping %s (reply)", data["id_str"])
continue
+ if seen_tweets is not None:
+ if data["id_str"] in seen_tweets:
+ self.log.debug(
+ "Skipping %s (previously seen)", data["id_str"])
+ continue
+ seen_tweets.add(data["id_str"])
+
files = []
if "extended_entities" in data:
self._extract_media(
@@ -220,14 +223,16 @@ class TwitterExtractor(Extractor):
def _extract_twitpic(self, tweet, files):
for url in tweet["entities"].get("urls", ()):
url = url["expanded_url"]
- if "//twitpic.com/" in url and "/photos/" not in url:
- response = self.request(url, fatal=False)
- if response.status_code >= 400:
- continue
- url = text.extr(
- response.text, 'name="twitter:image" value="', '"')
- if url:
- files.append({"url": url})
+ if "//twitpic.com/" not in url or "/photos/" in url:
+ continue
+ if url.startswith("http:"):
+ url = "https" + url[4:]
+ response = self.request(url, fatal=False)
+ if response.status_code >= 400:
+ continue
+ url = text.extr(response.text, 'name="twitter:image" value="', '"')
+ if url:
+ files.append({"url": url})
def _transform_tweet(self, tweet):
if "author" in tweet:
@@ -299,6 +304,9 @@ class TwitterExtractor(Extractor):
if "legacy" in user:
user = user["legacy"]
+ elif "statuses_count" not in user and self.syndication == "extended":
+ # try to fetch extended user data
+ user = self.api.user_by_screen_name(user["screen_name"])["legacy"]
uget = user.get
entities = user["entities"]
@@ -361,18 +369,22 @@ class TwitterExtractor(Extractor):
def _expand_tweets(self, tweets):
seen = set()
for tweet in tweets:
-
- if "legacy" in tweet:
- cid = tweet["legacy"]["conversation_id_str"]
- else:
- cid = tweet["conversation_id_str"]
-
- if cid not in seen:
- seen.add(cid)
- try:
- yield from self.api.tweet_detail(cid)
- except Exception:
- yield tweet
+ obj = tweet["legacy"] if "legacy" in tweet else tweet
+ cid = obj.get("conversation_id_str")
+ if not cid:
+ tid = obj["id_str"]
+ self.log.warning(
+ "Unable to expand %s (no 'conversation_id')", tid)
+ continue
+ if cid in seen:
+ self.log.debug(
+ "Skipping expansion of %s (previously seen)", cid)
+ continue
+ seen.add(cid)
+ try:
+ yield from self.api.tweet_detail(cid)
+ except Exception:
+ yield tweet
def _make_tweet(self, user, id_str, url, timestamp):
return {
@@ -772,7 +784,7 @@ class TwitterTweetExtractor(TwitterExtractor):
# age-restricted (#2354)
("https://twitter.com/mightbecursed/status/1492954264909479936", {
"options": (("syndication", True),),
- "keywords": {"date": "dt:2022-02-13 20:10:09"},
+ "keyword": {"date": "dt:2022-02-13 20:10:09"},
"count": 1,
}),
# media alt texts / descriptions (#2617)
@@ -991,7 +1003,7 @@ class TwitterAPI():
}
self._nsfw_warning = True
- self._syndication = extractor.config("syndication")
+ self._syndication = self.extractor.syndication
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
cookies = extractor.session.cookies
@@ -1516,6 +1528,12 @@ class TwitterAPI():
else:
retweet_id = None
+ # assume 'conversation_id' is the same as 'id' when the tweet
+ # is not a reply
+ if "conversation_id_str" not in tweet and \
+ "in_reply_to_status_id_str" not in tweet:
+ tweet["conversation_id_str"] = tweet["id_str"]
+
tweet["created_at"] = text.parse_datetime(
tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime(
"%a %b %d %H:%M:%S +0000 %Y")
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 9b6831b..5692452 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -110,7 +110,7 @@ class VkPhotosExtractor(VkExtractor):
"pattern": r"https://sun\d+-\d+\.userapi\.com/s/v1/if1"
r"/[\w-]+\.jpg\?size=\d+x\d+&quality=96&type=album",
"count": ">= 35",
- "keywords": {
+ "keyword": {
"id": r"re:\d+",
"user": {
"id": "398982326",
@@ -122,12 +122,11 @@ class VkPhotosExtractor(VkExtractor):
}),
("https://vk.com/cosplayinrussia", {
"range": "15-25",
- "keywords": {
+ "keyword": {
"id": r"re:\d+",
"user": {
"id" : "-165740836",
- "info": "Предложка открыта, кидайте ваши косплейчики. При "
- "правильном оформлении они будут опубликованы",
+ "info": str,
"name": "cosplayinrussia",
"nick": "Косплей | Cosplay 18+",
},
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index 74da615..03fd909 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -111,13 +111,15 @@ class ZerochanTagExtractor(ZerochanExtractor):
test = ("https://www.zerochan.net/Perth+%28Kantai+Collection%29", {
"pattern": r"https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
"count": "> 24",
- "keywords": {
+ "keyword": {
"extension": r"re:jpg|png",
- "file_url": "",
- "filename": r"re:Perth.\(Kantai.Collection\).full.\d+",
+ "file_url": r"re:https://static\.zerochan\.net"
+ r"/.+\.full\.\d+\.(jpg|png)",
+ "filename": r"re:(Perth\.\(Kantai\.Collection\)"
+ r"|Kantai\.Collection)\.full\.\d+",
"height": r"re:^\d+$",
"id": r"re:^\d+$",
- "name": "Perth (Kantai Collection)",
+ "name": r"re:(Perth \(Kantai Collection\)|Kantai Collection)",
"search_tags": "Perth (Kantai Collection)",
"size": r"re:^\d+k$",
"width": r"re:^\d+$",
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index 8a45330..58bf48d 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -60,14 +60,21 @@ class StringFormatter():
- "u": calls str.upper
- "c": calls str.capitalize
- "C": calls string.capwords
- - "j". calls json.dumps
+ - "g": calls text.slugify()
+ - "j": calls json.dumps
- "t": calls str.strip
+ - "T": calls util.datetime_to_timestamp_string()
- "d": calls text.parse_timestamp
- - "U": calls urllib.parse.unescape
+ - "s": calls str()
- "S": calls util.to_string()
- - "T": calls util.to_timestamü()
+ - "U": calls urllib.parse.unescape
+ - "r": calls repr()
+ - "a": calls ascii()
- Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"
+ # Go to _CONVERSIONS and _SPECIFIERS below to see all of them, read:
+ # https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md
+
Extra Format Specifiers:
- "?<before>/<after>/":
Adds <before> and <after> to the actual value if it evaluates to True.
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 91e9169..32cac79 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -325,7 +325,7 @@ def build_parser():
configuration.add_argument(
"--ignore-config",
dest="load_config", action="store_false",
- help="Do not read the default configuration files",
+ help="Do not read default configuration files",
)
authentication = parser.add_argument_group("Authentication Options")
@@ -349,7 +349,7 @@ def build_parser():
selection.add_argument(
"--download-archive",
dest="archive", metavar="FILE", action=ConfigAction,
- help=("Record all downloaded files in the archive file and "
+ help=("Record all downloaded or skipped files in FILE and "
"skip downloading any file already in it"),
)
selection.add_argument(
@@ -367,19 +367,20 @@ def build_parser():
selection.add_argument(
"--range",
dest="image-range", metavar="RANGE", action=ConfigAction,
- help=("Index-range(s) specifying which images to download. "
- "For example '5-10' or '1,3-5,10-'"),
+ help=("Index range(s) specifying which files to download. "
+ "These can be either a constant value, range, or slice "
+ "(e.g. '5', '8-20', or '1:24:3')"),
)
selection.add_argument(
"--chapter-range",
dest="chapter-range", metavar="RANGE", action=ConfigAction,
- help=("Like '--range', but applies to manga-chapters "
+ help=("Like '--range', but applies to manga chapters "
"and other delegated URLs"),
)
selection.add_argument(
"--filter",
dest="image-filter", metavar="EXPR", action=ConfigAction,
- help=("Python expression controlling which images to download. "
+ help=("Python expression controlling which files to download. "
"Files for which the expression evaluates to False are ignored. "
"Available keys are the filename-specific ones listed by '-K'. "
"Example: --filter \"image_width >= 1000 and "
@@ -388,7 +389,7 @@ def build_parser():
selection.add_argument(
"--chapter-filter",
dest="chapter-filter", metavar="EXPR", action=ConfigAction,
- help=("Like '--filter', but applies to manga-chapters "
+ help=("Like '--filter', but applies to manga chapters "
"and other delegated URLs"),
)
@@ -472,7 +473,7 @@ def build_parser():
dest="postprocessors", metavar="CMD",
action=AppendCommandAction, const={"name": "exec"},
help=("Execute CMD for each downloaded file. "
- "Example: --exec 'convert {} {}.png && rm {}'"),
+ "Example: --exec \"convert {} {}.png && rm {}\""),
)
postprocessor.add_argument(
"--exec-after",
@@ -480,7 +481,7 @@ def build_parser():
action=AppendCommandAction, const={
"name": "exec", "event": "finalize"},
help=("Execute CMD after all files were downloaded successfully. "
- "Example: --exec-after 'cd {} && convert * ../doc.pdf'"),
+ "Example: --exec-after \"cd {} && convert * ../doc.pdf\""),
)
postprocessor.add_argument(
"-P", "--postprocessor",
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 23d5bc8..543fb10 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -714,74 +714,71 @@ def chain_predicates(predicates, url, kwdict):
class RangePredicate():
- """Predicate; True if the current index is in the given range"""
+ """Predicate; True if the current index is in the given range(s)"""
+
def __init__(self, rangespec):
- self.ranges = self.optimize_range(self.parse_range(rangespec))
+ self.ranges = ranges = self._parse(rangespec)
self.index = 0
- if self.ranges:
- self.lower, self.upper = self.ranges[0][0], self.ranges[-1][1]
+ if ranges:
+ # approximate bounds: 'start' and 'stop' ignore the step of a slice,
+ # but evaluating the exact min/max of a large range is slow
+ self.lower = min(r.start for r in ranges)
+ self.upper = max(r.stop for r in ranges) - 1
else:
- self.lower, self.upper = 0, 0
+ self.lower = self.upper = 0
- def __call__(self, url, _):
- self.index += 1
+ def __call__(self, _url, _kwdict):
+ self.index = index = self.index + 1
- if self.index > self.upper:
+ if index > self.upper:
raise exception.StopExtraction()
- for lower, upper in self.ranges:
- if lower <= self.index <= upper:
+ for range in self.ranges:
+ if index in range:
return True
return False
@staticmethod
- def parse_range(rangespec):
+ def _parse(rangespec):
"""Parse an integer range string and return the resulting ranges
Examples:
- parse_range("-2,4,6-8,10-") -> [(1,2), (4,4), (6,8), (10,INTMAX)]
- parse_range(" - 3 , 4- 4, 2-6") -> [(1,3), (4,4), (2,6)]
+ _parse("-2,4,6-8,10-") -> [(1,3), (4,5), (6,9), (10,INTMAX)]
+ _parse(" - 3 , 4- 4, 2-6") -> [(1,4), (4,5), (2,7)]
+ _parse("1:2,4:8:2") -> [(1,2), (4,8,2)]
"""
ranges = []
+ append = ranges.append
- for group in rangespec.split(","):
+ if isinstance(rangespec, str):
+ rangespec = rangespec.split(",")
+
+ for group in rangespec:
if not group:
continue
- first, sep, last = group.partition("-")
- if not sep:
- beg = end = int(first)
- else:
- beg = int(first) if first.strip() else 1
- end = int(last) if last.strip() else sys.maxsize
- ranges.append((beg, end) if beg <= end else (end, beg))
- return ranges
+ elif ":" in group:
+ start, _, stop = group.partition(":")
+ stop, _, step = stop.partition(":")
+ append(range(
+ int(start) if start.strip() else 1,
+ int(stop) if stop.strip() else sys.maxsize,
+ int(step) if step.strip() else 1,
+ ))
+
+ elif "-" in group:
+ start, _, stop = group.partition("-")
+ append(range(
+ int(start) if start.strip() else 1,
+ int(stop) + 1 if stop.strip() else sys.maxsize,
+ ))
- @staticmethod
- def optimize_range(ranges):
- """Simplify/Combine a parsed list of ranges
-
- Examples:
- optimize_range([(2,4), (4,6), (5,8)]) -> [(2,8)]
- optimize_range([(1,1), (2,2), (3,6), (8,9))]) -> [(1,6), (8,9)]
- """
- if len(ranges) <= 1:
- return ranges
-
- ranges.sort()
- riter = iter(ranges)
- result = []
+ else:
+ start = int(group)
+ append(range(start, start+1))
- beg, end = next(riter)
- for lower, upper in riter:
- if lower > end+1:
- result.append((beg, end))
- beg, end = lower, upper
- elif upper > end:
- end = upper
- result.append((beg, end))
- return result
+ return ranges
class UniquePredicate():
@@ -802,6 +799,8 @@ class FilterPredicate():
"""Predicate; True if evaluating the given expression returns True"""
def __init__(self, expr, target="image"):
+ if not isinstance(expr, str):
+ expr = "(" + ") and (".join(expr) + ")"
name = "<{} filter>".format(target)
self.expr = compile_expression(expr, name)
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d832185..5e3b507 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.24.2"
+__version__ = "1.24.3"
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index db313c3..7b71349 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -199,13 +199,27 @@ def parse_command_line(module, argv):
action += args
yield action
- if getattr(opts, "parse_metadata", None) is None:
- opts.parse_metadata = []
- if opts.metafromtitle is not None:
- opts.parse_metadata.append("title:%s" % opts.metafromtitle)
- opts.metafromtitle = None
- opts.parse_metadata = list(itertools.chain.from_iterable(map(
- metadataparser_actions, opts.parse_metadata)))
+ parse_metadata = getattr(opts, "parse_metadata", None)
+ if isinstance(parse_metadata, dict):
+ if opts.metafromtitle is not None:
+ if "pre_process" not in parse_metadata:
+ parse_metadata["pre_process"] = []
+ parse_metadata["pre_process"].append(
+ "title:%s" % opts.metafromtitle)
+ opts.parse_metadata = {
+ k: list(itertools.chain.from_iterable(map(
+ metadataparser_actions, v)))
+ for k, v in parse_metadata.items()
+ }
+ else:
+ if parse_metadata is None:
+ parse_metadata = []
+ if opts.metafromtitle is not None:
+ parse_metadata.append("title:%s" % opts.metafromtitle)
+ opts.parse_metadata = list(itertools.chain.from_iterable(map(
+ metadataparser_actions, parse_metadata)))
+
+ opts.metafromtitle = None
else:
opts.parse_metadata = ()
diff --git a/test/test_extractor.py b/test/test_extractor.py
index de43ff7..144c6f9 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2018-2020 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -96,9 +96,10 @@ class TestExtractorModule(unittest.TestCase):
test_urls = []
# collect testcase URLs
+ append = test_urls.append
for extr in extractor.extractors():
for testcase in extr._get_tests():
- test_urls.append((testcase[0], extr))
+ append((testcase[0], extr))
# iterate over all testcase URLs
for url, extr1 in test_urls:
@@ -114,20 +115,23 @@ class TestExtractorModule(unittest.TestCase):
match = extr2.pattern.match(url)
if match:
- matches.append(match)
+ matches.append((match, extr2))
# fail if more or less than 1 match happened
if len(matches) > 1:
msg = "'{}' gets matched by more than one pattern:".format(url)
- for match in matches:
- msg += "\n- "
- msg += match.re.pattern
+ for match, extr in matches:
+ msg += "\n\n- {}:\n{}".format(
+ extr.__name__, match.re.pattern)
self.fail(msg)
- if len(matches) < 1:
+ elif len(matches) < 1:
msg = "'{}' isn't matched by any pattern".format(url)
self.fail(msg)
+ else:
+ self.assertIs(extr1, matches[0][1], url)
+
def test_docstrings(self):
"""ensure docstring uniqueness"""
for extr1 in extractor.extractors():
diff --git a/test/test_util.py b/test/test_util.py
index 4b8f9ae..67fdf60 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -24,39 +24,62 @@ from gallery_dl import util, text, exception # noqa E402
class TestRange(unittest.TestCase):
- def test_parse_range(self, f=util.RangePredicate.parse_range):
- self.assertEqual(
- f(""),
- [])
- self.assertEqual(
- f("1-2"),
- [(1, 2)])
+ def test_parse_empty(self, f=util.RangePredicate._parse):
+ self.assertEqual(f(""), [])
+ self.assertEqual(f([]), [])
+
+ def test_parse_digit(self, f=util.RangePredicate._parse):
+ self.assertEqual(f("2"), [range(2, 3)])
+
self.assertEqual(
- f("-"),
- [(1, sys.maxsize)])
+ f("2, 3, 4"),
+ [range(2, 3),
+ range(3, 4),
+ range(4, 5)],
+ )
+
+ def test_parse_range(self, f=util.RangePredicate._parse):
+ self.assertEqual(f("1-2"), [range(1, 3)])
+ self.assertEqual(f("2-"), [range(2, sys.maxsize)])
+ self.assertEqual(f("-3"), [range(1, 4)])
+ self.assertEqual(f("-"), [range(1, sys.maxsize)])
+
self.assertEqual(
f("-2,4,6-8,10-"),
- [(1, 2), (4, 4), (6, 8), (10, sys.maxsize)])
+ [range(1, 3),
+ range(4, 5),
+ range(6, 9),
+ range(10, sys.maxsize)],
+ )
self.assertEqual(
f(" - 3 , 4- 4, 2-6"),
- [(1, 3), (4, 4), (2, 6)])
+ [range(1, 4),
+ range(4, 5),
+ range(2, 7)],
+ )
+
+ def test_parse_slice(self, f=util.RangePredicate._parse):
+ self.assertEqual(f("2:4") , [range(2, 4)])
+ self.assertEqual(f("3::") , [range(3, sys.maxsize)])
+ self.assertEqual(f(":4:") , [range(1, 4)])
+ self.assertEqual(f("::5") , [range(1, sys.maxsize, 5)])
+ self.assertEqual(f("::") , [range(1, sys.maxsize)])
+ self.assertEqual(f("2:3:4"), [range(2, 3, 4)])
- def test_optimize_range(self, f=util.RangePredicate.optimize_range):
- self.assertEqual(
- f([]),
- [])
- self.assertEqual(
- f([(2, 4)]),
- [(2, 4)])
- self.assertEqual(
- f([(2, 4), (6, 8), (10, 12)]),
- [(2, 4), (6, 8), (10, 12)])
self.assertEqual(
- f([(2, 4), (4, 6), (5, 8)]),
- [(2, 8)])
+ f("2:4, 4:, :4, :4:, ::4"),
+ [range(2, 4),
+ range(4, sys.maxsize),
+ range(1, 4),
+ range(1, 4),
+ range(1, sys.maxsize, 4)],
+ )
self.assertEqual(
- f([(1, 1), (2, 2), (3, 6), (8, 9)]),
- [(1, 6), (8, 9)])
+ f(" : 3 , 4: 4, 2:6"),
+ [range(1, 3),
+ range(4, 4),
+ range(2, 6)],
+ )
class TestPredicate(unittest.TestCase):
@@ -68,7 +91,7 @@ class TestPredicate(unittest.TestCase):
for i in range(6):
self.assertTrue(pred(dummy, dummy))
with self.assertRaises(exception.StopExtraction):
- bool(pred(dummy, dummy))
+ pred(dummy, dummy)
pred = util.RangePredicate("1, 3, 5")
self.assertTrue(pred(dummy, dummy))
@@ -77,11 +100,11 @@ class TestPredicate(unittest.TestCase):
self.assertFalse(pred(dummy, dummy))
self.assertTrue(pred(dummy, dummy))
with self.assertRaises(exception.StopExtraction):
- bool(pred(dummy, dummy))
+ pred(dummy, dummy)
pred = util.RangePredicate("")
with self.assertRaises(exception.StopExtraction):
- bool(pred(dummy, dummy))
+ pred(dummy, dummy)
def test_unique_predicate(self):
dummy = None
@@ -116,6 +139,14 @@ class TestPredicate(unittest.TestCase):
with self.assertRaises(exception.FilterError):
util.FilterPredicate("b > 1")(url, {"a": 2})
+ pred = util.FilterPredicate(["a < 3", "b < 4", "c < 5"])
+ self.assertTrue(pred(url, {"a": 2, "b": 3, "c": 4}))
+ self.assertFalse(pred(url, {"a": 3, "b": 3, "c": 4}))
+ self.assertFalse(pred(url, {"a": 2, "b": 4, "c": 4}))
+ self.assertFalse(pred(url, {"a": 2, "b": 3, "c": 5}))
+ with self.assertRaises(exception.FilterError):
+ pred(url, {"a": 2})
+
def test_build_predicate(self):
pred = util.build_predicate([])
self.assertIsInstance(pred, type(lambda: True))
diff --git a/test/test_ytdl.py b/test/test_ytdl.py
index eedb4f9..a273604 100644
--- a/test/test_ytdl.py
+++ b/test/test_ytdl.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -262,11 +262,21 @@ class Test_CommandlineArguments_YtDlp(Test_CommandlineArguments):
def test_metadata_from_title(self):
opts = self._(["--metadata-from-title", "%(artist)s - %(title)s"])
+
+ try:
+ legacy = (self.module.version.__version__ < "2023.01.01")
+ except AttributeError:
+ legacy = True
+
+ actions = [self.module.MetadataFromFieldPP.to_action(
+ "title:%(artist)s - %(title)s")]
+ if not legacy:
+ actions = {"pre_process": actions}
+
self.assertEqual(opts["postprocessors"][0], {
- "key": "MetadataParser",
- "when": "pre_process",
- "actions": [self.module.MetadataFromFieldPP.to_action(
- "title:%(artist)s - %(title)s")],
+ "key" : "MetadataParser",
+ "when" : "pre_process",
+ "actions": actions,
})