| author | 2023-01-11 04:09:13 -0500 |
|---|---|
| committer | 2023-01-11 04:09:13 -0500 |
| commit | fe385c3ff784ba3d19454a35446502c0ec295893 |
| tree | 897982793ef2a0c0f349044bf4cf803ccd483e6e |
| parent | ebdfcd3cd3f76534a590ba08933ff7ea54813316 |

New upstream version 1.24.3 (ref: upstream/1.24.3)

41 files changed, 1622 insertions(+), 570 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 700efb1..a62a8ab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,36 @@
 # Changelog
+## 1.24.3 - 2023-01-10
+### Additions
+- [danbooru] extract `uploader` metadata ([#3457](https://github.com/mikf/gallery-dl/issues/3457))
+- [deviantart] initial implementation of username & password login for `scraps` ([#1029](https://github.com/mikf/gallery-dl/issues/1029))
+- [fanleaks] add `post` and `model` extractors ([#3468](https://github.com/mikf/gallery-dl/issues/3468), [#3474](https://github.com/mikf/gallery-dl/issues/3474))
+- [imagefap] add `folder` extractor ([#3504](https://github.com/mikf/gallery-dl/issues/3504))
+- [lynxchan] support `bbw-chan.nl` ([#3456](https://github.com/mikf/gallery-dl/issues/3456), [#3463](https://github.com/mikf/gallery-dl/issues/3463))
+- [pinterest] support `All Pins` boards ([#2855](https://github.com/mikf/gallery-dl/issues/2855), [#3484](https://github.com/mikf/gallery-dl/issues/3484))
+- [pinterest] add `domain` option ([#3484](https://github.com/mikf/gallery-dl/issues/3484))
+- [pixiv] implement `metadata-bookmark` option ([#3417](https://github.com/mikf/gallery-dl/issues/3417))
+- [tcbscans] add `chapter` and `manga` extractors ([#3189](https://github.com/mikf/gallery-dl/issues/3189))
+- [twitter] implement `syndication=extended` ([#3483](https://github.com/mikf/gallery-dl/issues/3483))
+- implement slice notation for `range` options ([#918](https://github.com/mikf/gallery-dl/issues/918), [#2865](https://github.com/mikf/gallery-dl/issues/2865))
+- allow `filter` options to be a list of expressions
+### Fixes
+- [behance] use delay between requests ([#2507](https://github.com/mikf/gallery-dl/issues/2507))
+- [bunkr] fix URLs returned by API ([#3481](https://github.com/mikf/gallery-dl/issues/3481))
+- [fanbox] return `imageMap` files in order ([#2718](https://github.com/mikf/gallery-dl/issues/2718))
+- [imagefap] use delay between requests ([#1140](https://github.com/mikf/gallery-dl/issues/1140))
+- [imagefap] warn about redirects to `/human-verification` ([#1140](https://github.com/mikf/gallery-dl/issues/1140))
+- [kemonoparty] reject invalid/empty files ([#3510](https://github.com/mikf/gallery-dl/issues/3510))
+- [myhentaigallery] handle whitespace before title tag ([#3503](https://github.com/mikf/gallery-dl/issues/3503))
+- [poipiku] fix extraction for a different warning button style ([#3493](https://github.com/mikf/gallery-dl/issues/3493), [#3460](https://github.com/mikf/gallery-dl/issues/3460))
+- [poipiku] warn about login requirements
+- [telegraph] fix file URLs ([#3506](https://github.com/mikf/gallery-dl/issues/3506))
+- [twitter] fix crash when using `expand` and `syndication` ([#3473](https://github.com/mikf/gallery-dl/issues/3473))
+- [twitter] apply tweet type checks before uniqueness check ([#3439](https://github.com/mikf/gallery-dl/issues/3439), [#3455](https://github.com/mikf/gallery-dl/issues/3455))
+- [twitter] force `https://` for TwitPic URLs ([#3449](https://github.com/mikf/gallery-dl/issues/3449))
+- [ytdl] adapt to yt-dlp changes
+- update and improve documentation ([#3453](https://github.com/mikf/gallery-dl/issues/3453), [#3462](https://github.com/mikf/gallery-dl/issues/3462), [#3496](https://github.com/mikf/gallery-dl/issues/3496))
+
 ## 1.24.2 - 2022-12-18
 ### Additions
 - [2chen] support `.club` URLs ([#3406](https://github.com/mikf/gallery-dl/issues/3406))
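One addition above, list-valued `filter` options, is easiest to see in code. A minimal sketch of the described behavior — a file is kept only when *every* expression evaluates to `True` — follows; this is illustrative only, not gallery-dl's actual implementation, and `build_filter` is a hypothetical helper:

```python
def build_filter(expressions):
    # Accept both a single expression string and a list of expressions.
    if isinstance(expressions, str):
        expressions = [expressions]
    compiled = [compile(expr, "<filter>", "eval") for expr in expressions]

    def predicate(metadata):
        # All expressions must be true for the file to be downloaded.
        return all(eval(code, {"__builtins__": {}}, dict(metadata))
                   for code in compiled)
    return predicate

keep = build_filter(["image_width >= 1000", "rating in ('s', 'q')"])
print(keep({"image_width": 1200, "rating": "s"}))  # True
print(keep({"image_width": 800,  "rating": "s"}))  # False
```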
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery_dl
-Version: 1.24.2
+Version: 1.24.3
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -39,13 +39,16 @@ License-File: LICENSE
 gallery-dl
 ==========
 
-*gallery-dl* is a command-line program to download image galleries and
-collections from several image hosting sites (see `Supported Sites`_).
-It is a cross-platform tool with many configuration options
-and powerful `filenaming capabilities <Formatting_>`_.
+*gallery-dl* is a command-line program
+to download image galleries and collections
+from several image hosting sites
+(see `Supported Sites <docs/supportedsites.md>`__).
+It is a cross-platform tool
+with many `configuration options <docs/configuration.rst>`__
+and powerful `filenaming capabilities <docs/formatting.md>`__.
 
-|pypi| |build| |gitter|
+|pypi| |build|
 
 .. contents::
@@ -59,7 +62,7 @@ Dependencies
 Optional
 --------
-- FFmpeg_: Pixiv Ugoira to WebM conversion
+- FFmpeg_: Pixiv Ugoira conversion
 - yt-dlp_ or youtube-dl_: Video downloads
 - PySocks_: SOCKS proxy support
 - brotli_ or brotlicffi_: Brotli compression support
@@ -103,9 +106,13 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.3/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.3/gallery-dl.bin>`__
+
+
+Nightly Builds
+--------------
 
 | Executables built from the latest commit can be found at
 | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -141,6 +148,16 @@ Scoop
     scoop install gallery-dl
 
+Homebrew
+--------
+
+For macOS or Linux users using Homebrew:
+
+.. code:: bash
+
+    brew install gallery-dl
+
+
 Usage
 =====
@@ -149,9 +166,10 @@ from:
 .. code:: bash
 
-    gallery-dl [OPTION]... URL...
+    gallery-dl [OPTIONS]... URLS...
 
-See also :code:`gallery-dl --help`.
+Use :code:`gallery-dl --help` or see `<docs/options.md>`__
+for a full list of all command-line options.
 
 Examples
@@ -199,13 +217,22 @@ Configuration
 Configuration files for *gallery-dl* use a JSON-based file format.
 
-| For a (more or less) complete example with options set to their default values,
-  see gallery-dl.conf_.
-| For a configuration file example with more involved settings and options,
-  see gallery-dl-example.conf_.
-| A list of all available configuration options and their
-  descriptions can be found in configuration.rst_.
-|
+
+Documentation
+-------------
+
+A list of all available configuration options and their descriptions
+can be found in `<docs/configuration.rst>`__.
+
+| For a default configuration file with available options set to their
+  default values, see `<docs/gallery-dl.conf>`__.
+
+| For a commented example with more involved settings and option usage,
+  see `<docs/gallery-dl-example.conf>`__.
+
+
+Locations
+---------
 
 *gallery-dl* searches for configuration files in the following places:
@@ -214,7 +241,7 @@ Windows:
 * ``%USERPROFILE%\gallery-dl\config.json``
 * ``%USERPROFILE%\gallery-dl.conf``
 
-  (``%USERPROFILE%`` usually refers to the user's home directory,
+  (``%USERPROFILE%`` usually refers to a user's home directory,
   i.e. ``C:\Users\<username>\``)
 
 Linux, macOS, etc.:
@@ -223,12 +250,13 @@
 * ``${HOME}/.config/gallery-dl/config.json``
 * ``${HOME}/.gallery-dl.conf``
 
-Values in later configuration files will override previous ones.
+When run as `executable <Standalone Executable_>`__,
+*gallery-dl* will also look for a ``gallery-dl.conf`` file
+in the same directory as said executable.
 
-Command line options will override all related settings in the configuration file(s),
-e.g. using ``--write-metadata`` will enable writing metadata using the default values
-for all ``postprocessors.metadata.*`` settings, overriding any specific settings in
-configuration files.
+It is possible to use more than one configuration file at a time.
+In this case, any values from files after the first will get merged
+into the already loaded settings and potentially override previous ones.
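The "merged into the already loaded settings" behavior described above amounts to a recursive dictionary merge. A rough sketch of those semantics (assumed; gallery-dl's own merge logic may differ in detail):

```python
import json
import os

def merge(base, update):
    # Values from "update" win; nested objects are merged key by key.
    for key, value in update.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            merge(base[key], value)
        else:
            base[key] = value
    return base

config = {}
for path in ("~/.config/gallery-dl/config.json", "~/.gallery-dl.conf"):
    try:
        with open(os.path.expanduser(path)) as file:
            merge(config, json.load(file))
    except OSError:
        pass  # missing files are simply skipped
```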
 
 Authentication
@@ -258,8 +286,8 @@ and optional for
 ``twitter``,
 and ``zerochan``.
 
-You can set the necessary information in your configuration file
-(cf. gallery-dl.conf_)
+You can set the necessary information in your
+`configuration file <Configuration_>`__
 
 .. code:: json
@@ -278,8 +306,8 @@ or you can provide them directly via the
 .. code:: bash
 
-    gallery-dl -u <username> -p <password> URL
-    gallery-dl -o username=<username> -o password=<password> URL
+    gallery-dl -u "<username>" -p "<password>" "URL"
+    gallery-dl -o "username=<username>" -o "password=<password>" "URL"
 
 Cookies
@@ -290,7 +318,7 @@ CAPTCHA or similar, or has not been implemented yet,
 you can use the cookies from a browser login session and input them into
 *gallery-dl*.
 
 This can be done via the
-`cookies <https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorcookies>`__
+`cookies <docs/configuration.rst#extractorcookies>`__
 option in your configuration file by specifying
 
 - | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
@@ -301,6 +329,9 @@ option in your configuration file by specifying
   | (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__,
     in `Firefox <https://developer.mozilla.org/en-US/docs/Tools/Storage_Inspector>`__)
 
+- | the name of a browser to extract cookies from
+  | (supported browsers are Chromium-based ones, Firefox, and Safari)
+
 For example:
 
 .. code:: json
@@ -314,30 +345,43 @@ For example:
             "cookies": {
                 "session_id": "K1T57EKu19TR49C51CDjOJoXNQLF7VbdVOiBrC9ye0a"
             }
+        },
+        "twitter": {
+            "cookies": ["firefox"]
         }
     }
 }
 
-You can also specify a cookies.txt file with
-the :code:`--cookies` command-line option:
+| You can also specify a cookies.txt file with
+  the :code:`--cookies` command-line option
+| or a browser to extract cookies from with :code:`--cookies-from-browser`:
 
 .. code:: bash
 
-    gallery-dl --cookies "$HOME/path/to/cookies.txt" URL
+    gallery-dl --cookies "$HOME/path/to/cookies.txt" "URL"
+    gallery-dl --cookies-from-browser firefox "URL"
 
 OAuth
 -----
 
-*gallery-dl* supports user authentication via OAuth_ for
-``deviantart``, ``flickr``, ``reddit``, ``smugmug``, ``tumblr``,
+*gallery-dl* supports user authentication via OAuth_ for some extractors.
+This is necessary for
+``pixiv``
+and optional for
+``deviantart``,
+``flickr``,
+``reddit``,
+``smugmug``,
+``tumblr``,
 and ``mastodon`` instances.
 
-This is mostly optional, but grants *gallery-dl* the ability
-to issue requests on your account's behalf and enables it to access resources
-which would otherwise be unavailable to a public user.
-
-To link your account to *gallery-dl*, start by invoking it with
-``oauth:<sitename>`` as an argument. For example:
+Linking your account to *gallery-dl* grants it the ability to issue requests
+on your account's behalf and enables it to access resources which would
+otherwise be unavailable to a public user.
+
+To do so, start by invoking it with ``oauth:<sitename>`` as an argument.
+For example:
 
 .. code:: bash
@@ -356,13 +400,6 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
 
     gallery-dl oauth:mastodon:https://mastodon.social/
 
-
-.. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf
-.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
-.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
-.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md
-.. _Formatting: https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md
-
 .. _Python: https://www.python.org/downloads/
 .. _PyPI: https://pypi.org/
 .. _pip: https://pip.pypa.io/en/stable/
@@ -373,7 +410,6 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
 .. _PySocks: https://pypi.org/project/PySocks/
 .. _brotli: https://github.com/google/brotli
 .. _brotlicffi: https://github.com/python-hyper/brotlicffi
-.. _pyOpenSSL: https://pyopenssl.org/
 .. _Snapd: https://docs.snapcraft.io/installing-snapd
 .. _OAuth: https://en.wikipedia.org/wiki/OAuth
 .. _Chocolatey: https://chocolatey.org/install
@@ -2,13 +2,16 @@ gallery-dl
 ==========
 
-*gallery-dl* is a command-line program to download image galleries and
-collections from several image hosting sites (see `Supported Sites`_).
-It is a cross-platform tool with many configuration options
-and powerful `filenaming capabilities <Formatting_>`_.
+*gallery-dl* is a command-line program
+to download image galleries and collections
+from several image hosting sites
+(see `Supported Sites <docs/supportedsites.md>`__).
+It is a cross-platform tool
+with many `configuration options <docs/configuration.rst>`__
+and powerful `filenaming capabilities <docs/formatting.md>`__.
 
-|pypi| |build| |gitter|
+|pypi| |build|
 
 .. contents::
@@ -22,7 +25,7 @@ Dependencies
 Optional
 --------
-- FFmpeg_: Pixiv Ugoira to WebM conversion
+- FFmpeg_: Pixiv Ugoira conversion
 - yt-dlp_ or youtube-dl_: Video downloads
 - PySocks_: SOCKS proxy support
 - brotli_ or brotlicffi_: Brotli compression support
@@ -66,9 +69,13 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.3/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.3/gallery-dl.bin>`__
+
+
+Nightly Builds
+--------------
 
 | Executables built from the latest commit can be found at
 | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -104,6 +111,16 @@ Scoop
     scoop install gallery-dl
 
+Homebrew
+--------
+
+For macOS or Linux users using Homebrew:
+
+.. code:: bash
+
+    brew install gallery-dl
+
+
 Usage
 =====
@@ -112,9 +129,10 @@ from:
 .. code:: bash
 
-    gallery-dl [OPTION]... URL...
+    gallery-dl [OPTIONS]... URLS...
 
-See also :code:`gallery-dl --help`.
+Use :code:`gallery-dl --help` or see `<docs/options.md>`__
+for a full list of all command-line options.
 
 Examples
@@ -162,13 +180,22 @@ Configuration
 Configuration files for *gallery-dl* use a JSON-based file format.
 
-| For a (more or less) complete example with options set to their default values,
-  see gallery-dl.conf_.
-| For a configuration file example with more involved settings and options,
-  see gallery-dl-example.conf_.
-| A list of all available configuration options and their
-  descriptions can be found in configuration.rst_.
-|
+
+Documentation
+-------------
+
+A list of all available configuration options and their descriptions
+can be found in `<docs/configuration.rst>`__.
+
+| For a default configuration file with available options set to their
+  default values, see `<docs/gallery-dl.conf>`__.
+
+| For a commented example with more involved settings and option usage,
+  see `<docs/gallery-dl-example.conf>`__.
+
+
+Locations
+---------
 
 *gallery-dl* searches for configuration files in the following places:
@@ -177,7 +204,7 @@ Windows:
 * ``%USERPROFILE%\gallery-dl\config.json``
 * ``%USERPROFILE%\gallery-dl.conf``
 
-  (``%USERPROFILE%`` usually refers to the user's home directory,
+  (``%USERPROFILE%`` usually refers to a user's home directory,
   i.e. ``C:\Users\<username>\``)
 
 Linux, macOS, etc.:
@@ -186,12 +213,13 @@
 * ``${HOME}/.config/gallery-dl/config.json``
 * ``${HOME}/.gallery-dl.conf``
 
-Values in later configuration files will override previous ones.
+When run as `executable <Standalone Executable_>`__,
+*gallery-dl* will also look for a ``gallery-dl.conf`` file
+in the same directory as said executable.
 
-Command line options will override all related settings in the configuration file(s),
-e.g. using ``--write-metadata`` will enable writing metadata using the default values
-for all ``postprocessors.metadata.*`` settings, overriding any specific settings in
-configuration files.
+It is possible to use more than one configuration file at a time.
+In this case, any values from files after the first will get merged
+into the already loaded settings and potentially override previous ones.
 
 Authentication
@@ -221,8 +249,8 @@ and optional for
 ``twitter``,
 and ``zerochan``.
 
-You can set the necessary information in your configuration file
-(cf. gallery-dl.conf_)
+You can set the necessary information in your
+`configuration file <Configuration_>`__
 
 .. code:: json
@@ -241,8 +269,8 @@ or you can provide them directly via the
 .. code:: bash
 
-    gallery-dl -u <username> -p <password> URL
-    gallery-dl -o username=<username> -o password=<password> URL
+    gallery-dl -u "<username>" -p "<password>" "URL"
+    gallery-dl -o "username=<username>" -o "password=<password>" "URL"
 
 Cookies
@@ -253,7 +281,7 @@ CAPTCHA or similar, or has not been implemented yet,
 you can use the cookies from a browser login session and input them into
 *gallery-dl*.
 
 This can be done via the
-`cookies <https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorcookies>`__
+`cookies <docs/configuration.rst#extractorcookies>`__
 option in your configuration file by specifying
 
 - | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
@@ -264,6 +292,9 @@ option in your configuration file by specifying
   | (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__,
     in `Firefox <https://developer.mozilla.org/en-US/docs/Tools/Storage_Inspector>`__)
 
+- | the name of a browser to extract cookies from
+  | (supported browsers are Chromium-based ones, Firefox, and Safari)
+
 For example:
 
 .. code:: json
@@ -277,30 +308,43 @@ For example:
             "cookies": {
                 "session_id": "K1T57EKu19TR49C51CDjOJoXNQLF7VbdVOiBrC9ye0a"
             }
+        },
+        "twitter": {
+            "cookies": ["firefox"]
         }
     }
 }
 
-You can also specify a cookies.txt file with
-the :code:`--cookies` command-line option:
+| You can also specify a cookies.txt file with
+  the :code:`--cookies` command-line option
+| or a browser to extract cookies from with :code:`--cookies-from-browser`:
 
 .. code:: bash
 
-    gallery-dl --cookies "$HOME/path/to/cookies.txt" URL
+    gallery-dl --cookies "$HOME/path/to/cookies.txt" "URL"
+    gallery-dl --cookies-from-browser firefox "URL"
 
 OAuth
 -----
 
-*gallery-dl* supports user authentication via OAuth_ for
-``deviantart``, ``flickr``, ``reddit``, ``smugmug``, ``tumblr``,
+*gallery-dl* supports user authentication via OAuth_ for some extractors.
+This is necessary for
+``pixiv``
+and optional for
+``deviantart``,
+``flickr``,
+``reddit``,
+``smugmug``,
+``tumblr``,
 and ``mastodon`` instances.
 
-This is mostly optional, but grants *gallery-dl* the ability
-to issue requests on your account's behalf and enables it to access resources
-which would otherwise be unavailable to a public user.
-
-To link your account to *gallery-dl*, start by invoking it with
-``oauth:<sitename>`` as an argument. For example:
+Linking your account to *gallery-dl* grants it the ability to issue requests
+on your account's behalf and enables it to access resources which would
+otherwise be unavailable to a public user.
+
+To do so, start by invoking it with ``oauth:<sitename>`` as an argument.
+For example:
 
 .. code:: bash
@@ -319,13 +363,6 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
 
     gallery-dl oauth:mastodon:https://mastodon.social/
 
-
-.. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf
-.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
-.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
-.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md
-.. _Formatting: https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md
-
 .. _Python: https://www.python.org/downloads/
 .. _PyPI: https://pypi.org/
 .. _pip: https://pip.pypa.io/en/stable/
@@ -336,7 +373,6 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
 .. _PySocks: https://pypi.org/project/PySocks/
 .. _brotli: https://github.com/google/brotli
 .. _brotlicffi: https://github.com/python-hyper/brotlicffi
-.. _pyOpenSSL: https://pyopenssl.org/
 .. _Snapd: https://docs.snapcraft.io/installing-snapd
 .. _OAuth: https://en.wikipedia.org/wiki/OAuth
 .. _Chocolatey: https://chocolatey.org/install
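The Mozilla/Netscape cookies.txt format mentioned in the Cookies section above can be read with Python's standard library alone; a small self-contained illustration (not gallery-dl code):

```python
from http.cookiejar import MozillaCookieJar

jar = MozillaCookieJar("cookies.txt")  # file exported by a browser addon
jar.load(ignore_discard=True, ignore_expires=True)
for cookie in jar:
    print(cookie.domain, cookie.name, cookie.value)
```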
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 13ee2ea..1125b36 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -48,17 +48,17 @@ _arguments -C -S \
 {-c,--config}'[Additional configuration files]':'<file>':_files \
 --config-yaml'[==SUPPRESS==]':'<file>':_files \
 {-o,--option}'[Additional "<key>=<value>" option values]':'<opt>' \
---ignore-config'[Do not read the default configuration files]' \
+--ignore-config'[Do not read default configuration files]' \
 {-u,--username}'[Username to login with]':'<user>' \
 {-p,--password}'[Password belonging to the given username]':'<pass>' \
 --netrc'[Enable .netrc authentication data]' \
---download-archive'[Record all downloaded files in the archive file and skip downloading any file already in it]':'<file>':_files \
+--download-archive'[Record all downloaded or skipped files in FILE and skip downloading any file already in it]':'<file>':_files \
 {-A,--abort}'[Stop current extractor run after N consecutive file downloads were skipped]':'<n>' \
 {-T,--terminate}'[Stop current and parent extractor runs after N consecutive file downloads were skipped]':'<n>' \
---range'[Index-range(s) specifying which images to download. For example "5-10" or "1,3-5,10-"]':'<range>' \
---chapter-range'[Like "--range", but applies to manga-chapters and other delegated URLs]':'<range>' \
---filter'[Python expression controlling which images to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". Example: --filter "image_width >= 1000 and rating in ("s", "q")"]':'<expr>' \
---chapter-filter'[Like "--filter", but applies to manga-chapters and other delegated URLs]':'<expr>' \
+--range'[Index range(s) specifying which files to download. These can be either a constant value, range, or slice (e.g. "5", "8-20", or "1:24:3")]':'<range>' \
+--chapter-range'[Like "--range", but applies to manga chapters and other delegated URLs]':'<range>' \
+--filter'[Python expression controlling which files to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K".
Example: --filter "image_width >= 1000 and rating in ("s", "q")"]':'<expr>' \ +--chapter-filter'[Like "--filter", but applies to manga chapters and other delegated URLs]':'<expr>' \ --zip'[Store downloaded files in a ZIP archive]' \ --ugoira-conv'[Convert Pixiv Ugoira to WebM (requires FFmpeg)]' \ --ugoira-conv-lossless'[Convert Pixiv Ugoira to WebM in VP9 lossless mode]' \ diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish index 50ad132..986d9df 100644 --- a/data/completion/gallery-dl.fish +++ b/data/completion/gallery-dl.fish @@ -42,17 +42,17 @@ complete -c gallery-dl -l 'no-check-certificate' -d 'Disable HTTPS certificate v complete -c gallery-dl -r -F -s 'c' -l 'config' -d 'Additional configuration files' complete -c gallery-dl -r -F -l 'config-yaml' -d '==SUPPRESS==' complete -c gallery-dl -x -s 'o' -l 'option' -d 'Additional "<key>=<value>" option values' -complete -c gallery-dl -l 'ignore-config' -d 'Do not read the default configuration files' +complete -c gallery-dl -l 'ignore-config' -d 'Do not read default configuration files' complete -c gallery-dl -x -s 'u' -l 'username' -d 'Username to login with' complete -c gallery-dl -x -s 'p' -l 'password' -d 'Password belonging to the given username' complete -c gallery-dl -l 'netrc' -d 'Enable .netrc authentication data' -complete -c gallery-dl -r -F -l 'download-archive' -d 'Record all downloaded files in the archive file and skip downloading any file already in it' +complete -c gallery-dl -r -F -l 'download-archive' -d 'Record all downloaded or skipped files in FILE and skip downloading any file already in it' complete -c gallery-dl -x -s 'A' -l 'abort' -d 'Stop current extractor run after N consecutive file downloads were skipped' complete -c gallery-dl -x -s 'T' -l 'terminate' -d 'Stop current and parent extractor runs after N consecutive file downloads were skipped' -complete -c gallery-dl -x -l 'range' -d 'Index-range(s) specifying which images to download. For example "5-10" or "1,3-5,10-"' -complete -c gallery-dl -x -l 'chapter-range' -d 'Like "--range", but applies to manga-chapters and other delegated URLs' -complete -c gallery-dl -x -l 'filter' -d 'Python expression controlling which images to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". Example: --filter "image_width >= 1000 and rating in ("s", "q")"' -complete -c gallery-dl -x -l 'chapter-filter' -d 'Like "--filter", but applies to manga-chapters and other delegated URLs' +complete -c gallery-dl -x -l 'range' -d 'Index range(s) specifying which files to download. These can be either a constant value, range, or slice (e.g. "5", "8-20", or "1:24:3")' +complete -c gallery-dl -x -l 'chapter-range' -d 'Like "--range", but applies to manga chapters and other delegated URLs' +complete -c gallery-dl -x -l 'filter' -d 'Python expression controlling which files to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". 
Example: --filter "image_width >= 1000 and rating in ("s", "q")"' +complete -c gallery-dl -x -l 'chapter-filter' -d 'Like "--filter", but applies to manga chapters and other delegated URLs' complete -c gallery-dl -l 'zip' -d 'Store downloaded files in a ZIP archive' complete -c gallery-dl -l 'ugoira-conv' -d 'Convert Pixiv Ugoira to WebM (requires FFmpeg)' complete -c gallery-dl -l 'ugoira-conv-lossless' -d 'Convert Pixiv Ugoira to WebM in VP9 lossless mode' diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index d85b1c9..e88dd4f 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2022-12-18" "1.24.2" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2023-01-10" "1.24.3" "gallery-dl Manual" .\" disable hyphenation .nh @@ -144,7 +144,7 @@ Additional configuration files Additional '<key>=<value>' option values .TP .B "\-\-ignore\-config" -Do not read the default configuration files +Do not read default configuration files .TP .B "\-u, \-\-username" \f[I]USER\f[] Username to login with @@ -156,7 +156,7 @@ Password belonging to the given username Enable .netrc authentication data .TP .B "\-\-download\-archive" \f[I]FILE\f[] -Record all downloaded files in the archive file and skip downloading any file already in it +Record all downloaded or skipped files in FILE and skip downloading any file already in it .TP .B "\-A, \-\-abort" \f[I]N\f[] Stop current extractor run after N consecutive file downloads were skipped @@ -165,16 +165,16 @@ Stop current extractor run after N consecutive file downloads were skipped Stop current and parent extractor runs after N consecutive file downloads were skipped .TP .B "\-\-range" \f[I]RANGE\f[] -Index-range(s) specifying which images to download. For example '5-10' or '1,3-5,10-' +Index range(s) specifying which files to download. These can be either a constant value, range, or slice (e.g. '5', '8-20', or '1:24:3') .TP .B "\-\-chapter\-range" \f[I]RANGE\f[] -Like '--range', but applies to manga-chapters and other delegated URLs +Like '--range', but applies to manga chapters and other delegated URLs .TP .B "\-\-filter" \f[I]EXPR\f[] -Python expression controlling which images to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by '-K'. Example: --filter "image_width >= 1000 and rating in ('s', 'q')" +Python expression controlling which files to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by '-K'. Example: --filter "image_width >= 1000 and rating in ('s', 'q')" .TP .B "\-\-chapter\-filter" \f[I]EXPR\f[] -Like '--filter', but applies to manga-chapters and other delegated URLs +Like '--filter', but applies to manga chapters and other delegated URLs .TP .B "\-\-zip" Store downloaded files in a ZIP archive @@ -201,10 +201,10 @@ Write image tags to separate text files Set file modification times according to 'date' metadata .TP .B "\-\-exec" \f[I]CMD\f[] -Execute CMD for each downloaded file. Example: --exec 'convert {} {}.png && rm {}' +Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}" .TP .B "\-\-exec\-after" \f[I]CMD\f[] -Execute CMD after all files were downloaded successfully. Example: --exec-after 'cd {} && convert * ../doc.pdf' +Execute CMD after all files were downloaded successfully. 
Example: --exec-after "cd {} && convert * ../doc.pdf" .TP .B "\-P, \-\-postprocessor" \f[I]NAME\f[] Activate the specified post processor diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 36b2c84..ff0067b 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2022-12-18" "1.24.2" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2023-01-10" "1.24.3" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -75,16 +75,17 @@ those as makeshift comments by settings their values to arbitrary strings. .SH EXTRACTOR OPTIONS .SS extractor.*.filename .IP "Type:" 6 -\f[I]string\f[] or \f[I]object\f[] +.br +* \f[I]string\f[] +.br +* \f[I]object\f[] (condition -> \f[I]format string\f[]) .IP "Example:" 4 -.br -* .. code:: json +.. code:: json "{manga}_c{chapter}_{page:>03}.{extension}" -.br -* .. code:: json +.. code:: json { "extension == 'mp4'": "{id}_video.{extension}", @@ -135,16 +136,17 @@ a valid filename extension. .SS extractor.*.directory .IP "Type:" 6 -\f[I]list\f[] of \f[I]strings\f[] or \f[I]object\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] +.br +* \f[I]object\f[] (condition -> \f[I]format strings\f[]) .IP "Example:" 4 -.br -* .. code:: json +.. code:: json ["{category}", "{manga}", "c{chapter} - {title}"] -.br -* .. code:: json +.. code:: json { "'nature' in content": ["Nature Pictures"], @@ -190,7 +192,10 @@ for any spawned child extractors. .SS extractor.*.parent-metadata .IP "Type:" 6 -\f[I]bool\f[] or \f[I]string\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]false\f[] @@ -226,7 +231,10 @@ Share number of skipped downloads between parent and child extractors. .SS extractor.*.path-restrict .IP "Type:" 6 -\f[I]string\f[] or \f[I]object\f[] +.br +* \f[I]string\f[] +.br +* \f[I]object\f[] (character -> replacement character(s)) .IP "Default:" 9 \f[I]"auto"\f[] @@ -324,7 +332,7 @@ prefixed with \f[I]\\\\?\\\f[] to work around the 260 characters path length lim .SS extractor.*.extension-map .IP "Type:" 6 -\f[I]object\f[] +\f[I]object\f[] (extension -> replacement) .IP "Default:" 9 .. code:: json @@ -343,7 +351,10 @@ A JSON \f[I]object\f[] mapping filename extensions to their replacements. .SS extractor.*.skip .IP "Type:" 6 -\f[I]bool\f[] or \f[I]string\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]true\f[] @@ -435,8 +446,12 @@ Specifying a username and password is required for and optional for .br +* \f[I]aibooru\f[] (*) +.br * \f[I]aryion\f[] .br +* \f[I]atfbooru\f[] (*) +.br * \f[I]danbooru\f[] (*) .br * \f[I]e621\f[] (*) @@ -475,7 +490,7 @@ These values can also be specified via the \f[I]-u/--username\f[] and \f[I]-p/--password\f[] command-line options or by using a \f[I].netrc\f[] file. (see Authentication_) -(*) The password value for \f[I]danbooru\f[] and \f[I]e621\f[] should be +(*) The password value for these sites should be the API key found in your user profile, not the actual account password. @@ -492,10 +507,12 @@ Enable the use of \f[I].netrc\f[] authentication data. .SS extractor.*.cookies .IP "Type:" 6 -\f[I]Path\f[] or \f[I]object\f[] or \f[I]list\f[] - -.IP "Default:" 9 -\f[I]null\f[] +.br +* \f[I]Path\f[] +.br +* \f[I]object\f[] (name -> value) +.br +* \f[I]list\f[] .IP "Description:" 4 Source to read additional cookies from. This can be @@ -545,14 +562,17 @@ Source to read additional cookies from. 
This can be \f[I]true\f[] .IP "Description:" 4 -If \f[I]extractor.*.cookies\f[] specifies the \f[I]Path\f[] to a cookies.txt +If \f[I]extractor.*.cookies\f[] specifies the \f[I]Path\f[] of a cookies.txt file and it can be opened and parsed without errors, update its contents with cookies received during data extraction. .SS extractor.*.proxy .IP "Type:" 6 -\f[I]string\f[] or \f[I]object\f[] +.br +* \f[I]string\f[] +.br +* \f[I]object\f[] (scheme -> proxy) .IP "Default:" 9 \f[I]null\f[] @@ -580,8 +600,8 @@ Example: "http://10.20.1.128": "http://10.10.1.10:5323" } -Note: All proxy URLs should include a scheme, -otherwise \f[I]http://\f[] is assumed. +Note: If a proxy URLs does not include a scheme, +\f[I]http://\f[] is assumed. .SS extractor.*.source-address @@ -619,8 +639,9 @@ User-Agent header value to be used for HTTP requests. Setting this value to \f[I]"browser"\f[] will try to automatically detect and use the User-Agent used by the system's default browser. -Note: This option has no effect on pixiv extractors, -as these need specific values to function correctly. +Note: This option has no effect on +pixiv, e621, and mangadex +extractors, as these need specific values to function correctly. .SS extractor.*.browser @@ -650,13 +671,13 @@ browser would use HTTP/2. .SS extractor.*.keywords .IP "Type:" 6 -\f[I]object\f[] +\f[I]object\f[] (name -> value) .IP "Example:" 4 {"type": "Pixel Art", "type_id": 123} .IP "Description:" 4 -Additional key-value pairs to be added to each metadata dictionary. +Additional name-value pairs to be added to each metadata dictionary. .SS extractor.*.keywords-default @@ -696,11 +717,12 @@ with a \f[I]metadata\f[] post processor, etc. \f[I]null\f[] .IP "Description:" 4 -Insert a reference to the current \f[I]PathFormat\f[] +Insert a reference to the current +\f[I]PathFormat\f[] data structure into metadata dictionaries as the given name. For example, setting this option to \f[I]"gdl_path"\f[] would make it possible -to access the current file's filename as \f[I]"[gdl_path.filename}"\f[]. +to access the current file's filename as \f[I]"{gdl_path.filename}"\f[]. .SS extractor.*.http-metadata @@ -904,7 +926,10 @@ This value gets internally used as the \f[I]timeout\f[] parameter for the .SS extractor.*.verify .IP "Type:" 6 -\f[I]bool\f[] or \f[I]string\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]true\f[] @@ -947,18 +972,40 @@ Use fallback download URLs when a download fails. .SS extractor.*.image-range .IP "Type:" 6 -\f[I]string\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] -.IP "Example:" 4 +.IP "Examples:" 4 .br -* "10-20" +* \f[I]"10-20"\f[] .br -* "-5, 10, 30-50, 100-" +* \f[I]"-5, 10, 30-50, 100-"\f[] +.br +* \f[I]"10:21, 30:51:2, :5, 100:"\f[] +.br +* \f[I]["-5", "10", "30-50", "100-"]\f[] .IP "Description:" 4 -Index-range(s) specifying which images to download. +Index range(s) selecting which files to download. -Note: The index of the first image is \f[I]1\f[]. +These can be specified as + +.br +* index: \f[I]3\f[] (file number 3) +.br +* range: \f[I]2-4\f[] (files 2, 3, and 4) +.br +* \f[I]slice\f[]: \f[I]3:8:2\f[] (files 3, 5, and 7) + +Arguments for range and slice notation are optional +.br +and will default to begin (\f[I]1\f[]) or end (\f[I]sys.maxsize\f[]) if omitted. +For example \f[I]5-\f[], \f[I]5:\f[], and \f[I]5::\f[] all mean "Start at file number 5". +.br + +Note: The index of the first file is \f[I]1\f[]. 
.SS extractor.*.chapter-range @@ -967,41 +1014,46 @@ Note: The index of the first image is \f[I]1\f[]. .IP "Description:" 4 Like \f[I]image-range\f[], -but applies to delegated URLs like manga-chapters, etc. +but applies to delegated URLs like manga chapters, etc. .SS extractor.*.image-filter .IP "Type:" 6 -\f[I]string\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] -.IP "Example:" 4 +.IP "Examples:" 4 .br -* "width >= 1200 and width/height > 1.2" +* \f[I]"re.search(r'foo(bar)+', description)"\f[] .br -* "re.search(r'foo(bar)+', description)" +* \f[I]["width >= 1200", "width/height > 1.2"]\f[] .IP "Description:" 4 Python expression controlling which files to download. -Files for which the expression evaluates to \f[I]False\f[] are ignored. -.br -Available keys are the filename-specific ones listed by \f[I]-K\f[] or \f[I]-j\f[]. -.br +A file only gets downloaded when *all* of the given expressions evaluate to \f[I]True\f[]. + +Available values are the filename-specific ones listed by \f[I]-K\f[] or \f[I]-j\f[]. .SS extractor.*.chapter-filter .IP "Type:" 6 -\f[I]string\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] -.IP "Example:" 4 +.IP "Examples:" 4 .br -* "lang == 'en'" +* \f[I]"lang == 'en'"\f[] .br -* "language == 'French' and 10 <= chapter < 20" +* \f[I]["language == 'French'", "10 <= chapter < 20"]\f[] .IP "Description:" 4 Like \f[I]image-filter\f[], -but applies to delegated URLs like manga-chapters, etc. +but applies to delegated URLs like manga chapters, etc. .SS extractor.*.image-unique @@ -1025,7 +1077,7 @@ current extractor run. .IP "Description:" 4 Like \f[I]image-unique\f[], -but applies to delegated URLs like manga-chapters, etc. +but applies to delegated URLs like manga chapters, etc. .SS extractor.*.date-format @@ -1041,6 +1093,12 @@ date-min and date-max. See \f[I]strptime\f[] for a list of formatting directives. +Note: Despite its name, this option does **not** control how +\f[I]{date}\f[] metadata fields are formatted. +To use a different formatting for those values other than the default +\f[I]%Y-%m-%d %H:%M:%S\f[], put \f[I]strptime\f[] formatting directives +after a colon \f[I]:\f[], for example \f[I]{date:%Y%m%d}\f[]. + .SH EXTRACTOR-SPECIFIC OPTIONS .SS extractor.artstation.external @@ -1096,7 +1154,7 @@ descend into subfolders .SS extractor.bbc.width .IP "Type:" 6 -\f[I]int\f[] +\f[I]integer\f[] .IP "Default:" 9 \f[I]1920\f[] @@ -1156,14 +1214,18 @@ follow the \f[I]source\f[] and download from there if possible. \f[I]false\f[] .IP "Description:" 4 -Extract additional metadata (notes, artist commentary, parent, children) +Extract additional metadata +(notes, artist commentary, parent, children, uploader) -Note: This requires 1 additional HTTP request for each post. +Note: This requires 1 additional HTTP request per post. .SS extractor.danbooru.threshold .IP "Type:" 6 -\f[I]string\f[] or \f[I]int\f[] +.br +* \f[I]string\f[] +.br +* \f[I]integer\f[] .IP "Default:" 9 \f[I]"auto"\f[] @@ -1324,13 +1386,19 @@ belongs to a group or a regular user. .SS extractor.deviantart.include .IP "Type:" 6 -\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] .IP "Default:" 9 \f[I]"gallery"\f[] .IP "Example:" 4 -"favorite,journal,scraps" or ["favorite", "journal", "scraps"] +.br +* "favorite,journal,scraps" +.br +* ["favorite", "journal", "scraps"] .IP "Description:" 4 A (comma-separated) list of subcategories to include @@ -1339,7 +1407,7 @@ when processing a user profile. 
Possible values are \f[I]"gallery"\f[], \f[I]"scraps"\f[], \f[I]"journal"\f[], \f[I]"favorite"\f[]. -You can use \f[I]"all"\f[] instead of listing all values separately. +It is possible to use \f[I]"all"\f[] instead of listing all values separately. .SS extractor.deviantart.journals @@ -1389,7 +1457,10 @@ Request extended metadata for deviation objects to additionally provide .SS extractor.deviantart.original .IP "Type:" 6 -\f[I]bool\f[] or \f[I]string\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]true\f[] @@ -1519,7 +1590,10 @@ Selects an alternative source to download files from. .SS extractor.fanbox.embeds .IP "Type:" 6 -\f[I]bool\f[] or \f[I]string\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]true\f[] @@ -1562,7 +1636,10 @@ Extract and download videos. .SS extractor.flickr.size-max .IP "Type:" 6 -\f[I]integer\f[] or \f[I]string\f[] +.br +* \f[I]integer\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]null\f[] @@ -1608,13 +1685,19 @@ Follow external URLs linked in descriptions. .SS extractor.furaffinity.include .IP "Type:" 6 -\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] .IP "Default:" 9 \f[I]"gallery"\f[] .IP "Example:" 4 -"scraps,favorite" or ["scraps", "favorite"] +.br +* "scraps,favorite" +.br +* ["scraps", "favorite"] .IP "Description:" 4 A (comma-separated) list of subcategories to include @@ -1623,7 +1706,7 @@ when processing a user profile. Possible values are \f[I]"gallery"\f[], \f[I]"scraps"\f[], \f[I]"favorite"\f[]. -You can use \f[I]"all"\f[] instead of listing all values separately. +It is possible to use \f[I]"all"\f[] instead of listing all values separately. .SS extractor.furaffinity.layout @@ -1672,9 +1755,9 @@ even ones without a \f[I]generic:\f[] prefix. .SS extractor.gfycat.format .IP "Type:" 6 .br -* \f[I]list\f[] of \f[I]strings\f[] -.br * \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] .IP "Default:" 9 \f[I]["mp4", "webm", "mobile", "gif"]\f[] @@ -1733,13 +1816,19 @@ Recursively download files from subfolders. .SS extractor.hentaifoundry.include .IP "Type:" 6 -\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] .IP "Default:" 9 \f[I]"pictures"\f[] .IP "Example:" 4 -"scraps,stories" or ["scraps", "stories"] +.br +* "scraps,stories" +.br +* ["scraps", "stories"] .IP "Description:" 4 A (comma-separated) list of subcategories to include @@ -1748,7 +1837,7 @@ when processing a user profile. Possible values are \f[I]"pictures"\f[], \f[I]"scraps"\f[], \f[I]"stories"\f[], \f[I]"favorite"\f[]. -You can use \f[I]"all"\f[] instead of listing all values separately. +It is possible to use \f[I]"all"\f[] instead of listing all values separately. .SS extractor.hitomi.format @@ -1769,7 +1858,10 @@ but is most likely going to fail with \f[I]403 Forbidden\f[] errors. .SS extractor.imgur.mp4 .IP "Type:" 6 -\f[I]bool\f[] or \f[I]string\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]true\f[] @@ -1818,13 +1910,19 @@ Selects which API endpoints to use. 
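Several of the `include` options above accept either a comma-separated string or a list, plus the special value `"all"`. A sketch of normalizing the two forms (assumed behavior based on the descriptions; not gallery-dl's code):

```python
def normalize_include(value, possible):
    # "a,b" and ["a", "b"] are treated alike; "all" expands to every value.
    if isinstance(value, str):
        value = [item.strip() for item in value.split(",")]
    if "all" in value:
        return list(possible)
    return [item for item in value if item in possible]

subcategories = ("gallery", "scraps", "favorite")
print(normalize_include("scraps,favorite", subcategories))  # ['scraps', 'favorite']
print(normalize_include("all", subcategories))              # ['gallery', 'scraps', 'favorite']
```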
.SS extractor.instagram.include .IP "Type:" 6 -\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] .IP "Default:" 9 \f[I]"posts"\f[] .IP "Example:" 4 -"stories,highlights,posts" or ["stories", "highlights", "posts"] +.br +* "stories,highlights,posts" +.br +* ["stories", "highlights", "posts"] .IP "Description:" 4 A (comma-separated) list of subcategories to include @@ -1838,7 +1936,7 @@ Possible values are \f[I]"highlights"\f[], \f[I]"avatar"\f[]. -You can use \f[I]"all"\f[] instead of listing all values separately. +It is possible to use \f[I]"all"\f[] instead of listing all values separately. .SS extractor.instagram.previews @@ -1884,6 +1982,8 @@ Download video files. .IP "Description:" 4 Extract \f[I]comments\f[] metadata. +Note: This requires 1 additional HTTP request per post. + .SS extractor.kemonoparty.duplicates .IP "Type:" 6 @@ -2019,7 +2119,7 @@ The server to use for API requests. .SS extractor.mangadex.api-parameters .IP "Type:" 6 -\f[I]object\f[] +\f[I]object\f[] (name -> value) .IP "Example:" 4 {"order[updatedAt]": "desc"} @@ -2054,7 +2154,24 @@ to filter chapters by. List of acceptable content ratings for returned chapters. -.SS extractor.mastodon.reblogs +.SS extractor.[mastodon].access-token +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +The \f[I]access-token\f[] value you get from \f[I]linking your account to +gallery-dl\f[]. + +Note: gallery-dl comes with built-in tokens for \f[I]mastodon.social\f[], +\f[I]pawoo\f[] and \f[I]baraag\f[]. For other instances, you need to obtain an +\f[I]access-token\f[] in order to use usernames in place of numerical +user IDs. + + +.SS extractor.[mastodon].reblogs .IP "Type:" 6 \f[I]bool\f[] @@ -2065,7 +2182,7 @@ List of acceptable content ratings for returned chapters. Fetch media from reblogged posts. -.SS extractor.mastodon.replies +.SS extractor.[mastodon].replies .IP "Type:" 6 \f[I]bool\f[] @@ -2076,7 +2193,7 @@ Fetch media from reblogged posts. Fetch media from replies to other posts. -.SS extractor.mastodon.text-posts +.SS extractor.[mastodon].text-posts .IP "Type:" 6 \f[I]bool\f[] @@ -2129,13 +2246,19 @@ the next smaller one gets chosen. .SS extractor.newgrounds.include .IP "Type:" 6 -\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] .IP "Default:" 9 \f[I]"art"\f[] .IP "Example:" 4 -"movies,audio" or ["movies", "audio"] +.br +* "movies,audio" +.br +* ["movies", "audio"] .IP "Description:" 4 A (comma-separated) list of subcategories to include @@ -2144,12 +2267,15 @@ when processing a user profile. Possible values are \f[I]"art"\f[], \f[I]"audio"\f[], \f[I]"games"\f[], \f[I]"movies"\f[]. -You can use \f[I]"all"\f[] instead of listing all values separately. +It is possible to use \f[I]"all"\f[] instead of listing all values separately. .SS extractor.nijie.include .IP "Type:" 6 -\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] .IP "Default:" 9 \f[I]"illustration,doujin"\f[] @@ -2161,7 +2287,7 @@ when processing a user profile. Possible values are \f[I]"illustration"\f[], \f[I]"doujin"\f[], \f[I]"favorite"\f[], \f[I]"nuita"\f[]. -You can use \f[I]"all"\f[] instead of listing all values separately. +It is possible to use \f[I]"all"\f[] instead of listing all values separately. .SS extractor.nitter.quoted @@ -2188,7 +2314,10 @@ Fetch media from Retweets. 
.SS extractor.nitter.videos .IP "Type:" 6 -\f[I]bool\f[] or \f[I]string\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]true\f[] @@ -2254,7 +2383,7 @@ Host name / IP address to bind to during OAuth authorization. .IP "Description:" 4 Port number to listen on during OAuth authorization. -Note: All redirects will go to http://localhost:6414/, regardless +Note: All redirects will go to port \f[I]6414\f[], regardless of the port specified here. You'll have to manually adjust the port number in your browser's address bar when using a different port than the default. @@ -2331,6 +2460,20 @@ Extract inline images. Extract media from reblogged posts. +.SS extractor.pinterest.domain +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"auto"\f[] + +.IP "Description:" 4 +Specifies the domain used by \f[I]pinterest\f[] extractors. + +Setting this option to \f[I]"auto"\f[] +uses the same domain as a given input URL. + + .SS extractor.pinterest.sections .IP "Type:" 6 \f[I]bool\f[] @@ -2379,6 +2522,17 @@ Possible values are It is possible to use \f[I]"all"\f[] instead of listing all values separately. +.SS extractor.pixiv.refresh-token +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Description:" 4 +The \f[I]refresh-token\f[] value you get +from running \f[I]gallery-dl oauth:pixiv\f[] (see OAuth_) or +by using a third-party tool like +\f[I]gppt\f[]. + + .SS extractor.pixiv.metadata .IP "Type:" 6 \f[I]bool\f[] @@ -2390,6 +2544,21 @@ It is possible to use \f[I]"all"\f[] instead of listing all values separately. Fetch extended \f[I]user\f[] metadata. +.SS extractor.pixiv.metadata-bookmark +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +For works bookmarked by +\f[I]your own account\f[], +fetch bookmark tags as \f[I]tags_bookmark\f[] metadata. + +Note: This requires 1 additional API call per bookmarked post. + + .SS extractor.pixiv.work.related .IP "Type:" 6 \f[I]bool\f[] @@ -2534,7 +2703,7 @@ time required when scanning a subreddit. Retrieve additional comments by resolving the \f[I]more\f[] comment stubs in the base comment tree. -This requires 1 additional API call for every 100 extra comments. +Note: This requires 1 additional API call for every 100 extra comments. .SS extractor.reddit.date-min & .date-max @@ -2599,7 +2768,10 @@ at 600 requests every 10 minutes/600 seconds. .SS extractor.reddit.videos .IP "Type:" 6 -\f[I]bool\f[] or \f[I]string\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]true\f[] @@ -2620,9 +2792,9 @@ video extraction and download .SS extractor.redgifs.format .IP "Type:" 6 .br -* \f[I]list\f[] of \f[I]strings\f[] -.br * \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] .IP "Default:" 9 \f[I]["hd", "sd", "gif"]\f[] @@ -2707,7 +2879,10 @@ Download thumbnails. .SS extractor.skeb.search.filters .IP "Type:" 6 -\f[I]list\f[] or \f[I]string\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] .IP "Default:" 9 \f[I]["genre:art", "genre:voice", "genre:novel", "genre:video", "genre:music", "genre:correction"]\f[] @@ -2822,7 +2997,10 @@ Selects how to handle exceeding the daily API rate limit. 
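Options such as `extractor.reddit.date-min` / `.date-max` above take date strings, parsed with the pattern supplied by `extractor.*.date-format` (whose documented default is `%Y-%m-%d %H:%M:%S`). A quick `strptime` illustration of that comparison (plain standard-library demo, not gallery-dl code):

```python
from datetime import datetime

date_format = "%Y-%m-%d %H:%M:%S"  # documented default pattern
date_min = datetime.strptime("2023-01-01 00:00:00", date_format)
post_date = datetime.strptime("2023-01-10 04:09:13", date_format)
print(post_date >= date_min)  # True -> the submission is kept
```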
.SS extractor.tumblr.reblogs .IP "Type:" 6 -\f[I]bool\f[] or \f[I]string\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]true\f[] @@ -2839,13 +3017,19 @@ is from the same blog .SS extractor.tumblr.posts .IP "Type:" 6 -\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] .IP "Default:" 9 \f[I]"all"\f[] .IP "Example:" 4 -"video,audio,link" or ["video", "audio", "link"] +.br +* "video,audio,link" +.br +* ["video", "audio", "link"] .IP "Description:" 4 A (comma-separated) list of post types to extract images, etc. from. @@ -2853,7 +3037,7 @@ A (comma-separated) list of post types to extract images, etc. from. Possible types are \f[I]text\f[], \f[I]quote\f[], \f[I]link\f[], \f[I]answer\f[], \f[I]video\f[], \f[I]audio\f[], \f[I]photo\f[], \f[I]chat\f[]. -You can use \f[I]"all"\f[] instead of listing all types separately. +It is possible to use \f[I]"all"\f[] instead of listing all types separately. .SS extractor.tumblr.fallback-delay @@ -2909,7 +3093,10 @@ See \f[I]Filters\f[] for details. .SS extractor.twitter.cards .IP "Type:" 6 -\f[I]bool\f[] or \f[I]string\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]false\f[] @@ -2993,6 +3180,8 @@ with enabled \f[I]conversations\f[] option for each Tweet in said timeline. Note: This requires at least 1 additional API call per initial Tweet. +Age-restricted replies cannot be expanded when using the +\f[I]syndication\f[] API. .SS extractor.twitter.size @@ -3013,13 +3202,32 @@ Known available sizes are .SS extractor.twitter.syndication .IP "Type:" 6 -\f[I]bool\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]false\f[] .IP "Description:" 4 -Retrieve age-restricted content using Twitter's syndication API. +Controls how to retrieve age-restricted content when not logged in. + +.br +* \f[I]false\f[]: Skip age-restricted Tweets. +.br +* \f[I]true\f[]: Download using Twitter's syndication API. +.br +* \f[I]"extended"\f[]: Try to fetch Tweet metadata using the normal API +in addition to the syndication API. This requires additional HTTP +requests in some cases (e.g. when \f[I]retweets\f[] +are enabled). + +Note: This does not apply to search results (including +\f[I]timeline strategies\f[]). +To retrieve such content from search results, you must log in and +disable "Hide sensitive content" in your \f[I]search settings +<https://twitter.com/settings/search>\f[]. .SS extractor.twitter.logout @@ -3054,6 +3262,9 @@ Fetch media from pinned Tweets. .IP "Description:" 4 Fetch media from quoted Tweets. +If this option is enabled, gallery-dl will try to fetch +a quoted (original) Tweet when it sees the Tweet which quotes it. + .SS extractor.twitter.replies .IP "Type:" 6 @@ -3068,6 +3279,13 @@ Fetch media from replies to other Tweets. If this value is \f[I]"self"\f[], only consider replies where reply and original Tweet are from the same user. +Note: Twitter will automatically expand conversations if you +use the \f[I]/with_replies\f[] timeline while logged in. For example, +media from Tweets which the user replied to will also be downloaded. + +It is possible to exclude unwanted Tweets using \f[I]image-filter +<extractor.*.image-filter_>\f[]. + .SS extractor.twitter.retweets .IP "Type:" 6 @@ -3174,7 +3392,10 @@ for \f[I]twitter\f[] to a non-default value, e.g. an empty string \f[I]""\f[]. 
.SS extractor.twitter.videos .IP "Type:" 6 -\f[I]bool\f[] or \f[I]string\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]true\f[] @@ -3265,7 +3486,7 @@ It is possible to use \f[I]"all"\f[] instead of listing all values separately. .IP "Description:" 4 Extract additional metadata (tags, uploader) -Note: This requires 1 additional HTTP request for each post. +Note: This requires 1 additional HTTP request per post. .SS extractor.weasyl.api-key @@ -3312,7 +3533,12 @@ A (comma-separated) list of subcategories to include when processing a user profile. Possible values are -\f[I]"home"\f[], \f[I]"feed"\f[], \f[I]"videos"\f[], \f[I]"newvideo"\f[], \f[I]"article"\f[], \f[I]"album"\f[]. +\f[I]"home"\f[], +\f[I]"feed"\f[], +\f[I]"videos"\f[], +\f[I]"newvideo"\f[], +\f[I]"article"\f[], +\f[I]"album"\f[]. It is possible to use \f[I]"all"\f[] instead of listing all values separately. @@ -3422,7 +3648,7 @@ followed by \f[I]"youtube_dl"\f[] as fallback. .SS extractor.ytdl.raw-options .IP "Type:" 6 -\f[I]object\f[] +\f[I]object\f[] (name -> value) .IP "Example:" 4 .. code:: json @@ -3479,7 +3705,7 @@ Location of a youtube-dl configuration file to load options from. .IP "Description:" 4 Extract additional metadata (date, md5, tags, ...) -Note: This requires 1-2 additional HTTP request for each post. +Note: This requires 1-2 additional HTTP requests per post. .SS extractor.[booru].tags @@ -3493,7 +3719,7 @@ Note: This requires 1-2 additional HTTP request for each post. Categorize tags by their respective types and provide them as \f[I]tags_<type>\f[] metadata fields. -Note: This requires 1 additional HTTP request for each post. +Note: This requires 1 additional HTTP request per post. .SS extractor.[booru].notes @@ -3506,7 +3732,7 @@ Note: This requires 1 additional HTTP request for each post. .IP "Description:" 4 Extract overlay notes (position and text). -Note: This requires 1 additional HTTP request for each post. +Note: This requires 1 additional HTTP request per post. .SS extractor.[manga-extractor].chapter-reverse @@ -3659,7 +3885,7 @@ or \f[I]-1\f[] for infinite retries. .SS downloader.*.timeout .IP "Type:" 6 -\f[I]float\f[] or \f[I]null\f[] +\f[I]float\f[] .IP "Default:" 9 \f[I]extractor.*.timeout\f[] @@ -3670,7 +3896,10 @@ Connection timeout during file downloads. .SS downloader.*.verify .IP "Type:" 6 -\f[I]bool\f[] or \f[I]string\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]extractor.*.verify\f[] @@ -3681,16 +3910,19 @@ Certificate validation during file downloads. .SS downloader.*.proxy .IP "Type:" 6 -\f[I]string\f[] or \f[I]object\f[] +.br +* \f[I]string\f[] +.br +* \f[I]object\f[] (scheme -> proxy) .IP "Default:" 9 \f[I]extractor.*.proxy\f[] .IP "Description:" 4 Proxy server used for file downloads. -.br -Disable the use of a proxy by explicitly setting this option to \f[I]null\f[]. -.br + +Disable the use of a proxy for file downloads +by explicitly setting this option to \f[I]null\f[]. .SS downloader.http.adjust-extensions @@ -3711,7 +3943,10 @@ contains JPEG/JFIF data. .SS downloader.http.chunk-size .IP "Type:" 6 -\f[I]integer\f[] or \f[I]string\f[] +.br +* \f[I]integer\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]32768\f[] @@ -3729,7 +3964,7 @@ These suffixes are case-insensitive. 
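`downloader.http.chunk-size` above accepts either a byte count or a string with a size suffix. A parsing sketch — the exact suffix set is not shown in this excerpt, so `k`/`m`/`g` below are an assumption; the case-insensitivity is as described:

```python
SUFFIXES = {"k": 2**10, "m": 2**20, "g": 2**30}  # assumed suffix set

def parse_chunk_size(value):
    if isinstance(value, int):
        return value
    value = value.strip().lower()
    if value and value[-1] in SUFFIXES:
        return int(float(value[:-1]) * SUFFIXES[value[-1]])
    return int(value)

print(parse_chunk_size(32768))  # 32768 (the documented default)
print(parse_chunk_size("32k"))  # 32768
```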
.SS downloader.http.headers .IP "Type:" 6 -\f[I]object\f[] +\f[I]object\f[] (name -> value) .IP "Example:" 4 {"Accept": "image/webp,*/*", "Referer": "https://example.org/"} @@ -3835,7 +4070,7 @@ cause unexpected results in combination with other options .SS downloader.ytdl.raw-options .IP "Type:" 6 -\f[I]object\f[] +\f[I]object\f[] (name -> value) .IP "Example:" 4 .. code:: json @@ -3885,7 +4120,10 @@ Location of a youtube-dl configuration file to load options from. .SH OUTPUT OPTIONS .SS output.mode .IP "Type:" 6 -\f[I]string\f[] +.br +* \f[I]string\f[] +.br +* \f[I]object\f[] (key -> format string) .IP "Default:" 9 \f[I]"auto"\f[] @@ -3902,7 +4140,54 @@ Controls the output string format and status indicators. .br * \f[I]"color"\f[]: Suitable for terminals that understand ANSI escape codes and colors .br -* \f[I]"auto"\f[]: Automatically choose the best suitable output mode +* \f[I]"auto"\f[]: \f[I]"terminal"\f[] on Windows with \f[I]output.ansi\f[] disabled, +\f[I]"color"\f[] otherwise. + +It is possible to use custom output format strings +.br +by setting this option to an \f[I]object\f[] and specifying +\f[I]start\f[], \f[I]success\f[], \f[I]skip\f[], \f[I]progress\f[], and \f[I]progress-total\f[]. +.br + +For example, the following will replicate the same output as \f[I]mode: color\f[]: + +.. code:: json + +{ +"start" : "{}", +"success": "\\r\\u001b[1;32m{}\\u001b[0m\\n", +"skip" : "\\u001b[2m{}\\u001b[0m\\n", +"progress" : "\\r{0:>7}B {1:>7}B/s ", +"progress-total": "\\r{3:>3}% {0:>7}B {1:>7}B/s " +} + +\f[I]start\f[], \f[I]success\f[], and \f[I]skip\f[] are used to output the current +filename, where \f[I]{}\f[] or \f[I]{0}\f[] is replaced with said filename. +If a given format string contains printable characters other than that, +their number needs to be specified as \f[I][<number>, <format string>]\f[] +to get the correct results for \f[I]output.shorten\f[]. For example + +.. code:: json + +"start" : [12, "Downloading {}"] + +\f[I]progress\f[] and \f[I]progress-total\f[] are used when displaying the +.br +\f[I]download progress indicator\f[], +\f[I]progress\f[] when the total number of bytes to download is unknown, +.br +\f[I]progress-total\f[] otherwise. + +For these format strings + +.br +* \f[I]{0}\f[] is number of bytes downloaded +.br +* \f[I]{1}\f[] is number of downloaded bytes per second +.br +* \f[I]{2}\f[] is total number of bytes +.br +* \f[I]{3}\f[] is percent of bytes downloaded to total bytes .SS output.shorten @@ -3922,7 +4207,7 @@ with a display width greater than 1. .SS output.colors .IP "Type:" 6 -\f[I]object\f[] +\f[I]object\f[] (key -> ANSI color) .IP "Default:" 9 \f[I]{"success": "1;32", "skip": "2"}\f[] @@ -3983,7 +4268,10 @@ in the output of \f[I]-K/--list-keywords\f[] and \f[I]-j/--dump-json\f[]. .SS output.progress .IP "Type:" 6 -\f[I]bool\f[] or \f[I]string\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]true\f[] @@ -4005,13 +4293,16 @@ as a custom \f[I]format string\f[]. Possible replacement keys are .SS output.log .IP "Type:" 6 -\f[I]string\f[] or \f[I]Logging Configuration\f[] +.br +* \f[I]string\f[] +.br +* \f[I]Logging Configuration\f[] .IP "Default:" 9 \f[I]"[{name}][{levelname}] {message}"\f[] .IP "Description:" 4 -Configuration for standard logging output to stderr. +Configuration for logging output to stderr. If this is a simple \f[I]string\f[], it specifies the format string for logging messages. @@ -4019,10 +4310,10 @@ the format string for logging messages. 
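The default `output.log` value above, `[{name}][{levelname}] {message}`, is a `str.format`-style logging format. A roughly equivalent plain-Python setup (illustrative; gallery-dl wires this up internally):

```python
import logging

handler = logging.StreamHandler()  # writes to stderr, as described
handler.setFormatter(
    logging.Formatter("[{name}][{levelname}] {message}", style="{"))

log = logging.getLogger("gallery-dl.example")
log.addHandler(handler)
log.warning("something noteworthy")
# [gallery-dl.example][WARNING] something noteworthy
```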
.SS output.logfile .IP "Type:" 6 -\f[I]Path\f[] or \f[I]Logging Configuration\f[] - -.IP "Default:" 9 -\f[I]null\f[] +.br +* \f[I]Path\f[] +.br +* \f[I]Logging Configuration\f[] .IP "Description:" 4 File to write logging output to. @@ -4030,10 +4321,10 @@ File to write logging output to. .SS output.unsupportedfile .IP "Type:" 6 -\f[I]Path\f[] or \f[I]Logging Configuration\f[] - -.IP "Default:" 9 -\f[I]null\f[] +.br +* \f[I]Path\f[] +.br +* \f[I]Logging Configuration\f[] .IP "Description:" 4 File to write external URLs unsupported by *gallery-dl* to. @@ -4056,7 +4347,7 @@ before outputting them as JSON. .SH POSTPROCESSOR OPTIONS .SS classify.mapping .IP "Type:" 6 -\f[I]object\f[] +\f[I]object\f[] (directory -> extensions) .IP "Default:" 9 .. code:: json @@ -4144,7 +4435,10 @@ or to let it run asynchronously. .SS exec.command .IP "Type:" 6 -\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] .IP "Example:" 4 .br @@ -4193,8 +4487,7 @@ See \f[I]metadata.event\f[] for a list of available events. Selects how to process metadata. .br -* \f[I]"json"\f[]: write metadata using \f[I]json.dump() -<https://docs.python.org/3/library/json.html#json.dump>\f[] +* \f[I]"json"\f[]: write metadata using \f[I]json.dump()\f[] .br * \f[I]"jsonl"\f[]: write metadata in \f[I]JSON Lines <https://jsonlines.org/>\f[] format @@ -4314,13 +4607,11 @@ After downloading all files of a post * \f[I]object\f[] (field name -> \f[I]format string\f[]) .IP "Example:" 4 -.br -* .. code:: json +.. code:: json ["blocked", "watching", "status[creator][name]"] -.br -* .. code:: json +.. code:: json { "blocked" : "***", @@ -4341,7 +4632,10 @@ whose result is assigned to said field name. .SS metadata.content-format .IP "Type:" 6 -\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] .IP "Example:" 4 .br @@ -4355,6 +4649,24 @@ Custom format string to build the content of metadata files with. Note: Only applies for \f[I]"mode": "custom"\f[]. +.SS metadata.indent +.IP "Type:" 6 +.br +* \f[I]integer\f[] +.br +* \f[I]string\f[] + +.IP "Default:" 9 +\f[I]4\f[] + +.IP "Description:" 4 +Indentation level of JSON output. + +See the \f[I]indent\f[] argument of \f[I]json.dump()\f[] for further details. + +Note: Only applies for \f[I]"mode": "json"\f[]. + + .SS metadata.open .IP "Type:" 6 \f[I]string\f[] @@ -4369,19 +4681,7 @@ For example, use \f[I]"a"\f[] to append to a file's content or \f[I]"w"\f[] to truncate it. -See the \f[I]mode\f[] parameter of \f[I]open()\f[] for further details. - - -.SS metadata.private -.IP "Type:" 6 -\f[I]bool\f[] - -.IP "Default:" 9 -\f[I]false\f[] - -.IP "Description:" 4 -Include private fields, -i.e. fields whose name starts with an underscore. +See the \f[I]mode\f[] argument of \f[I]open()\f[] for further details. .SS metadata.encoding @@ -4394,7 +4694,19 @@ i.e. fields whose name starts with an underscore. .IP "Description:" 4 Name of the encoding used to encode a file's content. -See the \f[I]encoding\f[] parameter of \f[I]open()\f[] for further details. +See the \f[I]encoding\f[] argument of \f[I]open()\f[] for further details. + + +.SS metadata.private +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Include private fields, +i.e. fields whose name starts with an underscore. 
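Taken together, the \f[I]metadata.*\f[] options above correspond to keys of
a single postprocessor entry. A minimal sketch, with illustrative values
only:

.. code:: json

{
"postprocessors": [
{
"name"    : "metadata",
"mode"    : "json",
"indent"  : 2,
"open"    : "w",
"encoding": "utf-8",
"private" : true
}
]
}

With \f[I]"mode": "json"\f[], \f[I]indent\f[] controls the JSON layout,
while \f[I]open\f[] and \f[I]encoding\f[] are passed through to Python's
\f[I]open()\f[] as described above.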
.SS metadata.archive @@ -4834,8 +5146,8 @@ in Flickr's \f[I]App Garden\f[] * fill out the form with a random name and description and click "SUBMIT" .br -* copy \f[I]Key\f[] and \f[I]Secret\f[] and put them in your configuration -file +* copy \f[I]Key\f[] and \f[I]Secret\f[] and put them in your configuration file +as \f[I]"api-key"\f[] and \f[I]"api-secret"\f[] .SS extractor.reddit.client-id & .user-agent @@ -4855,9 +5167,10 @@ section of your account's preferences .br * copy the client id (third line, under your application's name and "installed app") and put it in your configuration file +as \f[I]"client-id"\f[] .br * use "\f[I]Python:<application name>:v1.0 (by /u/<username>)\f[]" as -user-agent and replace \f[I]<application name>\f[] and \f[I]<username>\f[] +\f[I]user-agent\f[] and replace \f[I]<application name>\f[] and \f[I]<username>\f[] accordingly (see Reddit's \f[I]API access rules\f[]) @@ -4878,6 +5191,7 @@ and "Use" to "Non-Commercial" .br * copy \f[I]API Key\f[] and \f[I]API Secret\f[] and put them in your configuration file +as \f[I]"api-key"\f[] and \f[I]"api-secret"\f[] .SS extractor.tumblr.api-key & .api-secret @@ -4901,6 +5215,7 @@ callback URL" .br * copy your \f[I]OAuth Consumer Key\f[] and \f[I]Secret Key\f[] and put them in your configuration file +as \f[I]"api-key"\f[] and \f[I]"api-secret"\f[] .SH CUSTOM TYPES @@ -4952,7 +5267,7 @@ A \f[I]Duration\f[] represents a span of time in seconds. * If given as a single \f[I]float\f[], it will be used as that exact value. .br * If given as a \f[I]list\f[] with 2 floating-point numbers \f[I]a\f[] & \f[I]b\f[] , -it will be randomly chosen with uniform distribution such that \f[I]a <= N <=b\f[]. +it will be randomly chosen with uniform distribution such that \f[I]a <= N <= b\f[]. (see \f[I]random.uniform()\f[]) .br * If given as a \f[I]string\f[], it can either represent a single \f[I]float\f[] diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 98974e9..2d2adbb 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -228,6 +228,7 @@ }, "pinterest": { + "domain": "auto", "sections": true, "videos": true }, @@ -236,6 +237,7 @@ "refresh-token": null, "include": "artworks", "metadata": false, + "metadata-bookmark": false, "tags": "japanese", "ugoira": true }, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 03c1930..17442cc 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.24.2 +Version: 1.24.3 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -39,13 +39,16 @@ License-File: LICENSE gallery-dl ========== -*gallery-dl* is a command-line program to download image galleries and -collections from several image hosting sites (see `Supported Sites`_). -It is a cross-platform tool with many configuration options -and powerful `filenaming capabilities <Formatting_>`_. +*gallery-dl* is a command-line program +to download image galleries and collections +from several image hosting sites +(see `Supported Sites <docs/supportedsites.md>`__). +It is a cross-platform tool +with many `configuration options <docs/configuration.rst>`__ +and powerful `filenaming capabilities <docs/formatting.md>`__. -|pypi| |build| |gitter| +|pypi| |build| .. 
contents::
@@ -59,7 +62,7 @@ Dependencies
Optional
--------
-- FFmpeg_: Pixiv Ugoira to WebM conversion
+- FFmpeg_: Pixiv Ugoira conversion
- yt-dlp_ or youtube-dl_: Video downloads
- PySocks_: SOCKS proxy support
- brotli_ or brotlicffi_: Brotli compression support
@@ -103,9 +106,13 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.3/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86)
<https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.3/gallery-dl.bin>`__
+
+
+Nightly Builds
+--------------
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -141,6 +148,16 @@ Scoop
scoop install gallery-dl
+Homebrew
+--------
+
+For macOS or Linux users using Homebrew:
+
+.. code:: bash
+
+    brew install gallery-dl
+
+
Usage
=====
@@ -149,9 +166,10 @@ from:
.. code:: bash
- gallery-dl [OPTION]... URL...
+ gallery-dl [OPTIONS]... URLS...
-See also :code:`gallery-dl --help`.
+Use :code:`gallery-dl --help` or see `<docs/options.md>`__
+for a full list of all command-line options.
Examples
@@ -199,13 +217,22 @@ Configuration
Configuration files for *gallery-dl* use a JSON-based file format.
-| For a (more or less) complete example with options set to their default values,
-  see gallery-dl.conf_.
-| For a configuration file example with more involved settings and options,
-  see gallery-dl-example.conf_.
-| A list of all available configuration options and their
-  descriptions can be found in configuration.rst_.
-|
+
+Documentation
+-------------
+
+A list of all available configuration options and their descriptions
+can be found in `<docs/configuration.rst>`__.
+
+| For a default configuration file with available options set to their
+  default values, see `<docs/gallery-dl.conf>`__.
+
+| For a commented example with more involved settings and option usage,
+  see `<docs/gallery-dl-example.conf>`__.
+
+
+Locations
+---------
*gallery-dl* searches for configuration files in the following places:
@@ -214,7 +241,7 @@ Windows:
* ``%USERPROFILE%\gallery-dl\config.json``
* ``%USERPROFILE%\gallery-dl.conf``
- (``%USERPROFILE%`` usually refers to the user's home directory,
+ (``%USERPROFILE%`` usually refers to a user's home directory,
i.e. ``C:\Users\<username>\``)
Linux, macOS, etc.:
* ``/etc/gallery-dl.conf``
* ``${HOME}/.config/gallery-dl/config.json``
* ``${HOME}/.gallery-dl.conf``
-Values in later configuration files will override previous ones.
+When run as `executable <Standalone Executable_>`__,
+*gallery-dl* will also look for a ``gallery-dl.conf`` file
+in the same directory as said executable.
-Command line options will override all related settings in the configuration file(s),
-e.g. using ``--write-metadata`` will enable writing metadata using the default values
-for all ``postprocessors.metadata.*`` settings, overriding any specific settings in
-configuration files.
+It is possible to use more than one configuration file at a time.
+In this case, any values from files after the first will get merged
+into the already loaded settings and potentially override previous ones.
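As a sketch of this merge behaviour, assume a first configuration file
sets a base directory and a per-extractor option (paths and values are
hypothetical):

.. code:: json

    {
        "extractor": {
            "base-directory": "/data/gallery-dl",
            "pixiv": { "ugoira": true }
        }
    }

and a second file, loaded after it, only contains:

.. code:: json

    {
        "extractor": {
            "base-directory": "/tmp/gallery-dl"
        }
    }

The effective configuration then uses ``/tmp/gallery-dl`` as its base
directory while ``"ugoira": true`` stays in effect, since values are
merged per key rather than per file.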
Authentication @@ -258,8 +286,8 @@ and optional for ``twitter``, and ``zerochan``. -You can set the necessary information in your configuration file -(cf. gallery-dl.conf_) +You can set the necessary information in your +`configuration file <Configuration_>`__ .. code:: json @@ -278,8 +306,8 @@ or you can provide them directly via the .. code:: bash - gallery-dl -u <username> -p <password> URL - gallery-dl -o username=<username> -o password=<password> URL + gallery-dl -u "<username>" -p "<password>" "URL" + gallery-dl -o "username=<username>" -o "password=<password>" "URL" Cookies @@ -290,7 +318,7 @@ CAPTCHA or similar, or has not been implemented yet, you can use the cookies from a browser login session and input them into *gallery-dl*. This can be done via the -`cookies <https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorcookies>`__ +`cookies <docs/configuration.rst#extractorcookies>`__ option in your configuration file by specifying - | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon @@ -301,6 +329,9 @@ option in your configuration file by specifying | (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__, in `Firefox <https://developer.mozilla.org/en-US/docs/Tools/Storage_Inspector>`__) +- | the name of a browser to extract cookies from + | (supported browsers are Chromium-based ones, Firefox, and Safari) + For example: .. code:: json @@ -314,30 +345,43 @@ For example: "cookies": { "session_id": "K1T57EKu19TR49C51CDjOJoXNQLF7VbdVOiBrC9ye0a" } + }, + "twitter": { + "cookies": ["firefox"] } } } -You can also specify a cookies.txt file with -the :code:`--cookies` command-line option: +| You can also specify a cookies.txt file with + the :code:`--cookies` command-line option +| or a browser to extract cookies from with :code:`--cookies-from-browser`: .. code:: bash - gallery-dl --cookies "$HOME/path/to/cookies.txt" URL + gallery-dl --cookies "$HOME/path/to/cookies.txt" "URL" + gallery-dl --cookies-from-browser firefox "URL" OAuth ----- -*gallery-dl* supports user authentication via OAuth_ for -``deviantart``, ``flickr``, ``reddit``, ``smugmug``, ``tumblr``, +*gallery-dl* supports user authentication via OAuth_ for some extractors. +This is necessary for +``pixiv`` +and optional for +``deviantart``, +``flickr``, +``reddit``, +``smugmug``, +``tumblr``, and ``mastodon`` instances. -This is mostly optional, but grants *gallery-dl* the ability -to issue requests on your account's behalf and enables it to access resources -which would otherwise be unavailable to a public user. -To link your account to *gallery-dl*, start by invoking it with -``oauth:<sitename>`` as an argument. For example: +Linking your account to *gallery-dl* grants it the ability to issue requests +on your account's behalf and enables it to access resources which would +otherwise be unavailable to a public user. + +To do so, start by invoking it with ``oauth:<sitename>`` as an argument. +For example: .. code:: bash @@ -356,13 +400,6 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with gallery-dl oauth:mastodon:https://mastodon.social/ - -.. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf -.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf -.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst -.. 
_Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md -.. _Formatting: https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md - .. _Python: https://www.python.org/downloads/ .. _PyPI: https://pypi.org/ .. _pip: https://pip.pypa.io/en/stable/ @@ -373,7 +410,6 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with .. _PySocks: https://pypi.org/project/PySocks/ .. _brotli: https://github.com/google/brotli .. _brotlicffi: https://github.com/python-hyper/brotlicffi -.. _pyOpenSSL: https://pyopenssl.org/ .. _Snapd: https://docs.snapcraft.io/installing-snapd .. _OAuth: https://en.wikipedia.org/wiki/OAuth .. _Chocolatey: https://chocolatey.org/install diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 556dc49..599a828 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -72,6 +72,7 @@ gallery_dl/extractor/erome.py gallery_dl/extractor/exhentai.py gallery_dl/extractor/fallenangels.py gallery_dl/extractor/fanbox.py +gallery_dl/extractor/fanleaks.py gallery_dl/extractor/fantia.py gallery_dl/extractor/fapachi.py gallery_dl/extractor/fapello.py @@ -180,6 +181,7 @@ gallery_dl/extractor/soundgasm.py gallery_dl/extractor/speakerdeck.py gallery_dl/extractor/subscribestar.py gallery_dl/extractor/tapas.py +gallery_dl/extractor/tcbscans.py gallery_dl/extractor/telegraph.py gallery_dl/extractor/test.py gallery_dl/extractor/toyhouse.py diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py index ee00bf7..f18cc47 100644 --- a/gallery_dl/cookies.py +++ b/gallery_dl/cookies.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2022 Mike Fährmann +# Copyright 2022-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -980,6 +980,7 @@ def _is_path(value): def _parse_browser_specification( browser, profile=None, keyring=None, container=None): + browser = browser.lower() if browser not in SUPPORTED_BROWSERS: raise ValueError("unsupported browser '{}'".format(browser)) if keyring and keyring not in SUPPORTED_KEYRINGS: diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 444075c..f26f6a9 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -38,6 +38,7 @@ modules = [ "exhentai", "fallenangels", "fanbox", + "fanleaks", "fantia", "fapello", "fapachi", @@ -135,6 +136,7 @@ modules = [ "speakerdeck", "subscribestar", "tapas", + "tcbscans", "telegraph", "toyhouse", "tsumino", diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index cf332ac..6da6175 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2022 Mike Fährmann +# Copyright 2018-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://www.behance.net/""" +"""Extractors for https://www.behance.net/""" from .common import Extractor, Message from .. 
import text @@ -17,6 +17,7 @@ class BehanceExtractor(Extractor): """Base class for behance extractors""" category = "behance" root = "https://www.behance.net" + request_interval = (2.0, 4.0) def items(self): for gallery in self.galleries(): diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index 882c2b3..8283fbc 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2022 Mike Fährmann +# Copyright 2022-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -56,8 +56,12 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): files = album["files"] except Exception as exc: self.log.debug("%s: %s", exc.__class__.__name__, exc) + self.log.debug("Falling back to lolisafe API") self.root = root.replace("://", "://app.", 1) files, data = LolisafeAlbumExtractor.fetch_album(self, album_id) + # fix file URLs (bunkr..ru -> bunkr.ru) (#3481) + for file in files: + file["file"] = file["file"].replace("bunkr..", "bunkr.", 1) else: for file in files: file["file"] = file["cdn"] + "/" + file["name"] diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 4352aa7..ad766da 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -327,6 +327,7 @@ class Extractor(): except Exception as exc: self.log.warning("cookies: %s", exc) else: + self.log.debug("Loading cookies from '%s'", cookies) self._cookiefile = cookiefile elif isinstance(cookies, (list, tuple)): diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index ef17176..4c93604 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -101,8 +101,8 @@ class DanbooruExtractor(BaseExtractor): if self.extended_metadata: template = ( - "{}/posts/{}.json" - "?only=artist_commentary,children,notes,parent" + "{}/posts/{}.json?only=artist_commentary,children,notes," + "parent,uploader" ) resp = self.request(template.format(self.root, post["id"])) post.update(resp.json()) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index aa78cfb..aeb2d0a 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2022 Mike Fährmann +# Copyright 2015-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -987,13 +987,9 @@ class DeviantartScrapsExtractor(DeviantartExtractor): _warning = True def deviations(self): - eclipse_api = DeviantartEclipseAPI(self) - if self._warning: - DeviantartScrapsExtractor._warning = False - if not self._check_cookies(self.cookienames): - self.log.warning( - "No session cookies set: Unable to fetch mature scraps.") + self.login() + eclipse_api = DeviantartEclipseAPI(self) for obj in eclipse_api.gallery_scraps(self.user, self.offset): deviation = obj["deviation"] deviation_uuid = eclipse_api.deviation_extended_fetch( @@ -1004,6 +1000,17 @@ class DeviantartScrapsExtractor(DeviantartExtractor): yield self.api.deviation(deviation_uuid) + def login(self): + """Login and obtain session cookies""" + if not self._check_cookies(self.cookienames): + username, password = self._get_auth_info() + if username: + self._update_cookies(_login_impl(self, username, password)) + elif self._warning: + self.log.warning( + "No 
session cookies set: Unable to fetch mature scraps.") + DeviantartScrapsExtractor._warning = False + class DeviantartFollowingExtractor(DeviantartExtractor): """Extractor for user's watched users""" @@ -1513,13 +1520,47 @@ class DeviantartEclipseAPI(): return token -@cache(maxage=100*365*24*3600, keyarg=0) +@cache(maxage=100*365*86400, keyarg=0) def _refresh_token_cache(token): if token and token[0] == "#": return None return token +@cache(maxage=28*86400, keyarg=1) +def _login_impl(extr, username, password): + extr.log.info("Logging in as %s", username) + + url = "https://www.deviantart.com/users/login" + page = extr.request(url).text + + data = {} + for item in text.extract_iter(page, '<input type="hidden" name="', '"/>'): + name, _, value = item.partition('" value="') + data[name] = value + + challenge = data.get("challenge") + if challenge and challenge != "0": + extr.log.warning("Login requires solving a CAPTCHA") + extr.log.debug(challenge) + + data["username"] = username + data["password"] = password + data["remember"] = "on" + + extr.sleep(2.0, "login") + url = "https://www.deviantart.com/_sisu/do/signin" + response = extr.request(url, method="POST", data=data) + + if not response.history: + raise exception.AuthenticationError() + + return { + cookie.name: cookie.value + for cookie in extr.session.cookies + } + + ############################################################################### # Journal Formats ############################################################# diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index f692a90..41431dc 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -69,14 +69,28 @@ class FanboxExtractor(Extractor): if post["type"] == "article": post["articleBody"] = content_body.copy() if "blocks" in content_body: - content = [] + content = [] # text content + images = [] # image IDs in 'body' order + append = content.append + append_img = images.append for block in content_body["blocks"]: if "text" in block: append(block["text"]) if "links" in block: for link in block["links"]: append(link["url"]) + if "imageId" in block: + append_img(block["imageId"]) + + if images and "imageMap" in content_body: + # reorder 'imageMap' (#2718) + image_map = content_body["imageMap"] + content_body["imageMap"] = { + image_id: image_map[image_id] + for image_id in images + } + post["content"] = "\n".join(content) post["date"] = text.parse_datetime(post["publishedDatetime"]) @@ -294,6 +308,10 @@ class FanboxPostExtractor(FanboxExtractor): r"Thank you for your continued support of FANBOX.$", }, }), + # imageMap file order (#2718) + ("https://mochirong.fanbox.cc/posts/3746116", { + "url": "c92ddd06f2efc4a5fe30ec67e21544f79a5c4062", + }), ) def __init__(self, match): diff --git a/gallery_dl/extractor/fanleaks.py b/gallery_dl/extractor/fanleaks.py new file mode 100644 index 0000000..466bb8c --- /dev/null +++ b/gallery_dl/extractor/fanleaks.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://fanleaks.club/""" + +from .common import Extractor, Message +from .. 
import text, exception + + +class FanleaksExtractor(Extractor): + """Base class for Fanleaks extractors""" + category = "fanleaks" + directory_fmt = ("{category}", "{model}") + filename_fmt = "{model_id}_{id}.{extension}" + archive_fmt = "{model_id}_{id}" + root = "https://fanleaks.club" + + def __init__(self, match): + Extractor.__init__(self, match) + self.model_id = match.group(1) + + def extract_post(self, url): + extr = text.extract_from(self.request(url, notfound="post").text) + data = { + "model_id": self.model_id, + "model" : text.unescape(extr('text-lg">', "</a>")), + "id" : text.parse_int(self.id), + "type" : extr('type="', '"')[:5] or "photo", + } + url = extr('src="', '"') + yield Message.Directory, data + yield Message.Url, url, text.nameext_from_url(url, data) + + +class FanleaksPostExtractor(FanleaksExtractor): + """Extractor for individual posts on fanleak.club""" + subcategory = "post" + pattern = r"(?:https?://)?(?:www\.)?fanleaks\.club/([^/?#]+)/(\d+)" + test = ( + ("https://fanleaks.club/selti/880", { + "pattern": (r"https://fanleaks\.club//models" + r"/selti/images/selti_0880\.jpg"), + "keyword": { + "model_id": "selti", + "model" : "Selti", + "id" : 880, + "type" : "photo", + }, + }), + ("https://fanleaks.club/daisy-keech/1038", { + "pattern": (r"https://fanleaks\.club//models" + r"/daisy-keech/videos/daisy-keech_1038\.mp4"), + "keyword": { + "model_id": "daisy-keech", + "model" : "Daisy Keech", + "id" : 1038, + "type" : "video", + }, + }), + ("https://fanleaks.club/hannahowo/000", { + "exception": exception.NotFoundError, + }), + ) + + def __init__(self, match): + FanleaksExtractor.__init__(self, match) + self.id = match.group(2) + + def items(self): + url = "{}/{}/{}".format(self.root, self.model_id, self.id) + return self.extract_post(url) + + +class FanleaksModelExtractor(FanleaksExtractor): + """Extractor for all posts from a fanleaks model""" + subcategory = "model" + pattern = (r"(?:https?://)?(?:www\.)?fanleaks\.club" + r"/(?!latest/?$)([^/?#]+)/?$") + test = ( + ("https://fanleaks.club/hannahowo", { + "pattern": (r"https://fanleaks\.club//models" + r"/hannahowo/(images|videos)/hannahowo_\d+\.\w+"), + "range" : "1-100", + "count" : 100, + }), + ("https://fanleaks.club/belle-delphine", { + "pattern": (r"https://fanleaks\.club//models" + r"/belle-delphine/(images|videos)" + r"/belle-delphine_\d+\.\w+"), + "range" : "1-100", + "count" : 100, + }), + ("https://fanleaks.club/daisy-keech"), + ) + + def items(self): + page_num = 1 + page = self.request( + self.root + "/" + self.model_id, notfound="model").text + data = { + "model_id": self.model_id, + "model" : text.unescape( + text.extr(page, 'mt-4">', "</h1>")), + "type" : "photo", + } + page_url = text.extr(page, "url: '", "'") + while True: + page = self.request("{}{}".format(page_url, page_num)).text + if not page: + return + + for item in text.extract_iter(page, '<a href="/', "</a>"): + self.id = id = text.extr(item, "/", '"') + if "/icon-play.svg" in item: + url = "{}/{}/{}".format(self.root, self.model_id, id) + yield from self.extract_post(url) + continue + + data["id"] = text.parse_int(id) + url = text.extr(item, 'src="', '"').replace( + "/thumbs/", "/", 1) + yield Message.Directory, data + yield Message.Url, url, text.nameext_from_url(url, data) + page_num += 1 diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index d8109e1..8d73949 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 
2014-2022 Mike Fährmann +# Copyright 2014-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -195,7 +195,7 @@ class GelbooruPostExtractor(GelbooruBase, # notes ("https://gelbooru.com/index.php?page=post&s=view&id=5997331", { "options": (("notes", True),), - "keywords": { + "keyword": { "notes": [ { "body": "Look over this way when you talk~", diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index 56bd048..1efbbf0 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2022 Mike Fährmann +# Copyright 2016-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,25 +9,37 @@ """Extractors for https://www.imagefap.com/""" from .common import Extractor, Message -from .. import text +from .. import text, exception import json - BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com" class ImagefapExtractor(Extractor): """Base class for imagefap extractors""" category = "imagefap" + root = "https://www.imagefap.com" directory_fmt = ("{category}", "{gallery_id} {title}") filename_fmt = "{category}_{gallery_id}_{filename}.{extension}" archive_fmt = "{gallery_id}_{image_id}" - root = "https://www.imagefap.com" + request_interval = (2.0, 4.0) def __init__(self, match): Extractor.__init__(self, match) self.session.headers["Referer"] = self.root + def request(self, url, **kwargs): + response = Extractor.request(self, url, **kwargs) + + if response.history and response.url.endswith("/human-verification"): + msg = text.extr(response.text, '<div class="mt-4', '<') + if msg: + msg = " ".join(msg.partition(">")[2].split()) + raise exception.StopExtraction("'%s'", msg) + self.log.warning("HTTP redirect to %s", response.url) + + return response + class ImagefapGalleryExtractor(ImagefapExtractor): """Extractor for image galleries from imagefap.com""" @@ -41,12 +53,20 @@ class ImagefapGalleryExtractor(ImagefapExtractor): "keyword": "2ba96e84c2952c4750e9fa94a3f2b1f965cec2f3", "content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab", }), - ("https://www.imagefap.com/gallery/5486966", { + ("https://www.imagefap.com/gallery/7876223", { "pattern": r"https://cdnh?\.imagefap\.com" r"/images/full/\d+/\d+/\d+\.jpg", - "keyword": "8d2e562df7a0bc9e8eecb9d1bb68d32b4086bf98", - "archive": False, - "count": 62, + "keyword": { + "count": 44, + "gallery_id": 7876223, + "image_id": int, + "num": int, + "tags": ["big ass", "panties", "horny", + "pussy", "exposed", "outdoor"], + "title": "Kelsi Monroe in lingerie", + "uploader": "BdRachel", + }, + "count": 44, }), ("https://www.imagefap.com/gallery.php?gid=7102714"), ("https://beta.imagefap.com/gallery.php?gid=7102714"), @@ -118,12 +138,20 @@ class ImagefapImageExtractor(ImagefapExtractor): subcategory = "image" pattern = BASE_PATTERN + r"/photo/(\d+)" test = ( - ("https://www.imagefap.com/photo/1369341772/", { + ("https://www.imagefap.com/photo/1962981893", { "pattern": r"https://cdnh?\.imagefap\.com" - r"/images/full/\d+/\d+/\d+\.jpg", - "keyword": "8894e45f7262020d8d66ce59917315def1fc475b", + r"/images/full/65/196/1962981893\.jpg", + "keyword": { + "date": "21/08/2014", + "gallery_id": 7876223, + "height": 1600, + "image_id": 1962981893, + "title": "Kelsi Monroe in lingerie", + "uploader": "BdRachel", + "width": 1066, + }, }), - 
("https://beta.imagefap.com/photo/1369341772/"), + ("https://beta.imagefap.com/photo/1962981893"), ) def __init__(self, match): @@ -159,61 +187,70 @@ class ImagefapImageExtractor(ImagefapExtractor): }) -class ImagefapUserExtractor(ImagefapExtractor): - """Extractor for all galleries from a user at imagefap.com""" - subcategory = "user" - categorytransfer = True - pattern = (BASE_PATTERN + - r"/(?:profile(?:\.php\?user=|/)([^/?#]+)" - r"|usergallery\.php\?userid=(\d+))") +class ImagefapFolderExtractor(ImagefapExtractor): + """Extractor for imagefap user folders""" + subcategory = "folder" + pattern = (BASE_PATTERN + r"/(?:organizer/|" + r"(?:usergallery\.php\?user(id)?=([^&#]+)&" + r"|profile/([^/?#]+)/galleries\?)folderid=)(\d+|-1)") test = ( - ("https://www.imagefap.com/profile/LucyRae/galleries", { - "url": "822cb6cbb6f474ca2d0f58d1d6d253bc2338937a", + ("https://www.imagefap.com/organizer/409758", { + "pattern": r"https://www\.imagefap\.com/gallery/7876223", + "url": "37822523e6e4a56feb9dea35653760c86b44ff89", + "count": 1, }), - ("https://www.imagefap.com/usergallery.php?userid=1862791", { - "url": "822cb6cbb6f474ca2d0f58d1d6d253bc2338937a", + (("https://www.imagefap.com/usergallery.php" + "?userid=1981976&folderid=409758"), { + "url": "37822523e6e4a56feb9dea35653760c86b44ff89", + }), + (("https://www.imagefap.com/usergallery.php" + "?user=BdRachel&folderid=409758"), { + "url": "37822523e6e4a56feb9dea35653760c86b44ff89", + }), + ("https://www.imagefap.com/profile/BdRachel/galleries?folderid=-1", { + "pattern": ImagefapGalleryExtractor.pattern, + "range": "1-40", + }), + (("https://www.imagefap.com/usergallery.php" + "?userid=1981976&folderid=-1"), { + "pattern": ImagefapGalleryExtractor.pattern, + "range": "1-40", + }), + (("https://www.imagefap.com/usergallery.php" + "?user=BdRachel&folderid=-1"), { + "pattern": ImagefapGalleryExtractor.pattern, + "range": "1-40", }), - ("https://www.imagefap.com/profile.php?user=LucyRae"), - ("https://beta.imagefap.com/profile.php?user=LucyRae"), ) def __init__(self, match): ImagefapExtractor.__init__(self, match) - self.user, self.user_id = match.groups() + self._id, user, profile, self.folder_id = match.groups() + self.user = user or profile def items(self): - for folder_id in self.folders(): - for gallery_id, name in self.galleries(folder_id): - url = "{}/gallery/{}".format(self.root, gallery_id) - data = { - "gallery_id": text.parse_int(gallery_id), - "title" : text.unescape(name), - "_extractor": ImagefapGalleryExtractor, - } - yield Message.Queue, url, data - - def folders(self): - """Return a list of folder_ids of a specific user""" - if self.user: - url = "{}/profile/{}/galleries".format(self.root, self.user) - else: - url = "{}/usergallery.php?userid={}".format( - self.root, self.user_id) - - response = self.request(url) - self.user = response.url.split("/")[-2] - folders = text.extr(response.text, ' id="tgl_all" value="', '"') - return folders.rstrip("|").split("|") + for gallery_id, name in self.galleries(self.folder_id): + url = "{}/gallery/{}".format(self.root, gallery_id) + data = { + "gallery_id": gallery_id, + "title" : text.unescape(name), + "_extractor": ImagefapGalleryExtractor, + } + yield Message.Queue, url, data def galleries(self, folder_id): - """Yield gallery_ids of a folder""" + """Yield gallery IDs and titles of a folder""" if folder_id == "-1": - url = "{}/profile/{}/galleries?folderid=-1".format( - self.root, self.user) + if self._id: + url = "{}/usergallery.php?userid={}&folderid=-1".format( + self.root, self.user) + 
else: + url = "{}/profile/{}/galleries?folderid=-1".format( + self.root, self.user) else: url = "{}/organizer/{}/".format(self.root, folder_id) - params = {"page": 0} + params = {"page": 0} while True: extr = text.extract_from(self.request(url, params=params).text) cnt = 0 @@ -228,3 +265,53 @@ class ImagefapUserExtractor(ImagefapExtractor): if cnt < 25: break params["page"] += 1 + + +class ImagefapUserExtractor(ImagefapExtractor): + """Extractor for an imagefap user profile""" + subcategory = "user" + pattern = (BASE_PATTERN + + r"/(?:profile(?:\.php\?user=|/)([^/?#]+)(?:/galleries)?" + r"|usergallery\.php\?userid=(\d+))(?:$|#)") + test = ( + ("https://www.imagefap.com/profile/BdRachel", { + "pattern": ImagefapFolderExtractor.pattern, + "count": ">= 18", + }), + ("https://www.imagefap.com/usergallery.php?userid=1862791", { + "pattern": r"https://www\.imagefap\.com" + r"/profile/LucyRae/galleries\?folderid=-1", + "count": 1, + }), + ("https://www.imagefap.com/profile/BdRachel/galleries"), + ("https://www.imagefap.com/profile.php?user=BdRachel"), + ("https://beta.imagefap.com/profile.php?user=BdRachel"), + ) + + def __init__(self, match): + ImagefapExtractor.__init__(self, match) + self.user, self.user_id = match.groups() + + def items(self): + data = {"_extractor": ImagefapFolderExtractor} + + for folder_id in self.folders(): + if folder_id == "-1": + url = "{}/profile/{}/galleries?folderid=-1".format( + self.root, self.user) + else: + url = "{}/organizer/{}/".format(self.root, folder_id) + yield Message.Queue, url, data + + def folders(self): + """Return a list of folder IDs of a user""" + if self.user: + url = "{}/profile/{}/galleries".format(self.root, self.user) + else: + url = "{}/usergallery.php?userid={}".format( + self.root, self.user_id) + + response = self.request(url) + self.user = response.url.split("/")[-2] + folders = text.extr(response.text, ' id="tgl_all" value="', '"') + return folders.rstrip("|").split("|") diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 8a61728..541e427 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -67,6 +67,7 @@ class KemonopartyExtractor(Extractor): headers["Referer"] = "{}/{}/user/{}/post/{}".format( self.root, post["service"], post["user"], post["id"]) post["_http_headers"] = headers + post["_http_validate"] = _validate post["date"] = text.parse_datetime( post["published"] or post["added"], "%a, %d %b %Y %H:%M:%S %Z") @@ -197,6 +198,11 @@ class KemonopartyExtractor(Extractor): return dms +def _validate(response): + return (response.headers["content-length"] != "9" and + response.content != b"not found") + + class KemonopartyUserExtractor(KemonopartyExtractor): """Extractor for all posts from a kemono.party user listing""" subcategory = "user" @@ -309,6 +315,12 @@ class KemonopartyPostExtractor(KemonopartyExtractor): "pattern": r"https://coomer\.party/data/7d/3f/7d3fd9804583dc224968" r"c0591163ec91794552b04f00a6c2f42a15b68231d5a8\.jpg", }), + # invalid file (#3510) + ("https://kemono.party/patreon/user/19623797/post/29035449", { + "pattern": r"907ba78b4545338d3539683e63ecb51c" + r"f51c10adc9dabd86e92bd52339f298b9\.txt", + "content": "da39a3ee5e6b4b0d3255bfef95601890afd80709", + }), ("https://kemono.party/subscribestar/user/alcorart/post/184330"), ("https://www.kemono.party/subscribestar/user/alcorart/post/184330"), ("https://beta.kemono.party/subscribestar/user/alcorart/post/184330"), diff --git a/gallery_dl/extractor/lynxchan.py 
b/gallery_dl/extractor/lynxchan.py index bbcf9c0..85e8bb1 100644 --- a/gallery_dl/extractor/lynxchan.py +++ b/gallery_dl/extractor/lynxchan.py @@ -17,9 +17,13 @@ class LynxchanExtractor(BaseExtractor): BASE_PATTERN = LynxchanExtractor.update({ + "bbw-chan": { + "root": "https://bbw-chan.nl", + "pattern": r"bbw-chan\.nl", + }, "kohlchan": { "root": "https://kohlchan.net", - "pattern": r"kohlchan\.net" + "pattern": r"kohlchan\.net", }, "endchan": { "root": None, @@ -37,6 +41,11 @@ class LynxchanThreadExtractor(LynxchanExtractor): archive_fmt = "{boardUri}_{postId}_{num}" pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)" test = ( + ("https://bbw-chan.nl/bbwdraw/res/499.html", { + "pattern": r"https://bbw-chan\.nl/\.media/[0-9a-f]{64}(\.\w+)?$", + "count": ">= 352", + }), + ("https://bbw-chan.nl/bbwdraw/res/489.html"), ("https://kohlchan.net/a/res/4594.html", { "pattern": r"https://kohlchan\.net/\.media/[0-9a-f]{64}(\.\w+)?$", "count": ">= 80", @@ -78,6 +87,11 @@ class LynxchanBoardExtractor(LynxchanExtractor): subcategory = "board" pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)" test = ( + ("https://bbw-chan.nl/bbwdraw/", { + "pattern": LynxchanThreadExtractor.pattern, + "count": ">= 148", + }), + ("https://bbw-chan.nl/bbwdraw/2.html"), ("https://kohlchan.net/a/", { "pattern": LynxchanThreadExtractor.pattern, "count": ">= 100", diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py index 3dbd5fc..5dc4cb6 100644 --- a/gallery_dl/extractor/myhentaigallery.py +++ b/gallery_dl/extractor/myhentaigallery.py @@ -44,7 +44,10 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor): extr = text.extract_from(page) split = text.split_html - title = extr('<div class="comic-description">\n<h1>', '</h1>') + title = extr('<div class="comic-description">\n', '</h1>').lstrip() + if title.startswith("<h1>"): + title = title[len("<h1>"):] + if not title: raise exception.NotFoundError("gallery") diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py index dfe78ae..f9c6abf 100644 --- a/gallery_dl/extractor/nitter.py +++ b/gallery_dl/extractor/nitter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2022 Mike Fährmann +# Copyright 2022-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -451,7 +451,7 @@ class NitterTweetExtractor(NitterExtractor): }), # age-restricted (#2354) ("https://nitter.unixfox.eu/mightbecurse/status/1492954264909479936", { - "keywords": {"date": "dt:2022-02-13 20:10:09"}, + "keyword": {"date": "dt:2022-02-13 20:10:00"}, "count": 1, }), ) diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index f786be6..63b16ce 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2022 Mike Fährmann +# Copyright 2016-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -26,6 +26,13 @@ class PinterestExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) + + domain = self.config("domain") + if not domain or domain == "auto" : + self.root = text.root_from_url(match.group(0)) + else: + self.root = text.ensure_http_scheme(domain) + self.api = PinterestAPI(self) def items(self): @@ -142,7 +149,7 @@ class PinterestBoardExtractor(PinterestExtractor): directory_fmt = 
("{category}", "{board[owner][username]}", "{board[name]}") archive_fmt = "{board[id]}_{id}" pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#&]+)" - "/(?!_saved|_created)([^/?#&]+)/?$") + "/(?!_saved|_created|pins/)([^/?#&]+)/?$") test = ( ("https://www.pinterest.com/g1952849/test-/", { "pattern": r"https://i\.pinimg\.com/originals/", @@ -151,7 +158,7 @@ class PinterestBoardExtractor(PinterestExtractor): # board with sections (#835) ("https://www.pinterest.com/g1952849/stuff/", { "options": (("sections", True),), - "count": 5, + "count": 4, }), # secret board (#1055) ("https://www.pinterest.de/g1952849/secret/", { @@ -194,11 +201,11 @@ class PinterestUserExtractor(PinterestExtractor): subcategory = "user" pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)(?:/_saved)?/?$" test = ( - ("https://www.pinterest.de/g1952849/", { + ("https://www.pinterest.com/g1952849/", { "pattern": PinterestBoardExtractor.pattern, "count": ">= 2", }), - ("https://www.pinterest.de/g1952849/_saved/"), + ("https://www.pinterest.com/g1952849/_saved/"), ) def __init__(self, match): @@ -213,15 +220,38 @@ class PinterestUserExtractor(PinterestExtractor): yield Message.Queue, self.root + url, board +class PinterestAllpinsExtractor(PinterestExtractor): + """Extractor for a user's 'All Pins' feed""" + subcategory = "allpins" + directory_fmt = ("{category}", "{user}") + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/pins/?$" + test = ("https://www.pinterest.com/g1952849/pins/", { + "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}" + r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w{3}", + "count": 7, + }) + + def __init__(self, match): + PinterestExtractor.__init__(self, match) + self.user = text.unquote(match.group(1)) + + def metadata(self): + return {"user": self.user} + + def pins(self): + return self.api.user_pins(self.user) + + class PinterestCreatedExtractor(PinterestExtractor): """Extractor for a user's created pins""" subcategory = "created" directory_fmt = ("{category}", "{user}") pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/_created/?$" - test = ("https://www.pinterest.com/amazon/_created", { + test = ("https://www.pinterest.de/digitalmomblog/_created/", { "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}" r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg", "count": 10, + "range": "1-10", }) def __init__(self, match): @@ -272,7 +302,7 @@ class PinterestSearchExtractor(PinterestExtractor): subcategory = "search" directory_fmt = ("{category}", "Search", "{search}") pattern = BASE_PATTERN + r"/search/pins/?\?q=([^&#]+)" - test = ("https://www.pinterest.de/search/pins/?q=nature", { + test = ("https://www.pinterest.com/search/pins/?q=nature", { "range": "1-50", "count": ">= 50", }) @@ -357,26 +387,23 @@ class PinterestAPI(): - https://github.com/seregazhuk/php-pinterest-bot """ - BASE_URL = "https://www.pinterest.com" - HEADERS = { - "Accept" : "application/json, text/javascript, " - "*/*, q=0.01", - "Accept-Language" : "en-US,en;q=0.5", - "Referer" : BASE_URL + "/", - "X-Requested-With" : "XMLHttpRequest", - "X-APP-VERSION" : "31461e0", - "X-CSRFToken" : None, - "X-Pinterest-AppState": "active", - "Origin" : BASE_URL, - } - def __init__(self, extractor): - self.extractor = extractor - csrf_token = util.generate_token() - self.headers = self.HEADERS.copy() - self.headers["X-CSRFToken"] = csrf_token + + self.extractor = extractor + self.root = extractor.root self.cookies = {"csrftoken": csrf_token} + self.headers = { + "Accept" : "application/json, text/javascript, " + "*/*, q=0.01", + "Accept-Language" : 
"en-US,en;q=0.5", + "Referer" : self.root + "/", + "X-Requested-With" : "XMLHttpRequest", + "X-APP-VERSION" : "0c4af40", + "X-CSRFToken" : csrf_token, + "X-Pinterest-AppState": "active", + "Origin" : self.root, + } def pin(self, pin_id): """Query information about a pin""" @@ -437,6 +464,16 @@ class PinterestAPI(): options = {"board_id": board_id, "add_vase": True} return self._pagination("BoardRelatedPixieFeed", options) + def user_pins(self, user): + """Yield all pins from 'user'""" + options = { + "is_own_profile_pins": False, + "username" : user, + "field_set_key" : "grid_item", + "pin_filter" : None, + } + return self._pagination("UserPins", options) + def user_activity_pins(self, user): """Yield pins created by 'user'""" options = { @@ -462,7 +499,7 @@ class PinterestAPI(): def _login_impl(self, username, password): self.extractor.log.info("Logging in as %s", username) - url = self.BASE_URL + "/resource/UserSessionResource/create/" + url = self.root + "/resource/UserSessionResource/create/" options = { "username_or_email": username, "password" : password, @@ -485,7 +522,7 @@ class PinterestAPI(): } def _call(self, resource, options): - url = "{}/resource/{}Resource/get/".format(self.BASE_URL, resource) + url = "{}/resource/{}Resource/get/".format(self.root, resource) params = {"data": json.dumps({"options": options}), "source_url": ""} response = self.extractor.request( @@ -497,10 +534,11 @@ class PinterestAPI(): except ValueError: data = {} - if response.status_code < 400 and not response.history: + if response.history: + self.root = text.root_from_url(response.url) + if response.status_code < 400: return data - - if response.status_code == 404 or response.history: + if response.status_code == 404: resource = self.extractor.subcategory.rpartition("-")[2] raise exception.NotFoundError(resource) self.extractor.log.debug("Server response: %s", response.text) diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 134361d..a17518f 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2022 Mike Fährmann +# Copyright 2014-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -45,7 +45,8 @@ class PixivExtractor(Extractor): work["tags"] = [tag["name"] for tag in work["tags"]] ratings = {0: "General", 1: "R-18", 2: "R-18G"} - userdata = self.config("metadata") + meta_user = self.config("metadata") + meta_bookmark = self.config("metadata-bookmark") metadata = self.metadata() works = self.works() @@ -61,8 +62,12 @@ class PixivExtractor(Extractor): del work["image_urls"] del work["meta_pages"] - if userdata: + if meta_user: work.update(self.api.user_detail(work["user"]["id"])) + if meta_bookmark and work["is_bookmarked"]: + detail = self.api.illust_bookmark_detail(work["id"]) + work["tags_bookmark"] = [tag["name"] for tag in detail["tags"] + if tag["is_registered"]] if transform_tags: transform_tags(work) work["num"] = 0 @@ -398,6 +403,8 @@ class PixivFavoriteExtractor(PixivExtractor): # own bookmarks ("https://www.pixiv.net/bookmark.php", { "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba", + "keyword": {"tags_bookmark": ["47", "hitman"]}, + "options": (("metadata-bookmark", True),), }), # own bookmarks with tag (#596) ("https://www.pixiv.net/bookmark.php?tag=foobar", { @@ -880,6 +887,11 @@ class PixivAppAPI(): params = {"illust_id": illust_id} return 
self._call("/v1/illust/detail", params)["illust"] + def illust_bookmark_detail(self, illust_id): + params = {"illust_id": illust_id} + return self._call( + "/v2/illust/bookmark/detail", params)["bookmark_detail"] + def illust_follow(self, restrict="all"): params = {"restrict": restrict} return self._pagination("/v2/illust/follow", params) @@ -900,9 +912,16 @@ class PixivAppAPI(): return self._pagination("/v1/search/illust", params) def user_bookmarks_illust(self, user_id, tag=None, restrict="public"): + """Return illusts bookmarked by a user""" params = {"user_id": user_id, "tag": tag, "restrict": restrict} return self._pagination("/v1/user/bookmarks/illust", params) + def user_bookmark_tags_illust(self, user_id, restrict="public"): + """Return bookmark tags defined by a user""" + params = {"user_id": user_id, "restrict": restrict} + return self._pagination( + "/v1/user/bookmark-tags/illust", params, "bookmark_tags") + @memcache(keyarg=1) def user_detail(self, user_id): params = {"user_id": user_id} diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py index 4283081..c35ee74 100644 --- a/gallery_dl/extractor/poipiku.py +++ b/gallery_dl/extractor/poipiku.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2022 Mike Fährmann +# Copyright 2022-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -59,7 +59,7 @@ class PoipikuExtractor(Extractor): "//img.", "//img-org.", 1) yield Message.Url, url, text.nameext_from_url(url, post) - if not extr('> show all', '<'): + if not extr(' show all(+', '<'): continue url = self.root + "/f/ShowAppendFileF.jsp" @@ -79,6 +79,9 @@ class PoipikuExtractor(Extractor): page = self.request( url, method="POST", headers=headers, data=data).json()["html"] + if page.startswith("You need to"): + self.log.warning("'%s'", page) + for thumb in text.extract_iter( page, 'class="IllustItemThumbImg" src="', '"'): post["num"] += 1 @@ -162,6 +165,21 @@ class PoipikuPostExtractor(PoipikuExtractor): "user_name": "wadahito", }, }), + # different warning button style + ("https://poipiku.com/3572553/5776587.html", { + "pattern": r"https://img-org\.poipiku.com/user_img\d+/003572553" + r"/005776587_(\d+_)?\w+\.jpeg$", + "count": 3, + "keyword": { + "count": "3", + "description": "ORANGE OASISボスネタバレ", + "num": int, + "post_category": "SPOILER", + "post_id": "5776587", + "user_id": "3572553", + "user_name": "nagakun", + }, + }), ) def __init__(self, match): diff --git a/gallery_dl/extractor/tcbscans.py b/gallery_dl/extractor/tcbscans.py new file mode 100644 index 0000000..cac5a54 --- /dev/null +++ b/gallery_dl/extractor/tcbscans.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://onepiecechapters.com/""" + +from .common import ChapterExtractor, MangaExtractor +from .. 
import text + + +class TcbscansChapterExtractor(ChapterExtractor): + category = "tcbscans" + pattern = (r"(?:https?://)?onepiecechapters\.com" + r"(/chapters/\d+/[^/?#]+)") + root = "https://onepiecechapters.com" + test = ( + (("https://onepiecechapters.com" + "/chapters/4708/chainsaw-man-chapter-108"), { + "pattern": (r"https://cdn\.[^/]+" + r"/(file|attachments/[^/]+)/[^/]+/[^.]+\.\w+"), + "count" : 17, + "keyword": { + "manga": "Chainsaw Man", + "chapter": 108, + "chapter_minor": "", + "lang": "en", + "language": "English", + }, + }), + ("https://onepiecechapters.com/chapters/4716/one-piece-chapter-1065", { + "pattern": (r"https://cdn\.[^/]+" + r"/(file|attachments/[^/]+)/[^/]+/[^.]+\.\w+"), + "count" : 18, + "keyword": { + "manga": "One Piece", + "chapter": 1065, + "chapter_minor": "", + "lang": "en", + "language": "English", + }, + }), + (("https://onepiecechapters.com/" + "chapters/44/ace-novel-manga-adaptation-chapter-1")), + ) + + def images(self, page): + return [ + (url, None) + for url in text.extract_iter( + page, '<img class="fixed-ratio-content" src="', '"') + ] + + def metadata(self, page): + manga, _, chapter = text.extr( + page, 'font-bold mt-8">', "</h1>").rpartition(" - Chapter ") + chapter, sep, minor = chapter.partition(".") + return { + "manga": text.unescape(manga), + "chapter": text.parse_int(chapter), + "chapter_minor": sep + minor, + "lang": "en", "language": "English", + } + + +class TcbscansMangaExtractor(MangaExtractor): + category = "tcbscans" + chapterclass = TcbscansChapterExtractor + pattern = (r"(?:https?://)?onepiecechapters\.com" + r"(/mangas/\d+/[^/?#]+)") + root = "https://onepiecechapters.com" + test = ( + ("https://onepiecechapters.com/mangas/13/chainsaw-man", { + "pattern": TcbscansChapterExtractor.pattern, + "range" : "1-50", + "count" : 50, + }), + ("https://onepiecechapters.com/mangas/4/jujutsu-kaisen", { + "pattern": TcbscansChapterExtractor.pattern, + "range" : "1-50", + "count" : 50, + }), + ("https://onepiecechapters.com/mangas/15/hunter-x-hunter"), + ) + + def chapters(self, page): + data = { + "manga": text.unescape(text.extr( + page, 'class="my-3 font-bold text-3xl">', "</h1>")), + "lang": "en", "language": "English", + } + + results = [] + page = text.extr(page, 'class="col-span-2"', 'class="order-1') + for chapter in text.extract_iter(page, "<a", "</a>"): + url = text.extr(chapter, 'href="', '"') + data["title"] = text.unescape(text.extr( + chapter, 'text-gray-500">', "</div>")) + chapter = text.extr( + chapter, 'font-bold">', "</div>").rpartition(" Chapter ")[2] + chapter, sep, minor = chapter.partition(".") + data["chapter"] = text.parse_int(chapter) + data["chapter_minor"] = sep + minor + results.append((self.root + url, data.copy())) + return results diff --git a/gallery_dl/extractor/telegraph.py b/gallery_dl/extractor/telegraph.py index 8e9bf2c..5996268 100644 --- a/gallery_dl/extractor/telegraph.py +++ b/gallery_dl/extractor/telegraph.py @@ -12,7 +12,6 @@ from .. 
import text class TelegraphGalleryExtractor(GalleryExtractor): """Extractor for articles from telegra.ph""" - category = "telegraph" root = "https://telegra.ph" directory_fmt = ("{category}", "{slug}") @@ -52,6 +51,23 @@ class TelegraphGalleryExtractor(GalleryExtractor): "url": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg", }, }), + ("https://telegra.ph/Vsyo-o-druzyah-moej-sestricy-05-27", { + "url": "c1f3048e5d94bee53af30a8c27f70b0d3b15438e", + "pattern": r"^https://pith1\.ru/uploads" + r"/posts/2019-12/\d+_\d+\.jpg$", + "keyword": { + "author": "Shotacon - заходи сюда", + "caption": "", + "count": 19, + "date": "dt:2022-05-27 16:17:27", + "description": "", + "num_formatted": r"re:^\d{2}$", + "post_url": "https://telegra.ph" + "/Vsyo-o-druzyah-moej-sestricy-05-27", + "slug": "Vsyo-o-druzyah-moej-sestricy-05-27", + "title": "Всё о друзьях моей сестрицы", + }, + }), ) def metadata(self, page): @@ -79,11 +95,12 @@ class TelegraphGalleryExtractor(GalleryExtractor): result = [] for figure in figures: - src, pos = text.extract(figure, 'src="', '"') - if src.startswith("/embed/"): + url, pos = text.extract(figure, 'src="', '"') + if url.startswith("/embed/"): continue + elif url.startswith("/"): + url = self.root + url caption, pos = text.extract(figure, "<figcaption>", "<", pos) - url = self.root + src num += 1 result.append((url, { diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 22aa78e..c2d8247 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2022 Mike Fährmann +# Copyright 2016-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -39,6 +39,7 @@ class TwitterExtractor(Extractor): self.videos = self.config("videos", True) self.cards = self.config("cards", False) self.cards_blacklist = self.config("cards-blacklist") + self.syndication = self.config("syndication") self._user = self._user_obj = None self._user_cache = {} self._init_sizes() @@ -75,11 +76,6 @@ class TwitterExtractor(Extractor): else: data = tweet - if seen_tweets is not None: - if data["id_str"] in seen_tweets: - continue - seen_tweets.add(data["id_str"]) - if not self.retweets and "retweeted_status_id_str" in data: self.log.debug("Skipping %s (retweet)", data["id_str"]) continue @@ -97,6 +93,13 @@ class TwitterExtractor(Extractor): self.log.debug("Skipping %s (reply)", data["id_str"]) continue + if seen_tweets is not None: + if data["id_str"] in seen_tweets: + self.log.debug( + "Skipping %s (previously seen)", data["id_str"]) + continue + seen_tweets.add(data["id_str"]) + files = [] if "extended_entities" in data: self._extract_media( @@ -220,14 +223,16 @@ class TwitterExtractor(Extractor): def _extract_twitpic(self, tweet, files): for url in tweet["entities"].get("urls", ()): url = url["expanded_url"] - if "//twitpic.com/" in url and "/photos/" not in url: - response = self.request(url, fatal=False) - if response.status_code >= 400: - continue - url = text.extr( - response.text, 'name="twitter:image" value="', '"') - if url: - files.append({"url": url}) + if "//twitpic.com/" not in url or "/photos/" in url: + continue + if url.startswith("http:"): + url = "https" + url[4:] + response = self.request(url, fatal=False) + if response.status_code >= 400: + continue + url = text.extr(response.text, 'name="twitter:image" value="', '"') + if url: + files.append({"url": url}) def 
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 22aa78e..c2d8247 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -39,6 +39,7 @@ class TwitterExtractor(Extractor):
         self.videos = self.config("videos", True)
         self.cards = self.config("cards", False)
         self.cards_blacklist = self.config("cards-blacklist")
+        self.syndication = self.config("syndication")
         self._user = self._user_obj = None
         self._user_cache = {}
         self._init_sizes()
@@ -75,11 +76,6 @@ class TwitterExtractor(Extractor):
             else:
                 data = tweet
 
-            if seen_tweets is not None:
-                if data["id_str"] in seen_tweets:
-                    continue
-                seen_tweets.add(data["id_str"])
-
             if not self.retweets and "retweeted_status_id_str" in data:
                 self.log.debug("Skipping %s (retweet)", data["id_str"])
                 continue
@@ -97,6 +93,13 @@ class TwitterExtractor(Extractor):
                 self.log.debug("Skipping %s (reply)", data["id_str"])
                 continue
 
+            if seen_tweets is not None:
+                if data["id_str"] in seen_tweets:
+                    self.log.debug(
+                        "Skipping %s (previously seen)", data["id_str"])
+                    continue
+                seen_tweets.add(data["id_str"])
+
             files = []
             if "extended_entities" in data:
                 self._extract_media(
@@ -220,14 +223,16 @@ class TwitterExtractor(Extractor):
     def _extract_twitpic(self, tweet, files):
         for url in tweet["entities"].get("urls", ()):
             url = url["expanded_url"]
-            if "//twitpic.com/" in url and "/photos/" not in url:
-                response = self.request(url, fatal=False)
-                if response.status_code >= 400:
-                    continue
-                url = text.extr(
-                    response.text, 'name="twitter:image" value="', '"')
-                if url:
-                    files.append({"url": url})
+            if "//twitpic.com/" not in url or "/photos/" in url:
+                continue
+            if url.startswith("http:"):
+                url = "https" + url[4:]
+            response = self.request(url, fatal=False)
+            if response.status_code >= 400:
+                continue
+            url = text.extr(response.text, 'name="twitter:image" value="', '"')
+            if url:
+                files.append({"url": url})
 
     def _transform_tweet(self, tweet):
         if "author" in tweet:
@@ -299,6 +304,9 @@ class TwitterExtractor(Extractor):
 
         if "legacy" in user:
             user = user["legacy"]
+        elif "statuses_count" not in user and self.syndication == "extended":
+            # try to fetch extended user data
+            user = self.api.user_by_screen_name(user["screen_name"])["legacy"]
 
         uget = user.get
         entities = user["entities"]
@@ -361,18 +369,22 @@ class TwitterExtractor(Extractor):
     def _expand_tweets(self, tweets):
         seen = set()
         for tweet in tweets:
-
-            if "legacy" in tweet:
-                cid = tweet["legacy"]["conversation_id_str"]
-            else:
-                cid = tweet["conversation_id_str"]
-
-            if cid not in seen:
-                seen.add(cid)
-                try:
-                    yield from self.api.tweet_detail(cid)
-                except Exception:
-                    yield tweet
+            obj = tweet["legacy"] if "legacy" in tweet else tweet
+            cid = obj.get("conversation_id_str")
+            if not cid:
+                tid = obj["id_str"]
+                self.log.warning(
+                    "Unable to expand %s (no 'conversation_id')", tid)
+                continue
+            if cid in seen:
+                self.log.debug(
+                    "Skipping expansion of %s (previously seen)", cid)
+                continue
+            seen.add(cid)
+            try:
+                yield from self.api.tweet_detail(cid)
+            except Exception:
+                yield tweet
 
     def _make_tweet(self, user, id_str, url, timestamp):
         return {
@@ -772,7 +784,7 @@ class TwitterTweetExtractor(TwitterExtractor):
         # age-restricted (#2354)
         ("https://twitter.com/mightbecursed/status/1492954264909479936", {
             "options": (("syndication", True),),
-            "keywords": {"date": "dt:2022-02-13 20:10:09"},
+            "keyword": {"date": "dt:2022-02-13 20:10:09"},
             "count": 1,
         }),
         # media alt texts / descriptions (#2617)
@@ -991,7 +1003,7 @@ class TwitterAPI():
         }
 
         self._nsfw_warning = True
-        self._syndication = extractor.config("syndication")
+        self._syndication = self.extractor.syndication
         self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
 
         cookies = extractor.session.cookies
@@ -1516,6 +1528,12 @@ class TwitterAPI():
         else:
             retweet_id = None
 
+        # assume 'conversation_id' is the same as 'id' when the tweet
+        # is not a reply
+        if "conversation_id_str" not in tweet and \
+                "in_reply_to_status_id_str" not in tweet:
+            tweet["conversation_id_str"] = tweet["id_str"]
+
         tweet["created_at"] = text.parse_datetime(
             tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime(
             "%a %b %d %H:%M:%S +0000 %Y")
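
Among the twitter changes above, TwitPic links are now forced to https:// before being
requested (#3449). The rewrite is plain string surgery; a one-line sketch:

    # "http" is 4 characters, so url[4:] keeps "://..." and only the scheme is swapped.
    url = "http://twitpic.com/example"
    if url.startswith("http:"):
        url = "https" + url[4:]
    print(url)  # https://twitpic.com/example
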
При " - "правильном оформлении они будут опубликованы", + "info": str, "name": "cosplayinrussia", "nick": "Косплей | Cosplay 18+", }, diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py index 74da615..03fd909 100644 --- a/gallery_dl/extractor/zerochan.py +++ b/gallery_dl/extractor/zerochan.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2022 Mike Fährmann +# Copyright 2022-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -111,13 +111,15 @@ class ZerochanTagExtractor(ZerochanExtractor): test = ("https://www.zerochan.net/Perth+%28Kantai+Collection%29", { "pattern": r"https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)", "count": "> 24", - "keywords": { + "keyword": { "extension": r"re:jpg|png", - "file_url": "", - "filename": r"re:Perth.\(Kantai.Collection\).full.\d+", + "file_url": r"re:https://static\.zerochan\.net" + r"/.+\.full\.\d+\.(jpg|png)", + "filename": r"re:(Perth\.\(Kantai\.Collection\)" + r"|Kantai\.Collection)\.full\.\d+", "height": r"re:^\d+$", "id": r"re:^\d+$", - "name": "Perth (Kantai Collection)", + "name": r"re:(Perth \(Kantai Collection\)|Kantai Collection)", "search_tags": "Perth (Kantai Collection)", "size": r"re:^\d+k$", "width": r"re:^\d+$", diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index 8a45330..58bf48d 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -60,14 +60,21 @@ class StringFormatter(): - "u": calls str.upper - "c": calls str.capitalize - "C": calls string.capwords - - "j". calls json.dumps + - "g": calls text.slugify() + - "j": calls json.dumps - "t": calls str.strip + - "T": calls util.datetime_to_timestamp_string() - "d": calls text.parse_timestamp - - "U": calls urllib.parse.unescape + - "s": calls str() - "S": calls util.to_string() - - "T": calls util.to_timestamü() + - "U": calls urllib.parse.unescape + - "r": calls repr() + - "a": calls ascii() - Example: {f!l} -> "example"; {f!u} -> "EXAMPLE" + # Go to _CONVERSIONS and _SPECIFIERS below to se all of them, read: + # https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md + Extra Format Specifiers: - "?<before>/<after>/": Adds <before> and <after> to the actual value if it evaluates to True. diff --git a/gallery_dl/option.py b/gallery_dl/option.py index 91e9169..32cac79 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -325,7 +325,7 @@ def build_parser(): configuration.add_argument( "--ignore-config", dest="load_config", action="store_false", - help="Do not read the default configuration files", + help="Do not read default configuration files", ) authentication = parser.add_argument_group("Authentication Options") @@ -349,7 +349,7 @@ def build_parser(): selection.add_argument( "--download-archive", dest="archive", metavar="FILE", action=ConfigAction, - help=("Record all downloaded files in the archive file and " + help=("Record all downloaded or skipped files in FILE and " "skip downloading any file already in it"), ) selection.add_argument( @@ -367,19 +367,20 @@ def build_parser(): selection.add_argument( "--range", dest="image-range", metavar="RANGE", action=ConfigAction, - help=("Index-range(s) specifying which images to download. " - "For example '5-10' or '1,3-5,10-'"), + help=("Index range(s) specifying which files to download. " + "These can be either a constant value, range, or slice " + "(e.g. 
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 91e9169..32cac79 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -325,7 +325,7 @@ def build_parser():
     configuration.add_argument(
         "--ignore-config",
         dest="load_config", action="store_false",
-        help="Do not read the default configuration files",
+        help="Do not read default configuration files",
     )
 
     authentication = parser.add_argument_group("Authentication Options")
@@ -349,7 +349,7 @@ def build_parser():
     selection.add_argument(
         "--download-archive",
         dest="archive", metavar="FILE", action=ConfigAction,
-        help=("Record all downloaded files in the archive file and "
+        help=("Record all downloaded or skipped files in FILE and "
               "skip downloading any file already in it"),
     )
     selection.add_argument(
@@ -367,19 +367,20 @@ def build_parser():
     selection.add_argument(
         "--range",
         dest="image-range", metavar="RANGE", action=ConfigAction,
-        help=("Index-range(s) specifying which images to download. "
-              "For example '5-10' or '1,3-5,10-'"),
+        help=("Index range(s) specifying which files to download. "
+              "These can be either a constant value, range, or slice "
+              "(e.g. '5', '8-20', or '1:24:3')"),
     )
     selection.add_argument(
         "--chapter-range",
         dest="chapter-range", metavar="RANGE", action=ConfigAction,
-        help=("Like '--range', but applies to manga-chapters "
+        help=("Like '--range', but applies to manga chapters "
              "and other delegated URLs"),
    )
    selection.add_argument(
        "--filter",
        dest="image-filter", metavar="EXPR", action=ConfigAction,
-        help=("Python expression controlling which images to download. "
+        help=("Python expression controlling which files to download. "
              "Files for which the expression evaluates to False are ignored. "
              "Available keys are the filename-specific ones listed by '-K'. "
              "Example: --filter \"image_width >= 1000 and "
@@ -388,7 +389,7 @@ def build_parser():
     selection.add_argument(
         "--chapter-filter",
         dest="chapter-filter", metavar="EXPR", action=ConfigAction,
-        help=("Like '--filter', but applies to manga-chapters "
+        help=("Like '--filter', but applies to manga chapters "
               "and other delegated URLs"),
     )
 
@@ -472,7 +473,7 @@ def build_parser():
         dest="postprocessors", metavar="CMD",
         action=AppendCommandAction, const={"name": "exec"},
         help=("Execute CMD for each downloaded file. "
-              "Example: --exec 'convert {} {}.png && rm {}'"),
+              "Example: --exec \"convert {} {}.png && rm {}\""),
     )
     postprocessor.add_argument(
         "--exec-after",
@@ -480,7 +481,7 @@ def build_parser():
         action=AppendCommandAction, const={
             "name": "exec", "event": "finalize"},
         help=("Execute CMD after all files were downloaded successfully. "
-              "Example: --exec-after 'cd {} && convert * ../doc.pdf'"),
+              "Example: --exec-after \"cd {} && convert * ../doc.pdf\""),
     )
     postprocessor.add_argument(
         "-P", "--postprocessor",
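
The reworked --range help above refers to the new slice notation; its semantics are easiest
to see on the predicate itself. A sketch, assuming the RangePredicate API from the util.py
diff that follows:

    from gallery_dl import util

    print(util.RangePredicate("8-20").ranges)    # [range(8, 21)]     (inclusive range)
    print(util.RangePredicate("1:24:3").ranges)  # [range(1, 24, 3)]  (slice, stop exclusive)
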
group.partition("-") - if not sep: - beg = end = int(first) - else: - beg = int(first) if first.strip() else 1 - end = int(last) if last.strip() else sys.maxsize - ranges.append((beg, end) if beg <= end else (end, beg)) - return ranges + elif ":" in group: + start, _, stop = group.partition(":") + stop, _, step = stop.partition(":") + append(range( + int(start) if start.strip() else 1, + int(stop) if stop.strip() else sys.maxsize, + int(step) if step.strip() else 1, + )) + + elif "-" in group: + start, _, stop = group.partition("-") + append(range( + int(start) if start.strip() else 1, + int(stop) + 1 if stop.strip() else sys.maxsize, + )) - @staticmethod - def optimize_range(ranges): - """Simplify/Combine a parsed list of ranges - - Examples: - optimize_range([(2,4), (4,6), (5,8)]) -> [(2,8)] - optimize_range([(1,1), (2,2), (3,6), (8,9))]) -> [(1,6), (8,9)] - """ - if len(ranges) <= 1: - return ranges - - ranges.sort() - riter = iter(ranges) - result = [] + else: + start = int(group) + append(range(start, start+1)) - beg, end = next(riter) - for lower, upper in riter: - if lower > end+1: - result.append((beg, end)) - beg, end = lower, upper - elif upper > end: - end = upper - result.append((beg, end)) - return result + return ranges class UniquePredicate(): @@ -802,6 +799,8 @@ class FilterPredicate(): """Predicate; True if evaluating the given expression returns True""" def __init__(self, expr, target="image"): + if not isinstance(expr, str): + expr = "(" + ") and (".join(expr) + ")" name = "<{} filter>".format(target) self.expr = compile_expression(expr, name) diff --git a/gallery_dl/version.py b/gallery_dl/version.py index d832185..5e3b507 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. 
-__version__ = "1.24.2" +__version__ = "1.24.3" diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py index db313c3..7b71349 100644 --- a/gallery_dl/ytdl.py +++ b/gallery_dl/ytdl.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021-2022 Mike Fährmann +# Copyright 2021-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -199,13 +199,27 @@ def parse_command_line(module, argv): action += args yield action - if getattr(opts, "parse_metadata", None) is None: - opts.parse_metadata = [] - if opts.metafromtitle is not None: - opts.parse_metadata.append("title:%s" % opts.metafromtitle) - opts.metafromtitle = None - opts.parse_metadata = list(itertools.chain.from_iterable(map( - metadataparser_actions, opts.parse_metadata))) + parse_metadata = getattr(opts, "parse_metadata", None) + if isinstance(parse_metadata, dict): + if opts.metafromtitle is not None: + if "pre_process" not in parse_metadata: + parse_metadata["pre_process"] = [] + parse_metadata["pre_process"].append( + "title:%s" % opts.metafromtitle) + opts.parse_metadata = { + k: list(itertools.chain.from_iterable(map( + metadataparser_actions, v))) + for k, v in parse_metadata.items() + } + else: + if parse_metadata is None: + parse_metadata = [] + if opts.metafromtitle is not None: + parse_metadata.append("title:%s" % opts.metafromtitle) + opts.parse_metadata = list(itertools.chain.from_iterable(map( + metadataparser_actions, parse_metadata))) + + opts.metafromtitle = None else: opts.parse_metadata = () diff --git a/test/test_extractor.py b/test/test_extractor.py index de43ff7..144c6f9 100644 --- a/test/test_extractor.py +++ b/test/test_extractor.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2018-2020 Mike Fährmann +# Copyright 2018-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -96,9 +96,10 @@ class TestExtractorModule(unittest.TestCase): test_urls = [] # collect testcase URLs + append = test_urls.append for extr in extractor.extractors(): for testcase in extr._get_tests(): - test_urls.append((testcase[0], extr)) + append((testcase[0], extr)) # iterate over all testcase URLs for url, extr1 in test_urls: @@ -114,20 +115,23 @@ class TestExtractorModule(unittest.TestCase): match = extr2.pattern.match(url) if match: - matches.append(match) + matches.append((match, extr2)) # fail if more or less than 1 match happened if len(matches) > 1: msg = "'{}' gets matched by more than one pattern:".format(url) - for match in matches: - msg += "\n- " - msg += match.re.pattern + for match, extr in matches: + msg += "\n\n- {}:\n{}".format( + extr.__name__, match.re.pattern) self.fail(msg) - if len(matches) < 1: + elif len(matches) < 1: msg = "'{}' isn't matched by any pattern".format(url) self.fail(msg) + else: + self.assertIs(extr1, matches[0][1], url) + def test_docstrings(self): """ensure docstring uniqueness""" for extr1 in extractor.extractors(): diff --git a/test/test_util.py b/test/test_util.py index 4b8f9ae..67fdf60 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -24,39 +24,62 @@ from gallery_dl import util, text, exception # noqa E402 class TestRange(unittest.TestCase): - def test_parse_range(self, f=util.RangePredicate.parse_range): - self.assertEqual( - f(""), - []) - self.assertEqual( - f("1-2"), - [(1, 2)]) + def test_parse_empty(self, f=util.RangePredicate._parse): + 
self.assertEqual(f(""), []) + self.assertEqual(f([]), []) + + def test_parse_digit(self, f=util.RangePredicate._parse): + self.assertEqual(f("2"), [range(2, 3)]) + self.assertEqual( - f("-"), - [(1, sys.maxsize)]) + f("2, 3, 4"), + [range(2, 3), + range(3, 4), + range(4, 5)], + ) + + def test_parse_range(self, f=util.RangePredicate._parse): + self.assertEqual(f("1-2"), [range(1, 3)]) + self.assertEqual(f("2-"), [range(2, sys.maxsize)]) + self.assertEqual(f("-3"), [range(1, 4)]) + self.assertEqual(f("-"), [range(1, sys.maxsize)]) + self.assertEqual( f("-2,4,6-8,10-"), - [(1, 2), (4, 4), (6, 8), (10, sys.maxsize)]) + [range(1, 3), + range(4, 5), + range(6, 9), + range(10, sys.maxsize)], + ) self.assertEqual( f(" - 3 , 4- 4, 2-6"), - [(1, 3), (4, 4), (2, 6)]) + [range(1, 4), + range(4, 5), + range(2, 7)], + ) + + def test_parse_slice(self, f=util.RangePredicate._parse): + self.assertEqual(f("2:4") , [range(2, 4)]) + self.assertEqual(f("3::") , [range(3, sys.maxsize)]) + self.assertEqual(f(":4:") , [range(1, 4)]) + self.assertEqual(f("::5") , [range(1, sys.maxsize, 5)]) + self.assertEqual(f("::") , [range(1, sys.maxsize)]) + self.assertEqual(f("2:3:4"), [range(2, 3, 4)]) - def test_optimize_range(self, f=util.RangePredicate.optimize_range): - self.assertEqual( - f([]), - []) - self.assertEqual( - f([(2, 4)]), - [(2, 4)]) - self.assertEqual( - f([(2, 4), (6, 8), (10, 12)]), - [(2, 4), (6, 8), (10, 12)]) self.assertEqual( - f([(2, 4), (4, 6), (5, 8)]), - [(2, 8)]) + f("2:4, 4:, :4, :4:, ::4"), + [range(2, 4), + range(4, sys.maxsize), + range(1, 4), + range(1, 4), + range(1, sys.maxsize, 4)], + ) self.assertEqual( - f([(1, 1), (2, 2), (3, 6), (8, 9)]), - [(1, 6), (8, 9)]) + f(" : 3 , 4: 4, 2:6"), + [range(1, 3), + range(4, 4), + range(2, 6)], + ) class TestPredicate(unittest.TestCase): @@ -68,7 +91,7 @@ class TestPredicate(unittest.TestCase): for i in range(6): self.assertTrue(pred(dummy, dummy)) with self.assertRaises(exception.StopExtraction): - bool(pred(dummy, dummy)) + pred(dummy, dummy) pred = util.RangePredicate("1, 3, 5") self.assertTrue(pred(dummy, dummy)) @@ -77,11 +100,11 @@ class TestPredicate(unittest.TestCase): self.assertFalse(pred(dummy, dummy)) self.assertTrue(pred(dummy, dummy)) with self.assertRaises(exception.StopExtraction): - bool(pred(dummy, dummy)) + pred(dummy, dummy) pred = util.RangePredicate("") with self.assertRaises(exception.StopExtraction): - bool(pred(dummy, dummy)) + pred(dummy, dummy) def test_unique_predicate(self): dummy = None @@ -116,6 +139,14 @@ class TestPredicate(unittest.TestCase): with self.assertRaises(exception.FilterError): util.FilterPredicate("b > 1")(url, {"a": 2}) + pred = util.FilterPredicate(["a < 3", "b < 4", "c < 5"]) + self.assertTrue(pred(url, {"a": 2, "b": 3, "c": 4})) + self.assertFalse(pred(url, {"a": 3, "b": 3, "c": 4})) + self.assertFalse(pred(url, {"a": 2, "b": 4, "c": 4})) + self.assertFalse(pred(url, {"a": 2, "b": 3, "c": 5})) + with self.assertRaises(exception.FilterError): + pred(url, {"a": 2}) + def test_build_predicate(self): pred = util.build_predicate([]) self.assertIsInstance(pred, type(lambda: True)) diff --git a/test/test_ytdl.py b/test/test_ytdl.py index eedb4f9..a273604 100644 --- a/test/test_ytdl.py +++ b/test/test_ytdl.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2022 Mike Fährmann +# Copyright 2022-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -262,11 +262,21 @@ class 
diff --git a/test/test_ytdl.py b/test/test_ytdl.py
index eedb4f9..a273604 100644
--- a/test/test_ytdl.py
+++ b/test/test_ytdl.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -262,11 +262,21 @@ class Test_CommandlineArguments_YtDlp(Test_CommandlineArguments):
 
     def test_metadata_from_title(self):
         opts = self._(["--metadata-from-title", "%(artist)s - %(title)s"])
+
+        try:
+            legacy = (self.module.version.__version__ < "2023.01.01")
+        except AttributeError:
+            legacy = True
+
+        actions = [self.module.MetadataFromFieldPP.to_action(
+            "title:%(artist)s - %(title)s")]
+        if not legacy:
+            actions = {"pre_process": actions}
+
         self.assertEqual(opts["postprocessors"][0], {
-            "key": "MetadataParser",
-            "when": "pre_process",
-            "actions": [self.module.MetadataFromFieldPP.to_action(
-                "title:%(artist)s - %(title)s")],
+            "key"    : "MetadataParser",
+            "when"   : "pre_process",
+            "actions": actions,
         })
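
The legacy check in the test above compares version strings lexicographically; that is sound
here only because yt-dlp version strings are zero-padded dates, so string order matches
chronological order:

    print("2022.10.04" < "2023.01.01")  # True
    print("2023.03.04" < "2023.01.01")  # False
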
