From 7bc30b43b70556630b4a93c03fefc0d888e3d19f Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Thu, 30 Dec 2021 01:56:41 -0500 Subject: New upstream version 1.20.0. --- CHANGELOG.md | 39 ++ PKG-INFO | 698 +++++++++++++++++----------------- README.rst | 7 +- data/completion/_gallery-dl | 11 +- data/completion/gallery-dl | 4 +- data/man/gallery-dl.1 | 21 +- data/man/gallery-dl.conf.5 | 75 +++- docs/gallery-dl.conf | 6 +- gallery_dl.egg-info/PKG-INFO | 698 +++++++++++++++++----------------- gallery_dl.egg-info/SOURCES.txt | 7 +- gallery_dl/__init__.py | 36 +- gallery_dl/downloader/ytdl.py | 2 +- gallery_dl/extractor/2chan.py | 2 +- gallery_dl/extractor/500px.py | 169 +++++++- gallery_dl/extractor/__init__.py | 3 + gallery_dl/extractor/artstation.py | 2 +- gallery_dl/extractor/blogger.py | 2 +- gallery_dl/extractor/common.py | 11 +- gallery_dl/extractor/cyberdrop.py | 23 +- gallery_dl/extractor/deviantart.py | 60 +-- gallery_dl/extractor/exhentai.py | 16 + gallery_dl/extractor/fanbox.py | 23 +- gallery_dl/extractor/fantia.py | 2 +- gallery_dl/extractor/flickr.py | 2 +- gallery_dl/extractor/furaffinity.py | 7 + gallery_dl/extractor/generic.py | 208 ++++++++++ gallery_dl/extractor/hitomi.py | 37 +- gallery_dl/extractor/imgbb.py | 2 +- gallery_dl/extractor/inkbunny.py | 22 ++ gallery_dl/extractor/instagram.py | 31 +- gallery_dl/extractor/keenspot.py | 2 +- gallery_dl/extractor/kemonoparty.py | 43 ++- gallery_dl/extractor/lolisafe.py | 79 ++++ gallery_dl/extractor/myportfolio.py | 4 +- gallery_dl/extractor/newgrounds.py | 4 +- gallery_dl/extractor/patreon.py | 2 +- gallery_dl/extractor/philomena.py | 2 +- gallery_dl/extractor/photobucket.py | 10 +- gallery_dl/extractor/pixiv.py | 15 +- gallery_dl/extractor/pixnet.py | 2 +- gallery_dl/extractor/pornhub.py | 2 +- gallery_dl/extractor/rule34us.py | 130 +++++++ gallery_dl/extractor/sexcom.py | 9 +- gallery_dl/extractor/slickpic.py | 2 +- gallery_dl/extractor/smugmug.py | 2 +- gallery_dl/extractor/tumblr.py | 2 +- gallery_dl/extractor/tumblrgallery.py | 115 +++--- gallery_dl/extractor/twitter.py | 2 +- gallery_dl/extractor/wordpress.py | 41 ++ gallery_dl/extractor/xhamster.py | 2 +- gallery_dl/extractor/ytdl.py | 10 +- gallery_dl/option.py | 52 ++- gallery_dl/output.py | 30 +- gallery_dl/path.py | 7 +- gallery_dl/util.py | 20 +- gallery_dl/version.py | 2 +- gallery_dl/ytdl.py | 41 +- test/test_results.py | 31 +- test/test_util.py | 25 ++ test/test_ytdl.py | 545 ++++++++++++++++++++++++++ 60 files changed, 2478 insertions(+), 981 deletions(-) create mode 100644 gallery_dl/extractor/generic.py create mode 100644 gallery_dl/extractor/lolisafe.py create mode 100644 gallery_dl/extractor/rule34us.py create mode 100644 gallery_dl/extractor/wordpress.py create mode 100644 test/test_ytdl.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 16e843f..1dc4a21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,44 @@ # Changelog +## 1.20.0 - 2021-12-29 +### Additions +- [500px] add `favorite` extractor ([#1927](https://github.com/mikf/gallery-dl/issues/1927)) +- [exhentai] add `source` option +- [fanbox] support pixiv redirects ([#2122](https://github.com/mikf/gallery-dl/issues/2122)) +- [inkbunny] add `search` extractor ([#2094](https://github.com/mikf/gallery-dl/issues/2094)) +- [kemonoparty] support coomer.party ([#2100](https://github.com/mikf/gallery-dl/issues/2100)) +- [lolisafe] add generic album extractor for lolisafe/chibisafe instances ([#2038](https://github.com/mikf/gallery-dl/issues/2038), [#2105](https://github.com/mikf/gallery-dl/issues/2105)) +- 
[rule34us] add `tag` and `post` extractors ([#1527](https://github.com/mikf/gallery-dl/issues/1527)) +- add a generic extractor ([#735](https://github.com/mikf/gallery-dl/issues/735), [#683](https://github.com/mikf/gallery-dl/issues/683)) +- add `-d/--directory` and `-f/--filename` command-line options +- add `--sleep-request` and `--sleep-extractor` command-line options +- allow specifying `sleep-*` options as string +### Changes +- [cyberdrop] include file ID in default filenames +- [hitomi] disable `metadata` by default +- [kemonoparty] use `service` as subcategory ([#2147](https://github.com/mikf/gallery-dl/issues/2147)) +- [kemonoparty] change default `files` order to `attachments,file,inline` ([#1991](https://github.com/mikf/gallery-dl/issues/1991)) +- [output] write download progress indicator to stderr +- [ytdl] prefer yt-dlp over youtube-dl ([#1850](https://github.com/mikf/gallery-dl/issues/1850), [#2028](https://github.com/mikf/gallery-dl/issues/2028)) +- rename `--write-infojson` to `--write-info-json` +### Fixes +- [500px] create directories per photo +- [artstation] create directories per asset ([#2136](https://github.com/mikf/gallery-dl/issues/2136)) +- [deviantart] use `/browse/newest` for most-recent searches ([#2096](https://github.com/mikf/gallery-dl/issues/2096)) +- [hitomi] fix image URLs +- [instagram] fix error when PostPage data is not in GraphQL format ([#2037](https://github.com/mikf/gallery-dl/issues/2037)) +- [instagran] match post URLs with usernames ([#2085](https://github.com/mikf/gallery-dl/issues/2085)) +- [instagram] allow downloading specific stories ([#2088](https://github.com/mikf/gallery-dl/issues/2088)) +- [furaffinity] warn when no session cookies were found +- [pixiv] respect date ranges in search URLs ([#2133](https://github.com/mikf/gallery-dl/issues/2133)) +- [sexcom] fix and improve embed extraction ([#2145](https://github.com/mikf/gallery-dl/issues/2145)) +- [tumblrgallery] fix extraction ([#2112](https://github.com/mikf/gallery-dl/issues/2112)) +- [tumblrgallery] improve `id` extraction ([#2115](https://github.com/mikf/gallery-dl/issues/2115)) +- [tumblrgallery] improve search pagination ([#2132](https://github.com/mikf/gallery-dl/issues/2132)) +- [twitter] include `4096x4096` as a default image fallback ([#1881](https://github.com/mikf/gallery-dl/issues/1881), [#2107](https://github.com/mikf/gallery-dl/issues/2107)) +- [ytdl] update argument parsing to latest yt-dlp changes ([#2124](https://github.com/mikf/gallery-dl/issues/2124)) +- handle UNC paths ([#2113](https://github.com/mikf/gallery-dl/issues/2113)) + ## 1.19.3 - 2021-11-27 ### Additions - [dynastyscans] add `manga` extractor ([#2035](https://github.com/mikf/gallery-dl/issues/2035)) diff --git a/PKG-INFO b/PKG-INFO index b758e4c..08b652d 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.19.3 +Version: 1.20.0 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -9,352 +9,6 @@ Maintainer: Mike Fährmann Maintainer-email: mike_faehrmann@web.de License: GPLv2 Download-URL: https://github.com/mikf/gallery-dl/releases/latest -Description: ========== - gallery-dl - ========== - - *gallery-dl* is a command-line program to download image galleries and - collections from several image hosting sites (see `Supported Sites`_). 
- It is a cross-platform tool with many configuration options - and powerful `filenaming capabilities `_. - - - |pypi| |build| |gitter| - - .. contents:: - - - Dependencies - ============ - - - Python_ 3.4+ - - Requests_ - - Optional - -------- - - - FFmpeg_: Pixiv Ugoira to WebM conversion - - youtube-dl_: Video downloads - - - Installation - ============ - - - Pip - --- - - The stable releases of *gallery-dl* are distributed on PyPI_ and can be - easily installed or upgraded using pip_: - - .. code:: bash - - $ python3 -m pip install -U gallery-dl - - Installing the latest dev version directly from GitHub can be done with - pip_ as well: - - .. code:: bash - - $ python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz - - Note: Windows users should use :code:`py -3` instead of :code:`python3`. - - It is advised to use the latest version of pip_, - including the essential packages :code:`setuptools` and :code:`wheel`. - To ensure these packages are up-to-date, run - - .. code:: bash - - $ python3 -m pip install --upgrade pip setuptools wheel - - - Standalone Executable - --------------------- - - Prebuilt executable files with a Python interpreter and - required Python packages included are available for - - - `Windows `__ - - `Linux `__ - - | Executables build from the latest commit can be found at - | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml - - - Snap - ---- - - Linux users that are using a distro that is supported by Snapd_ can install *gallery-dl* from the Snap Store: - - .. code:: bash - - $ snap install gallery-dl - - - Chocolatey - ---------- - - Windows users that have Chocolatey_ installed can install *gallery-dl* from the Chocolatey Community Packages repository: - - .. code:: powershell - - $ choco install gallery-dl - - - Scoop - ----- - - *gallery-dl* is also available in the Scoop_ "main" bucket for Windows users: - - .. code:: powershell - - $ scoop install gallery-dl - - - Usage - ===== - - To use *gallery-dl* simply call it with the URLs you wish to download images - from: - - .. code:: bash - - $ gallery-dl [OPTION]... URL... - - See also :code:`gallery-dl --help`. - - - Examples - -------- - - Download images; in this case from danbooru via tag search for 'bonocho': - - .. code:: bash - - $ gallery-dl "https://danbooru.donmai.us/posts?tags=bonocho" - - - Get the direct URL of an image from a site that requires authentication: - - .. code:: bash - - $ gallery-dl -g -u "" -p "" "https://seiga.nicovideo.jp/seiga/im3211703" - - - Filter manga chapters by language and chapter number: - - .. code:: bash - - $ gallery-dl --chapter-filter "lang == 'fr' and 10 <= chapter < 20" "https://mangadex.org/title/2354/" - - - | Search a remote resource for URLs and download images from them: - | (URLs for which no extractor can be found will be silently ignored) - - .. code:: bash - - $ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT" - - - If a site's address is nonstandard for its extractor, you can prefix the URL with the - extractor's name to force the use of a specific extractor: - - .. code:: bash - - $ gallery-dl "tumblr:https://sometumblrblog.example" - - - Configuration - ============= - - Configuration files for *gallery-dl* use a JSON-based file format. - - | For a (more or less) complete example with options set to their default values, - see gallery-dl.conf_. - | For a configuration file example with more involved settings and options, - see gallery-dl-example.conf_. 
- | A list of all available configuration options and their - descriptions can be found in configuration.rst_. - | - - *gallery-dl* searches for configuration files in the following places: - - Windows: - * ``%APPDATA%\gallery-dl\config.json`` - * ``%USERPROFILE%\gallery-dl\config.json`` - * ``%USERPROFILE%\gallery-dl.conf`` - - (``%USERPROFILE%`` usually refers to the user's home directory, - i.e. ``C:\Users\\``) - - Linux, macOS, etc.: - * ``/etc/gallery-dl.conf`` - * ``${XDG_CONFIG_HOME}/gallery-dl/config.json`` - * ``${HOME}/.config/gallery-dl/config.json`` - * ``${HOME}/.gallery-dl.conf`` - - Values in later configuration files will override previous ones. - - Command line options will override all related settings in the configuration file(s), - e.g. using ``--write-metadata`` will enable writing metadata using the default values - for all ``postprocessors.metadata.*`` settings, overriding any specific settings in - configuration files. - - - Authentication - ============== - - Username & Password - ------------------- - - Some extractors require you to provide valid login credentials in the form of - a username & password pair. This is necessary for - ``nijie`` and ``seiga`` - and optional for - ``aryion``, - ``danbooru``, - ``e621``, - ``exhentai``, - ``idolcomplex``, - ``imgbb``, - ``inkbunny``, - ``instagram``, - ``mangadex``, - ``mangoxo``, - ``pillowfort``, - ``sankaku``, - ``subscribestar``, - ``tapas``, - ``tsumino``, - and ``twitter``. - - You can set the necessary information in your configuration file - (cf. gallery-dl.conf_) - - .. code:: json - - { - "extractor": { - "seiga": { - "username": "", - "password": "" - } - } - } - - or you can provide them directly via the - :code:`-u/--username` and :code:`-p/--password` or via the - :code:`-o/--option` command-line options - - .. code:: bash - - $ gallery-dl -u -p URL - $ gallery-dl -o username= -o password= URL - - - Cookies - ------- - - For sites where login with username & password is not possible due to - CAPTCHA or similar, or has not been implemented yet, you can use the - cookies from a browser login session and input them into *gallery-dl*. - - This can be done via the - `cookies `__ - option in your configuration file by specifying - - - | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon - | (e.g. `Get cookies.txt `__ for Chrome, - `Export Cookies `__ for Firefox) - - - | a list of name-value pairs gathered from your browser's web developer tools - | (in `Chrome `__, - in `Firefox `__) - - For example: - - .. code:: json - - { - "extractor": { - "instagram": { - "cookies": "$HOME/path/to/cookies.txt" - }, - "patreon": { - "cookies": { - "session_id": "K1T57EKu19TR49C51CDjOJoXNQLF7VbdVOiBrC9ye0a" - } - } - } - } - - You can also specify a cookies.txt file with - the :code:`--cookies` command-line option: - - .. code:: bash - - $ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL - - - OAuth - ----- - - *gallery-dl* supports user authentication via OAuth_ for - ``deviantart``, ``flickr``, ``reddit``, ``smugmug``, ``tumblr``, - and ``mastodon`` instances. - This is mostly optional, but grants *gallery-dl* the ability - to issue requests on your account's behalf and enables it to access resources - which would otherwise be unavailable to a public user. - - To link your account to *gallery-dl*, start by invoking it with - ``oauth:`` as an argument. For example: - - .. 
code:: bash - - $ gallery-dl oauth:flickr - - You will be sent to the site's authorization page and asked to grant read - access to *gallery-dl*. Authorize it and you will be shown one or more - "tokens", which should be added to your configuration file. - - To authenticate with a ``mastodon`` instance, run *gallery-dl* with - ``oauth:mastodon:`` as argument. For example: - - .. code:: bash - - $ gallery-dl oauth:mastodon:pawoo.net - $ gallery-dl oauth:mastodon:https://mastodon.social/ - - - - .. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf - .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf - .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst - .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md - .. _Formatting: https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md - - .. _Python: https://www.python.org/downloads/ - .. _PyPI: https://pypi.org/ - .. _pip: https://pip.pypa.io/en/stable/ - .. _Requests: https://requests.readthedocs.io/en/master/ - .. _FFmpeg: https://www.ffmpeg.org/ - .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ - .. _pyOpenSSL: https://pyopenssl.org/ - .. _Snapd: https://docs.snapcraft.io/installing-snapd - .. _OAuth: https://en.wikipedia.org/wiki/OAuth - .. _Chocolatey: https://chocolatey.org/install - .. _Scoop: https://scoop.sh - - .. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg - :target: https://pypi.org/project/gallery-dl/ - - .. |build| image:: https://github.com/mikf/gallery-dl/workflows/tests/badge.svg - :target: https://github.com/mikf/gallery-dl/actions - - .. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg - :target: https://gitter.im/gallery-dl/main - Keywords: image gallery downloader crawler scraper Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable @@ -376,3 +30,353 @@ Classifier: Topic :: Multimedia :: Graphics Classifier: Topic :: Utilities Requires-Python: >=3.4 Provides-Extra: video +License-File: LICENSE + +========== +gallery-dl +========== + +*gallery-dl* is a command-line program to download image galleries and +collections from several image hosting sites (see `Supported Sites`_). +It is a cross-platform tool with many configuration options +and powerful `filenaming capabilities `_. + + +|pypi| |build| |gitter| + +.. contents:: + + +Dependencies +============ + +- Python_ 3.4+ +- Requests_ + +Optional +-------- + +- FFmpeg_: Pixiv Ugoira to WebM conversion +- yt-dlp_ or youtube-dl_: Video downloads + + +Installation +============ + + +Pip +--- + +The stable releases of *gallery-dl* are distributed on PyPI_ and can be +easily installed or upgraded using pip_: + +.. code:: bash + + $ python3 -m pip install -U gallery-dl + +Installing the latest dev version directly from GitHub can be done with +pip_ as well: + +.. code:: bash + + $ python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz + +Note: Windows users should use :code:`py -3` instead of :code:`python3`. + +It is advised to use the latest version of pip_, +including the essential packages :code:`setuptools` and :code:`wheel`. +To ensure these packages are up-to-date, run + +.. 
code:: bash + + $ python3 -m pip install --upgrade pip setuptools wheel + + +Standalone Executable +--------------------- + +Prebuilt executable files with a Python interpreter and +required Python packages included are available for + +- `Windows `__ +- `Linux `__ + +| Executables build from the latest commit can be found at +| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml + + +Snap +---- + +Linux users that are using a distro that is supported by Snapd_ can install *gallery-dl* from the Snap Store: + +.. code:: bash + + $ snap install gallery-dl + + +Chocolatey +---------- + +Windows users that have Chocolatey_ installed can install *gallery-dl* from the Chocolatey Community Packages repository: + +.. code:: powershell + + $ choco install gallery-dl + + +Scoop +----- + +*gallery-dl* is also available in the Scoop_ "main" bucket for Windows users: + +.. code:: powershell + + $ scoop install gallery-dl + + +Usage +===== + +To use *gallery-dl* simply call it with the URLs you wish to download images +from: + +.. code:: bash + + $ gallery-dl [OPTION]... URL... + +See also :code:`gallery-dl --help`. + + +Examples +-------- + +Download images; in this case from danbooru via tag search for 'bonocho': + +.. code:: bash + + $ gallery-dl "https://danbooru.donmai.us/posts?tags=bonocho" + + +Get the direct URL of an image from a site that requires authentication: + +.. code:: bash + + $ gallery-dl -g -u "" -p "" "https://seiga.nicovideo.jp/seiga/im3211703" + + +Filter manga chapters by language and chapter number: + +.. code:: bash + + $ gallery-dl --chapter-filter "lang == 'fr' and 10 <= chapter < 20" "https://mangadex.org/title/2354/" + + +| Search a remote resource for URLs and download images from them: +| (URLs for which no extractor can be found will be silently ignored) + +.. code:: bash + + $ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT" + + +If a site's address is nonstandard for its extractor, you can prefix the URL with the +extractor's name to force the use of a specific extractor: + +.. code:: bash + + $ gallery-dl "tumblr:https://sometumblrblog.example" + + +Configuration +============= + +Configuration files for *gallery-dl* use a JSON-based file format. + +| For a (more or less) complete example with options set to their default values, + see gallery-dl.conf_. +| For a configuration file example with more involved settings and options, + see gallery-dl-example.conf_. +| A list of all available configuration options and their + descriptions can be found in configuration.rst_. +| + +*gallery-dl* searches for configuration files in the following places: + +Windows: + * ``%APPDATA%\gallery-dl\config.json`` + * ``%USERPROFILE%\gallery-dl\config.json`` + * ``%USERPROFILE%\gallery-dl.conf`` + + (``%USERPROFILE%`` usually refers to the user's home directory, + i.e. ``C:\Users\\``) + +Linux, macOS, etc.: + * ``/etc/gallery-dl.conf`` + * ``${XDG_CONFIG_HOME}/gallery-dl/config.json`` + * ``${HOME}/.config/gallery-dl/config.json`` + * ``${HOME}/.gallery-dl.conf`` + +Values in later configuration files will override previous ones. + +Command line options will override all related settings in the configuration file(s), +e.g. using ``--write-metadata`` will enable writing metadata using the default values +for all ``postprocessors.metadata.*`` settings, overriding any specific settings in +configuration files. 
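
As a concrete illustration of this precedence rule, here is a sketch using the sleep options added in this release. The ``--sleep``/``--sleep-request`` flags and the string or range form of Duration values (e.g. ``"2.7"`` or ``"2.0-3.5"``) are documented elsewhere in this patch; placing ``sleep`` and ``sleep-request`` directly under ``extractor`` and the concrete numbers below are only assumptions for the example. A configuration file could set default waiting times:

.. code:: json

    {
        "extractor": {
            "sleep": "1.5-3.0",
            "sleep-request": "0.5"
        }
    }

Passing the corresponding command-line options then takes precedence over these file values for that run:

.. code:: bash

    # command-line values override the "sleep" settings from configuration files
    $ gallery-dl --sleep 2.0-3.5 --sleep-request 1.0 URL
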
+ + +Authentication +============== + +Username & Password +------------------- + +Some extractors require you to provide valid login credentials in the form of +a username & password pair. This is necessary for +``nijie`` and ``seiga`` +and optional for +``aryion``, +``danbooru``, +``e621``, +``exhentai``, +``idolcomplex``, +``imgbb``, +``inkbunny``, +``instagram``, +``mangadex``, +``mangoxo``, +``pillowfort``, +``sankaku``, +``subscribestar``, +``tapas``, +``tsumino``, +and ``twitter``. + +You can set the necessary information in your configuration file +(cf. gallery-dl.conf_) + +.. code:: json + + { + "extractor": { + "seiga": { + "username": "", + "password": "" + } + } + } + +or you can provide them directly via the +:code:`-u/--username` and :code:`-p/--password` or via the +:code:`-o/--option` command-line options + +.. code:: bash + + $ gallery-dl -u -p URL + $ gallery-dl -o username= -o password= URL + + +Cookies +------- + +For sites where login with username & password is not possible due to +CAPTCHA or similar, or has not been implemented yet, you can use the +cookies from a browser login session and input them into *gallery-dl*. + +This can be done via the +`cookies `__ +option in your configuration file by specifying + +- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon + | (e.g. `Get cookies.txt `__ for Chrome, + `Export Cookies `__ for Firefox) + +- | a list of name-value pairs gathered from your browser's web developer tools + | (in `Chrome `__, + in `Firefox `__) + +For example: + +.. code:: json + + { + "extractor": { + "instagram": { + "cookies": "$HOME/path/to/cookies.txt" + }, + "patreon": { + "cookies": { + "session_id": "K1T57EKu19TR49C51CDjOJoXNQLF7VbdVOiBrC9ye0a" + } + } + } + } + +You can also specify a cookies.txt file with +the :code:`--cookies` command-line option: + +.. code:: bash + + $ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL + + +OAuth +----- + +*gallery-dl* supports user authentication via OAuth_ for +``deviantart``, ``flickr``, ``reddit``, ``smugmug``, ``tumblr``, +and ``mastodon`` instances. +This is mostly optional, but grants *gallery-dl* the ability +to issue requests on your account's behalf and enables it to access resources +which would otherwise be unavailable to a public user. + +To link your account to *gallery-dl*, start by invoking it with +``oauth:`` as an argument. For example: + +.. code:: bash + + $ gallery-dl oauth:flickr + +You will be sent to the site's authorization page and asked to grant read +access to *gallery-dl*. Authorize it and you will be shown one or more +"tokens", which should be added to your configuration file. + +To authenticate with a ``mastodon`` instance, run *gallery-dl* with +``oauth:mastodon:`` as argument. For example: + +.. code:: bash + + $ gallery-dl oauth:mastodon:pawoo.net + $ gallery-dl oauth:mastodon:https://mastodon.social/ + + + +.. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf +.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf +.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst +.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md +.. _Formatting: https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md + +.. _Python: https://www.python.org/downloads/ +.. _PyPI: https://pypi.org/ +.. _pip: https://pip.pypa.io/en/stable/ +.. 
_Requests: https://requests.readthedocs.io/en/master/ +.. _FFmpeg: https://www.ffmpeg.org/ +.. _yt-dlp: https://github.com/yt-dlp/yt-dlp +.. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ +.. _pyOpenSSL: https://pyopenssl.org/ +.. _Snapd: https://docs.snapcraft.io/installing-snapd +.. _OAuth: https://en.wikipedia.org/wiki/OAuth +.. _Chocolatey: https://chocolatey.org/install +.. _Scoop: https://scoop.sh + +.. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg + :target: https://pypi.org/project/gallery-dl/ + +.. |build| image:: https://github.com/mikf/gallery-dl/workflows/tests/badge.svg + :target: https://github.com/mikf/gallery-dl/actions + +.. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg + :target: https://gitter.im/gallery-dl/main + + diff --git a/README.rst b/README.rst index 72f7c82..c8b7afd 100644 --- a/README.rst +++ b/README.rst @@ -23,7 +23,7 @@ Optional -------- - FFmpeg_: Pixiv Ugoira to WebM conversion -- youtube-dl_: Video downloads +- yt-dlp_ or youtube-dl_: Video downloads Installation @@ -64,8 +64,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml @@ -328,6 +328,7 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with .. _pip: https://pip.pypa.io/en/stable/ .. _Requests: https://requests.readthedocs.io/en/master/ .. _FFmpeg: https://www.ffmpeg.org/ +.. _yt-dlp: https://github.com/yt-dlp/yt-dlp .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ .. _pyOpenSSL: https://pyopenssl.org/ .. _Snapd: https://docs.snapcraft.io/installing-snapd diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl index 22a5f25..2ac93f7 100644 --- a/data/completion/_gallery-dl +++ b/data/completion/_gallery-dl @@ -7,8 +7,10 @@ local rc=1 _arguments -C -S \ {-h,--help}'[Print this help message and exit]' \ --version'[Print program version and exit]' \ -{-d,--dest}'[Destination directory]':'':_files \ +--dest'[==SUPPRESS==]':'':_files \ {-i,--input-file}'[Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified]':'':_files \ +{-f,--filename}'[Filename format string for downloaded files ("/O" for "original" filenames)]':'' \ +{-d,--directory}'[Target location for file downloads]':'' \ --cookies'[File to load additional cookies from]':'':_files \ --proxy'[Use the specified proxy]':'' \ --clear-cache'[Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)]':'' \ @@ -28,7 +30,9 @@ _arguments -C -S \ {-r,--limit-rate}'[Maximum download rate (e.g. 500k or 2.5M)]':'' \ {-R,--retries}'[Maximum number of retries for failed HTTP requests or -1 for infinite retries (default: 4)]':'' \ --http-timeout'[Timeout for HTTP connections (default: 30.0)]':'' \ ---sleep'[Number of seconds to sleep before each download]':'' \ +--sleep'[Number of seconds to wait before each download. This can be either a constant value or a range (e.g. 2.7 or 2.0-3.5)]':'' \ +--sleep-request'[Number of seconds to wait between HTTP requests during data extraction]':'' \ +--sleep-extractor'[Number of seconds to wait before starting data extraction for an input URL]':'' \ --filesize-min'[Do not download files smaller than SIZE (e.g. 500k or 2.5M)]':'' \ --filesize-max'[Do not download files larger than SIZE (e.g. 
500k or 2.5M)]':'' \ --no-part'[Do not use .part files]' \ @@ -54,7 +58,8 @@ _arguments -C -S \ --ugoira-conv'[Convert Pixiv Ugoira to WebM (requires FFmpeg)]' \ --ugoira-conv-lossless'[Convert Pixiv Ugoira to WebM in VP9 lossless mode]' \ --write-metadata'[Write metadata to separate JSON files]' \ ---write-infojson'[Write gallery metadata to a info.json file]' \ +--write-info-json'[Write gallery metadata to a info.json file]' \ +--write-infojson'[==SUPPRESS==]' \ --write-tags'[Write image tags to separate text files]' \ --mtime-from-date'[Set file modification times according to "date" metadata]' \ --exec'[Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"]':'' \ diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl index c2ef896..4085bb9 100644 --- a/data/completion/gallery-dl +++ b/data/completion/gallery-dl @@ -7,10 +7,10 @@ _gallery_dl() if [[ "${prev}" =~ ^(-i|--input-file|--cookies|--write-log|--write-unsupported|-c|--config|--config-yaml|--download-archive)$ ]]; then COMPREPLY=( $(compgen -f -- "${cur}") ) - elif [[ "${prev}" =~ ^(-d|--dest)$ ]]; then + elif [[ "${prev}" =~ ^(--dest)$ ]]; then COMPREPLY=( $(compgen -d -- "${cur}") ) else - COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") ) + COMPREPLY=( $(compgen -W "--help --version --dest --input-file --filename --directory --cookies --proxy --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") ) fi } diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index e7741ef..a7f51a7 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2021-11-27" "1.19.3" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2021-12-29" "1.20.0" "gallery-dl Manual" .\" disable hyphenation .nh @@ -23,12 +23,15 @@ Print this help message and exit .B "\-\-version" Print program version and exit .TP -.B "\-d, \-\-dest" \f[I]DEST\f[] -Destination directory -.TP .B "\-i, \-\-input\-file" \f[I]FILE\f[] Download URLs found in FILE ('-' for stdin). 
More than one --input-file can be specified .TP +.B "\-f, \-\-filename" \f[I]FORMAT\f[] +Filename format string for downloaded files ('/O' for "original" filenames) +.TP +.B "\-d, \-\-directory" \f[I]PATH\f[] +Target location for file downloads +.TP .B "\-\-cookies" \f[I]FILE\f[] File to load additional cookies from .TP @@ -87,7 +90,13 @@ Maximum number of retries for failed HTTP requests or -1 for infinite retries (d Timeout for HTTP connections (default: 30.0) .TP .B "\-\-sleep" \f[I]SECONDS\f[] -Number of seconds to sleep before each download +Number of seconds to wait before each download. This can be either a constant value or a range (e.g. 2.7 or 2.0-3.5) +.TP +.B "\-\-sleep\-request" \f[I]SECONDS\f[] +Number of seconds to wait between HTTP requests during data extraction +.TP +.B "\-\-sleep\-extractor" \f[I]SECONDS\f[] +Number of seconds to wait before starting data extraction for an input URL .TP .B "\-\-filesize\-min" \f[I]SIZE\f[] Do not download files smaller than SIZE (e.g. 500k or 2.5M) @@ -161,7 +170,7 @@ Convert Pixiv Ugoira to WebM in VP9 lossless mode .B "\-\-write\-metadata" Write metadata to separate JSON files .TP -.B "\-\-write\-infojson" +.B "\-\-write\-info\-json" Write gallery metadata to a info.json file .TP .B "\-\-write\-tags" diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 09d2820..a574625 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2021-11-27" "1.19.3" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2021-12-29" "1.20.0" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -1235,18 +1235,6 @@ or whenever your \f[I]cache file\f[] is deleted or cleared. Minimum wait time in seconds before API requests. -.SS extractor.exhentai.limits -.IP "Type:" 6 -\f[I]integer\f[] - -.IP "Default:" 9 -\f[I]null\f[] - -.IP "Description:" 4 -Sets a custom image download limit and -stops extraction when it gets exceeded. - - .SS extractor.exhentai.domain .IP "Type:" 6 \f[I]string\f[] @@ -1264,6 +1252,18 @@ depending on the input URL * \f[I]"exhentai.org"\f[]: Use \f[I]exhentai.org\f[] for all URLs +.SS extractor.exhentai.limits +.IP "Type:" 6 +\f[I]integer\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +Sets a custom image download limit and +stops extraction when it gets exceeded. + + .SS extractor.exhentai.metadata .IP "Type:" 6 \f[I]bool\f[] @@ -1290,6 +1290,20 @@ Makes \f[I]date\f[] and \f[I]filesize\f[] more precise. Download full-sized original images if available. +.SS extractor.exhentai.source +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"gallery"\f[] + +.IP "Description:" 4 +Selects an alternative source to download files from. + +.br +* \f[I]"hitomi"\f[]: Download the corresponding gallery from \f[I]hitomi.la\f[] + + .SS extractor.fanbox.embeds .IP "Type:" 6 \f[I]bool\f[] or \f[I]string\f[] @@ -1399,6 +1413,18 @@ Possible values are You can use \f[I]"all"\f[] instead of listing all values separately. +.SS extractor.generic.enabled +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Match **all** URLs not otherwise supported by gallery-dl, +even ones without a \f[I]generic:\f[] prefix. + + .SS extractor.gfycat.format .IP "Type:" 6 .br @@ -1446,7 +1472,7 @@ You can use \f[I]"all"\f[] instead of listing all values separately. 
\f[I]bool\f[] .IP "Default:" 9 -\f[I]true\f[] +\f[I]false\f[] .IP "Description:" 4 Try to extract @@ -1545,7 +1571,7 @@ Extract a user's direct messages as \f[I]dms\f[] metadata. \f[I]list\f[] of \f[I]strings\f[] .IP "Default:" 9 -\f[I]["file", "attachments", "inline"]\f[] +\f[I]["attachments", "file", "inline"]\f[] .IP "Description:" 4 Determines the type and order of files to be downloaded. @@ -2287,7 +2313,7 @@ Fetch media from all Tweets and replies in a \f[I]conversation \f[I]list\f[] of \f[I]strings\f[] .IP "Default:" 9 -\f[I]["orig", "large", "medium", "small"]\f[] +\f[I]["orig", "4096x4096", "large", "medium", "small"]\f[] .IP "Description:" 4 The image version to download. @@ -2566,11 +2592,14 @@ Note: Set \f[I]quiet\f[] and \f[I]no_warnings\f[] in \f[I]string\f[] .IP "Default:" 9 -\f[I]"youtube_dl"\f[] +\f[I]null\f[] .IP "Description:" 4 Name of the youtube-dl Python module to import. +Setting this to \f[I]null\f[] will try to import \f[I]"yt_dlp"\f[] +followed by \f[I]"youtube_dl"\f[] as fallback. + .SS extractor.ytdl.raw-options .IP "Type:" 6 @@ -2885,11 +2914,14 @@ Note: Set \f[I]quiet\f[] and \f[I]no_warnings\f[] in \f[I]string\f[] .IP "Default:" 9 -\f[I]"youtube_dl"\f[] +\f[I]null\f[] .IP "Description:" 4 Name of the youtube-dl Python module to import. +Setting this to \f[I]null\f[] will first try to import \f[I]"yt_dlp"\f[] +and use \f[I]"youtube_dl"\f[] as fallback. + .SS downloader.ytdl.outtmpl .IP "Type:" 6 @@ -3736,12 +3768,16 @@ A \f[I]Date\f[] value represents a specific point in time. * \f[I]float\f[] .br * \f[I]list\f[] with 2 \f[I]floats\f[] +.br +* \f[I]string\f[] .IP "Example:" 4 .br * 2.85 .br * [1.5, 3.0] +.br +* "2.85", "1.5-3.0" .IP "Description:" 4 A \f[I]Duration\f[] represents a span of time in seconds. @@ -3752,6 +3788,9 @@ A \f[I]Duration\f[] represents a span of time in seconds. * If given as a \f[I]list\f[] with 2 floating-point numbers \f[I]a\f[] & \f[I]b\f[] , it will be randomly chosen with uniform distribution such that \f[I]a <= N <=b\f[]. (see \f[I]random.uniform()\f[]) +.br +* If given as a \f[I]string\f[], it can either represent a single \f[I]float\f[] +value (\f[I]"2.85"\f[]) or a range (\f[I]"1.5-3.0"\f[]). .SS Path diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 0800ec7..8e7ff6d 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -114,7 +114,7 @@ }, "hitomi": { - "metadata": true + "metadata": false }, "idolcomplex": { @@ -303,7 +303,7 @@ "format": null, "generic": true, "logging": true, - "module": "youtube_dl", + "module": null, "raw-options": null }, "booru": @@ -337,7 +337,7 @@ "format": null, "forward-cookies": false, "logging": true, - "module": "youtube_dl", + "module": null, "outtmpl": null, "raw-options": null } diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index bf70cac..8b87746 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.19.3 +Version: 1.20.0 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -9,352 +9,6 @@ Maintainer: Mike Fährmann Maintainer-email: mike_faehrmann@web.de License: GPLv2 Download-URL: https://github.com/mikf/gallery-dl/releases/latest -Description: ========== - gallery-dl - ========== - - *gallery-dl* is a command-line program to download image galleries and - collections from several image hosting sites (see `Supported Sites`_). 
- It is a cross-platform tool with many configuration options - and powerful `filenaming capabilities `_. - - - |pypi| |build| |gitter| - - .. contents:: - - - Dependencies - ============ - - - Python_ 3.4+ - - Requests_ - - Optional - -------- - - - FFmpeg_: Pixiv Ugoira to WebM conversion - - youtube-dl_: Video downloads - - - Installation - ============ - - - Pip - --- - - The stable releases of *gallery-dl* are distributed on PyPI_ and can be - easily installed or upgraded using pip_: - - .. code:: bash - - $ python3 -m pip install -U gallery-dl - - Installing the latest dev version directly from GitHub can be done with - pip_ as well: - - .. code:: bash - - $ python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz - - Note: Windows users should use :code:`py -3` instead of :code:`python3`. - - It is advised to use the latest version of pip_, - including the essential packages :code:`setuptools` and :code:`wheel`. - To ensure these packages are up-to-date, run - - .. code:: bash - - $ python3 -m pip install --upgrade pip setuptools wheel - - - Standalone Executable - --------------------- - - Prebuilt executable files with a Python interpreter and - required Python packages included are available for - - - `Windows `__ - - `Linux `__ - - | Executables build from the latest commit can be found at - | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml - - - Snap - ---- - - Linux users that are using a distro that is supported by Snapd_ can install *gallery-dl* from the Snap Store: - - .. code:: bash - - $ snap install gallery-dl - - - Chocolatey - ---------- - - Windows users that have Chocolatey_ installed can install *gallery-dl* from the Chocolatey Community Packages repository: - - .. code:: powershell - - $ choco install gallery-dl - - - Scoop - ----- - - *gallery-dl* is also available in the Scoop_ "main" bucket for Windows users: - - .. code:: powershell - - $ scoop install gallery-dl - - - Usage - ===== - - To use *gallery-dl* simply call it with the URLs you wish to download images - from: - - .. code:: bash - - $ gallery-dl [OPTION]... URL... - - See also :code:`gallery-dl --help`. - - - Examples - -------- - - Download images; in this case from danbooru via tag search for 'bonocho': - - .. code:: bash - - $ gallery-dl "https://danbooru.donmai.us/posts?tags=bonocho" - - - Get the direct URL of an image from a site that requires authentication: - - .. code:: bash - - $ gallery-dl -g -u "" -p "" "https://seiga.nicovideo.jp/seiga/im3211703" - - - Filter manga chapters by language and chapter number: - - .. code:: bash - - $ gallery-dl --chapter-filter "lang == 'fr' and 10 <= chapter < 20" "https://mangadex.org/title/2354/" - - - | Search a remote resource for URLs and download images from them: - | (URLs for which no extractor can be found will be silently ignored) - - .. code:: bash - - $ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT" - - - If a site's address is nonstandard for its extractor, you can prefix the URL with the - extractor's name to force the use of a specific extractor: - - .. code:: bash - - $ gallery-dl "tumblr:https://sometumblrblog.example" - - - Configuration - ============= - - Configuration files for *gallery-dl* use a JSON-based file format. - - | For a (more or less) complete example with options set to their default values, - see gallery-dl.conf_. - | For a configuration file example with more involved settings and options, - see gallery-dl-example.conf_. 
- | A list of all available configuration options and their - descriptions can be found in configuration.rst_. - | - - *gallery-dl* searches for configuration files in the following places: - - Windows: - * ``%APPDATA%\gallery-dl\config.json`` - * ``%USERPROFILE%\gallery-dl\config.json`` - * ``%USERPROFILE%\gallery-dl.conf`` - - (``%USERPROFILE%`` usually refers to the user's home directory, - i.e. ``C:\Users\\``) - - Linux, macOS, etc.: - * ``/etc/gallery-dl.conf`` - * ``${XDG_CONFIG_HOME}/gallery-dl/config.json`` - * ``${HOME}/.config/gallery-dl/config.json`` - * ``${HOME}/.gallery-dl.conf`` - - Values in later configuration files will override previous ones. - - Command line options will override all related settings in the configuration file(s), - e.g. using ``--write-metadata`` will enable writing metadata using the default values - for all ``postprocessors.metadata.*`` settings, overriding any specific settings in - configuration files. - - - Authentication - ============== - - Username & Password - ------------------- - - Some extractors require you to provide valid login credentials in the form of - a username & password pair. This is necessary for - ``nijie`` and ``seiga`` - and optional for - ``aryion``, - ``danbooru``, - ``e621``, - ``exhentai``, - ``idolcomplex``, - ``imgbb``, - ``inkbunny``, - ``instagram``, - ``mangadex``, - ``mangoxo``, - ``pillowfort``, - ``sankaku``, - ``subscribestar``, - ``tapas``, - ``tsumino``, - and ``twitter``. - - You can set the necessary information in your configuration file - (cf. gallery-dl.conf_) - - .. code:: json - - { - "extractor": { - "seiga": { - "username": "", - "password": "" - } - } - } - - or you can provide them directly via the - :code:`-u/--username` and :code:`-p/--password` or via the - :code:`-o/--option` command-line options - - .. code:: bash - - $ gallery-dl -u -p URL - $ gallery-dl -o username= -o password= URL - - - Cookies - ------- - - For sites where login with username & password is not possible due to - CAPTCHA or similar, or has not been implemented yet, you can use the - cookies from a browser login session and input them into *gallery-dl*. - - This can be done via the - `cookies `__ - option in your configuration file by specifying - - - | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon - | (e.g. `Get cookies.txt `__ for Chrome, - `Export Cookies `__ for Firefox) - - - | a list of name-value pairs gathered from your browser's web developer tools - | (in `Chrome `__, - in `Firefox `__) - - For example: - - .. code:: json - - { - "extractor": { - "instagram": { - "cookies": "$HOME/path/to/cookies.txt" - }, - "patreon": { - "cookies": { - "session_id": "K1T57EKu19TR49C51CDjOJoXNQLF7VbdVOiBrC9ye0a" - } - } - } - } - - You can also specify a cookies.txt file with - the :code:`--cookies` command-line option: - - .. code:: bash - - $ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL - - - OAuth - ----- - - *gallery-dl* supports user authentication via OAuth_ for - ``deviantart``, ``flickr``, ``reddit``, ``smugmug``, ``tumblr``, - and ``mastodon`` instances. - This is mostly optional, but grants *gallery-dl* the ability - to issue requests on your account's behalf and enables it to access resources - which would otherwise be unavailable to a public user. - - To link your account to *gallery-dl*, start by invoking it with - ``oauth:`` as an argument. For example: - - .. 
code:: bash - - $ gallery-dl oauth:flickr - - You will be sent to the site's authorization page and asked to grant read - access to *gallery-dl*. Authorize it and you will be shown one or more - "tokens", which should be added to your configuration file. - - To authenticate with a ``mastodon`` instance, run *gallery-dl* with - ``oauth:mastodon:`` as argument. For example: - - .. code:: bash - - $ gallery-dl oauth:mastodon:pawoo.net - $ gallery-dl oauth:mastodon:https://mastodon.social/ - - - - .. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf - .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf - .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst - .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md - .. _Formatting: https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md - - .. _Python: https://www.python.org/downloads/ - .. _PyPI: https://pypi.org/ - .. _pip: https://pip.pypa.io/en/stable/ - .. _Requests: https://requests.readthedocs.io/en/master/ - .. _FFmpeg: https://www.ffmpeg.org/ - .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ - .. _pyOpenSSL: https://pyopenssl.org/ - .. _Snapd: https://docs.snapcraft.io/installing-snapd - .. _OAuth: https://en.wikipedia.org/wiki/OAuth - .. _Chocolatey: https://chocolatey.org/install - .. _Scoop: https://scoop.sh - - .. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg - :target: https://pypi.org/project/gallery-dl/ - - .. |build| image:: https://github.com/mikf/gallery-dl/workflows/tests/badge.svg - :target: https://github.com/mikf/gallery-dl/actions - - .. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg - :target: https://gitter.im/gallery-dl/main - Keywords: image gallery downloader crawler scraper Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable @@ -376,3 +30,353 @@ Classifier: Topic :: Multimedia :: Graphics Classifier: Topic :: Utilities Requires-Python: >=3.4 Provides-Extra: video +License-File: LICENSE + +========== +gallery-dl +========== + +*gallery-dl* is a command-line program to download image galleries and +collections from several image hosting sites (see `Supported Sites`_). +It is a cross-platform tool with many configuration options +and powerful `filenaming capabilities `_. + + +|pypi| |build| |gitter| + +.. contents:: + + +Dependencies +============ + +- Python_ 3.4+ +- Requests_ + +Optional +-------- + +- FFmpeg_: Pixiv Ugoira to WebM conversion +- yt-dlp_ or youtube-dl_: Video downloads + + +Installation +============ + + +Pip +--- + +The stable releases of *gallery-dl* are distributed on PyPI_ and can be +easily installed or upgraded using pip_: + +.. code:: bash + + $ python3 -m pip install -U gallery-dl + +Installing the latest dev version directly from GitHub can be done with +pip_ as well: + +.. code:: bash + + $ python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz + +Note: Windows users should use :code:`py -3` instead of :code:`python3`. + +It is advised to use the latest version of pip_, +including the essential packages :code:`setuptools` and :code:`wheel`. +To ensure these packages are up-to-date, run + +.. 
code:: bash + + $ python3 -m pip install --upgrade pip setuptools wheel + + +Standalone Executable +--------------------- + +Prebuilt executable files with a Python interpreter and +required Python packages included are available for + +- `Windows `__ +- `Linux `__ + +| Executables build from the latest commit can be found at +| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml + + +Snap +---- + +Linux users that are using a distro that is supported by Snapd_ can install *gallery-dl* from the Snap Store: + +.. code:: bash + + $ snap install gallery-dl + + +Chocolatey +---------- + +Windows users that have Chocolatey_ installed can install *gallery-dl* from the Chocolatey Community Packages repository: + +.. code:: powershell + + $ choco install gallery-dl + + +Scoop +----- + +*gallery-dl* is also available in the Scoop_ "main" bucket for Windows users: + +.. code:: powershell + + $ scoop install gallery-dl + + +Usage +===== + +To use *gallery-dl* simply call it with the URLs you wish to download images +from: + +.. code:: bash + + $ gallery-dl [OPTION]... URL... + +See also :code:`gallery-dl --help`. + + +Examples +-------- + +Download images; in this case from danbooru via tag search for 'bonocho': + +.. code:: bash + + $ gallery-dl "https://danbooru.donmai.us/posts?tags=bonocho" + + +Get the direct URL of an image from a site that requires authentication: + +.. code:: bash + + $ gallery-dl -g -u "" -p "" "https://seiga.nicovideo.jp/seiga/im3211703" + + +Filter manga chapters by language and chapter number: + +.. code:: bash + + $ gallery-dl --chapter-filter "lang == 'fr' and 10 <= chapter < 20" "https://mangadex.org/title/2354/" + + +| Search a remote resource for URLs and download images from them: +| (URLs for which no extractor can be found will be silently ignored) + +.. code:: bash + + $ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT" + + +If a site's address is nonstandard for its extractor, you can prefix the URL with the +extractor's name to force the use of a specific extractor: + +.. code:: bash + + $ gallery-dl "tumblr:https://sometumblrblog.example" + + +Configuration +============= + +Configuration files for *gallery-dl* use a JSON-based file format. + +| For a (more or less) complete example with options set to their default values, + see gallery-dl.conf_. +| For a configuration file example with more involved settings and options, + see gallery-dl-example.conf_. +| A list of all available configuration options and their + descriptions can be found in configuration.rst_. +| + +*gallery-dl* searches for configuration files in the following places: + +Windows: + * ``%APPDATA%\gallery-dl\config.json`` + * ``%USERPROFILE%\gallery-dl\config.json`` + * ``%USERPROFILE%\gallery-dl.conf`` + + (``%USERPROFILE%`` usually refers to the user's home directory, + i.e. ``C:\Users\\``) + +Linux, macOS, etc.: + * ``/etc/gallery-dl.conf`` + * ``${XDG_CONFIG_HOME}/gallery-dl/config.json`` + * ``${HOME}/.config/gallery-dl/config.json`` + * ``${HOME}/.gallery-dl.conf`` + +Values in later configuration files will override previous ones. + +Command line options will override all related settings in the configuration file(s), +e.g. using ``--write-metadata`` will enable writing metadata using the default values +for all ``postprocessors.metadata.*`` settings, overriding any specific settings in +configuration files. 
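
A similar sketch for the new ``-d/--directory`` and ``-f/--filename`` command-line options added in this release: per ``gallery_dl/__init__.py`` in this patch they map to the ``base-directory`` and ``filename`` settings. The placement under ``extractor``, the paths, and the format string below are assumptions for the example; ``/O`` for "original" filenames is taken from the option help text. A configuration file might contain

.. code:: json

    {
        "extractor": {
            "base-directory": "./downloads/",
            "filename": "{id}.{extension}"
        }
    }

and a single run can override both values from the command line:

.. code:: bash

    # -d and -f take precedence over base-directory/filename from configuration files
    $ gallery-dl -d ./one-off -f "/O" URL
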
+ + +Authentication +============== + +Username & Password +------------------- + +Some extractors require you to provide valid login credentials in the form of +a username & password pair. This is necessary for +``nijie`` and ``seiga`` +and optional for +``aryion``, +``danbooru``, +``e621``, +``exhentai``, +``idolcomplex``, +``imgbb``, +``inkbunny``, +``instagram``, +``mangadex``, +``mangoxo``, +``pillowfort``, +``sankaku``, +``subscribestar``, +``tapas``, +``tsumino``, +and ``twitter``. + +You can set the necessary information in your configuration file +(cf. gallery-dl.conf_) + +.. code:: json + + { + "extractor": { + "seiga": { + "username": "", + "password": "" + } + } + } + +or you can provide them directly via the +:code:`-u/--username` and :code:`-p/--password` or via the +:code:`-o/--option` command-line options + +.. code:: bash + + $ gallery-dl -u -p URL + $ gallery-dl -o username= -o password= URL + + +Cookies +------- + +For sites where login with username & password is not possible due to +CAPTCHA or similar, or has not been implemented yet, you can use the +cookies from a browser login session and input them into *gallery-dl*. + +This can be done via the +`cookies `__ +option in your configuration file by specifying + +- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon + | (e.g. `Get cookies.txt `__ for Chrome, + `Export Cookies `__ for Firefox) + +- | a list of name-value pairs gathered from your browser's web developer tools + | (in `Chrome `__, + in `Firefox `__) + +For example: + +.. code:: json + + { + "extractor": { + "instagram": { + "cookies": "$HOME/path/to/cookies.txt" + }, + "patreon": { + "cookies": { + "session_id": "K1T57EKu19TR49C51CDjOJoXNQLF7VbdVOiBrC9ye0a" + } + } + } + } + +You can also specify a cookies.txt file with +the :code:`--cookies` command-line option: + +.. code:: bash + + $ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL + + +OAuth +----- + +*gallery-dl* supports user authentication via OAuth_ for +``deviantart``, ``flickr``, ``reddit``, ``smugmug``, ``tumblr``, +and ``mastodon`` instances. +This is mostly optional, but grants *gallery-dl* the ability +to issue requests on your account's behalf and enables it to access resources +which would otherwise be unavailable to a public user. + +To link your account to *gallery-dl*, start by invoking it with +``oauth:`` as an argument. For example: + +.. code:: bash + + $ gallery-dl oauth:flickr + +You will be sent to the site's authorization page and asked to grant read +access to *gallery-dl*. Authorize it and you will be shown one or more +"tokens", which should be added to your configuration file. + +To authenticate with a ``mastodon`` instance, run *gallery-dl* with +``oauth:mastodon:`` as argument. For example: + +.. code:: bash + + $ gallery-dl oauth:mastodon:pawoo.net + $ gallery-dl oauth:mastodon:https://mastodon.social/ + + + +.. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf +.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf +.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst +.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md +.. _Formatting: https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md + +.. _Python: https://www.python.org/downloads/ +.. _PyPI: https://pypi.org/ +.. _pip: https://pip.pypa.io/en/stable/ +.. 
_Requests: https://requests.readthedocs.io/en/master/ +.. _FFmpeg: https://www.ffmpeg.org/ +.. _yt-dlp: https://github.com/yt-dlp/yt-dlp +.. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ +.. _pyOpenSSL: https://pyopenssl.org/ +.. _Snapd: https://docs.snapcraft.io/installing-snapd +.. _OAuth: https://en.wikipedia.org/wiki/OAuth +.. _Chocolatey: https://chocolatey.org/install +.. _Scoop: https://scoop.sh + +.. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg + :target: https://pypi.org/project/gallery-dl/ + +.. |build| image:: https://github.com/mikf/gallery-dl/workflows/tests/badge.svg + :target: https://github.com/mikf/gallery-dl/actions + +.. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg + :target: https://gitter.im/gallery-dl/main + + diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index d05066c..127354e 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -76,6 +76,7 @@ gallery_dl/extractor/fuskator.py gallery_dl/extractor/gelbooru.py gallery_dl/extractor/gelbooru_v01.py gallery_dl/extractor/gelbooru_v02.py +gallery_dl/extractor/generic.py gallery_dl/extractor/gfycat.py gallery_dl/extractor/hbrowse.py gallery_dl/extractor/hentai2read.py @@ -105,6 +106,7 @@ gallery_dl/extractor/khinsider.py gallery_dl/extractor/komikcast.py gallery_dl/extractor/lineblog.py gallery_dl/extractor/livedoor.py +gallery_dl/extractor/lolisafe.py gallery_dl/extractor/luscious.py gallery_dl/extractor/mangadex.py gallery_dl/extractor/mangafox.py @@ -147,6 +149,7 @@ gallery_dl/extractor/readcomiconline.py gallery_dl/extractor/recursive.py gallery_dl/extractor/reddit.py gallery_dl/extractor/redgifs.py +gallery_dl/extractor/rule34us.py gallery_dl/extractor/sankaku.py gallery_dl/extractor/sankakucomplex.py gallery_dl/extractor/seiga.py @@ -177,6 +180,7 @@ gallery_dl/extractor/webtoons.py gallery_dl/extractor/weibo.py gallery_dl/extractor/wikiart.py gallery_dl/extractor/wikieat.py +gallery_dl/extractor/wordpress.py gallery_dl/extractor/xhamster.py gallery_dl/extractor/xvideos.py gallery_dl/extractor/ytdl.py @@ -201,4 +205,5 @@ test/test_output.py test/test_postprocessor.py test/test_results.py test/test_text.py -test/test_util.py \ No newline at end of file +test/test_util.py +test/test_ytdl.py \ No newline at end of file diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 2cad029..ad8286e 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -115,6 +115,13 @@ def main(): config.load(args.cfgfiles, strict=True) if args.yamlfiles: config.load(args.yamlfiles, strict=True, fmt="yaml") + if args.filename: + if args.filename == "/O": + args.filename = "{filename}.{extension}" + config.set((), "filename", args.filename) + if args.directory: + config.set((), "base-directory", args.directory) + config.set((), "directory", ()) if args.postprocessors: config.set((), "postprocessors", args.postprocessors) if args.abort: @@ -142,20 +149,23 @@ def main(): import os.path import requests - head = "" - try: - out, err = subprocess.Popen( - ("git", "rev-parse", "--short", "HEAD"), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - cwd=os.path.dirname(os.path.abspath(__file__)), - ).communicate() - if out and not err: - head = " - Git HEAD: " + out.decode().rstrip() - except (OSError, subprocess.SubprocessError): - pass + extra = "" + if getattr(sys, "frozen", False): + extra = " - Executable" + else: + try: + out, err = subprocess.Popen( + ("git", "rev-parse", "--short", "HEAD"), + stdout=subprocess.PIPE, + 
stderr=subprocess.PIPE, + cwd=os.path.dirname(os.path.abspath(__file__)), + ).communicate() + if out and not err: + extra = " - Git HEAD: " + out.decode().rstrip() + except (OSError, subprocess.SubprocessError): + pass - log.debug("Version %s%s", __version__, head) + log.debug("Version %s%s", __version__, extra) log.debug("Python %s - %s", platform.python_version(), platform.platform()) try: diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index 8416ca0..30f628e 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -39,7 +39,7 @@ class YoutubeDLDownloader(DownloaderBase): if not ytdl_instance: ytdl_instance = self.ytdl_instance if not ytdl_instance: - module = __import__(self.config("module") or "youtube_dl") + module = ytdl.import_module(self.config("module")) self.ytdl_instance = ytdl_instance = ytdl.construct_YoutubeDL( module, self, self.ytdl_opts) if self.outtmpl == "default": diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py index c92969b..38b2d5a 100644 --- a/gallery_dl/extractor/2chan.py +++ b/gallery_dl/extractor/2chan.py @@ -20,7 +20,7 @@ class _2chanThreadExtractor(Extractor): filename_fmt = "{tim}.{extension}" archive_fmt = "{board}_{thread}_{tim}" url_fmt = "https://{server}.2chan.net/{board}/src/{filename}" - pattern = r"(?:https?://)?([^.]+)\.2chan\.net/([^/]+)/res/(\d+)" + pattern = r"(?:https?://)?([\w-]+)\.2chan\.net/([^/]+)/res/(\d+)" test = ("http://dec.2chan.net/70/res/4752.htm", { "url": "f49aa31340e9a3429226af24e19e01f5b819ca1f", "keyword": "44599c21b248e79692b2eb2da12699bd0ed5640a", diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index 8c6fa09..88ceaeb 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -21,13 +21,13 @@ class _500pxExtractor(Extractor): filename_fmt = "{id}_{name}.{extension}" archive_fmt = "{id}" root = "https://500px.com" + cookiedomain = ".500px.com" def __init__(self, match): Extractor.__init__(self, match) self.session.headers["Referer"] = self.root + "/" def items(self): - first = True data = self.metadata() for photo in self.photos(): @@ -35,9 +35,7 @@ class _500pxExtractor(Extractor): photo["extension"] = photo["image_format"] if data: photo.update(data) - if first: - first = False - yield Message.Directory, photo + yield Message.Directory, photo yield Message.Url, url, photo def metadata(self): @@ -72,24 +70,33 @@ class _500pxExtractor(Extractor): self.log.warning("Unable to fetch photo %s", pid) ] - def _request_api(self, url, params, csrf_token=None): - headers = {"Origin": self.root, "X-CSRF-Token": csrf_token} + def _request_api(self, url, params): + headers = { + "Origin": self.root, + "x-csrf-token": self.session.cookies.get( + "x-csrf-token", domain=".500px.com"), + } return self.request(url, headers=headers, params=params).json() def _request_graphql(self, opname, variables): url = "https://api.500px.com/graphql" + headers = { + "x-csrf-token": self.session.cookies.get( + "x-csrf-token", domain=".500px.com"), + } data = { "operationName": opname, "variables" : json.dumps(variables), "query" : QUERIES[opname], } - return self.request(url, method="POST", json=data).json()["data"] + return self.request( + url, method="POST", headers=headers, json=data).json()["data"] class _500pxUserExtractor(_500pxExtractor): """Extractor for photos from a user's photostream on 500px.com""" subcategory = "user" - pattern = BASE_PATTERN + r"/(?!photo/)(?:p/)?([^/?#]+)/?(?:$|[?#])" + pattern = BASE_PATTERN + 
r"/(?!photo/|liked)(?:p/)?([^/?#]+)/?(?:$|[?#])" test = ( ("https://500px.com/p/light_expression_photography", { "pattern": r"https?://drscdn.500px.org/photo/\d+/m%3D4096/v2", @@ -137,10 +144,6 @@ class _500pxGalleryExtractor(_500pxExtractor): "user": dict, }, }), - # unavailable photos (#1335) - ("https://500px.com/p/Light_Expression_Photography/galleries/street", { - "count": 4, - }), ("https://500px.com/fashvamp/galleries/lera"), ) @@ -194,6 +197,30 @@ class _500pxGalleryExtractor(_500pxExtractor): )["galleryByOwnerIdAndSlugOrToken"]["photos"] +class _500pxFavoriteExtractor(_500pxExtractor): + """Extractor for favorite 500px photos""" + subcategory = "favorite" + pattern = BASE_PATTERN + r"/liked/?$" + test = ("https://500px.com/liked",) + + def photos(self): + variables = {"pageSize": 20} + photos = self._request_graphql( + "LikedPhotosQueryRendererQuery", variables, + )["likedPhotos"] + + while True: + yield from self._extend(photos["edges"]) + + if not photos["pageInfo"]["hasNextPage"]: + return + + variables["cursor"] = photos["pageInfo"]["endCursor"] + photos = self._request_graphql( + "LikedPhotosPaginationContainerQuery", variables, + )["likedPhotos"] + + class _500pxImageExtractor(_500pxExtractor): """Extractor for individual images from 500px.com""" subcategory = "image" @@ -638,6 +665,124 @@ fragment GalleriesDetailPaginationContainer_gallery_3e6UuE on Gallery { } } } +""", + + "LikedPhotosQueryRendererQuery": """\ +query LikedPhotosQueryRendererQuery($pageSize: Int) { + ...LikedPhotosPaginationContainer_query_RlXb8 +} + +fragment LikedPhotosPaginationContainer_query_RlXb8 on Query { + likedPhotos(first: $pageSize) { + edges { + node { + id + legacyId + canonicalPath + name + description + category + uploadedAt + location + width + height + isLikedByMe + notSafeForWork + tags + photographer: uploader { + id + legacyId + username + displayName + canonicalPath + avatar { + images { + url + id + } + id + } + followedByUsers { + totalCount + isFollowedByMe + } + } + images(sizes: [33, 35]) { + size + url + jpegUrl + webpUrl + id + } + __typename + } + cursor + } + pageInfo { + endCursor + hasNextPage + } + } +} +""", + + "LikedPhotosPaginationContainerQuery": """\ +query LikedPhotosPaginationContainerQuery($cursor: String, $pageSize: Int) { + ...LikedPhotosPaginationContainer_query_3e6UuE +} + +fragment LikedPhotosPaginationContainer_query_3e6UuE on Query { + likedPhotos(first: $pageSize, after: $cursor) { + edges { + node { + id + legacyId + canonicalPath + name + description + category + uploadedAt + location + width + height + isLikedByMe + notSafeForWork + tags + photographer: uploader { + id + legacyId + username + displayName + canonicalPath + avatar { + images { + url + id + } + id + } + followedByUsers { + totalCount + isFollowedByMe + } + } + images(sizes: [33, 35]) { + size + url + jpegUrl + webpUrl + id + } + __typename + } + cursor + } + pageInfo { + endCursor + hasNextPage + } + } +} """, } diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index dd9da01..65c994d 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -108,6 +108,7 @@ modules = [ "readcomiconline", "reddit", "redgifs", + "rule34us", "sankaku", "sankakucomplex", "seiga", @@ -144,12 +145,14 @@ modules = [ "foolslide", "mastodon", "shopify", + "lolisafe", "imagehosts", "directlink", "recursive", "oauth", "test", "ytdl", + "generic", ] diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index f687ff8..5675081 
100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -29,12 +29,12 @@ class ArtstationExtractor(Extractor): def items(self): data = self.metadata() - yield Message.Directory, data for project in self.projects(): for asset in self.get_project_assets(project["hash_id"]): asset.update(data) adict = asset["asset"] + yield Message.Directory, asset if adict["has_embedded_player"] and self.external: player = adict["player_embedded"] diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py index 7e7c282..9a86cc4 100644 --- a/gallery_dl/extractor/blogger.py +++ b/gallery_dl/extractor/blogger.py @@ -15,7 +15,7 @@ import re BASE_PATTERN = ( r"(?:blogger:(?:https?://)?([^/]+)|" - r"(?:https?://)?([^.]+\.blogspot\.com))") + r"(?:https?://)?([\w-]+\.blogspot\.com))") class BloggerExtractor(Extractor): diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index e80366e..c440aee 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -571,7 +571,11 @@ class BaseExtractor(Extractor): if not self.category: for index, group in enumerate(match.groups()): if group is not None: - self.category, self.root = self.instances[index] + if index: + self.category, self.root = self.instances[index-1] + else: + self.root = group + self.category = group.partition("://")[2] break Extractor.__init__(self, match) @@ -594,7 +598,10 @@ class BaseExtractor(Extractor): pattern = re.escape(root[root.index(":") + 3:]) pattern_list.append(pattern + "()") - return r"(?:https?://)?(?:" + "|".join(pattern_list) + r")" + return ( + r"(?:" + cls.basecategory + r":(https?://[^/?#]+)|" + r"(?:https?://)?(?:" + "|".join(pattern_list) + r"))" + ) class HTTPSAdapter(HTTPAdapter): diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py index dbaa97e..6d6e192 100644 --- a/gallery_dl/extractor/cyberdrop.py +++ b/gallery_dl/extractor/cyberdrop.py @@ -6,16 +6,13 @@ """Extractors for https://cyberdrop.me/""" -from .common import Extractor, Message +from . import lolisafe from .. 
import text -class CyberdropAlbumExtractor(Extractor): +class CyberdropAlbumExtractor(lolisafe.LolisafelbumExtractor): category = "cyberdrop" - subcategory = "album" root = "https://cyberdrop.me" - directory_fmt = ("{category}", "{album_name} ({album_id})") - archive_fmt = "{album_id}_{id}" pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)" test = ( # images @@ -44,11 +41,7 @@ class CyberdropAlbumExtractor(Extractor): }), ) - def __init__(self, match): - Extractor.__init__(self, match) - self.album_id = match.group(1) - - def items(self): + def fetch_album(self, album_id): url = self.root + "/a/" + self.album_id extr = text.extract_from(self.request(url).text) @@ -58,9 +51,9 @@ class CyberdropAlbumExtractor(Extractor): url = extr('id="file" href="', '"') if not url: break - append(text.unescape(url)) + append({"file": text.unescape(url)}) - data = { + return files, { "album_id" : self.album_id, "album_name" : extr("name: '", "'"), "date" : text.parse_timestamp(extr("timestamp: ", ",")), @@ -68,9 +61,3 @@ class CyberdropAlbumExtractor(Extractor): "description": extr("description: `", "`"), "count" : len(files), } - - yield Message.Directory, data - for url in files: - text.nameext_from_url(url, data) - data["filename"], _, data["id"] = data["filename"].rpartition("-") - yield Message.Url, url, data diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 61affb5..94fec16 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -772,6 +772,7 @@ class DeviantartPopularExtractor(DeviantartExtractor): if trange.startswith("popular-"): trange = trange[8:] self.time_range = { + "newest" : "now", "most-recent" : "now", "this-week" : "1week", "this-month" : "1month", @@ -786,6 +787,8 @@ class DeviantartPopularExtractor(DeviantartExtractor): } def deviations(self): + if self.time_range == "now": + return self.api.browse_newest(self.search_term, self.offset) return self.api.browse_popular( self.search_term, self.time_range, self.offset) @@ -1034,21 +1037,32 @@ class DeviantartOAuthAPI(): def browse_deviantsyouwatch(self, offset=0): """Yield deviations from users you watch""" - endpoint = "browse/deviantsyouwatch" + endpoint = "/browse/deviantsyouwatch" params = {"limit": "50", "offset": offset, "mature_content": self.mature} return self._pagination(endpoint, params, public=False) def browse_posts_deviantsyouwatch(self, offset=0): """Yield posts from users you watch""" - endpoint = "browse/posts/deviantsyouwatch" + endpoint = "/browse/posts/deviantsyouwatch" params = {"limit": "50", "offset": offset, "mature_content": self.mature} return self._pagination(endpoint, params, public=False, unpack=True) + def browse_newest(self, query=None, offset=0): + """Browse newest deviations""" + endpoint = "/browse/newest" + params = { + "q" : query, + "limit" : 50 if self.metadata else 120, + "offset" : offset, + "mature_content": self.mature, + } + return self._pagination(endpoint, params) + def browse_popular(self, query=None, timerange=None, offset=0): """Yield popular deviations""" - endpoint = "browse/popular" + endpoint = "/browse/popular" params = { "q" : query, "limit" : 50 if self.metadata else 120, @@ -1060,7 +1074,7 @@ class DeviantartOAuthAPI(): def browse_tags(self, tag, offset=0): """ Browse a tag """ - endpoint = "browse/tags" + endpoint = "/browse/tags" params = { "tag" : tag, "offset" : offset, @@ -1071,14 +1085,14 @@ class DeviantartOAuthAPI(): def browse_user_journals(self, username, offset=0): """Yield all journal 
entries of a specific user""" - endpoint = "browse/user/journals" + endpoint = "/browse/user/journals" params = {"username": username, "offset": offset, "limit": 50, "mature_content": self.mature, "featured": "false"} return self._pagination(endpoint, params) def collections(self, username, folder_id, offset=0): """Yield all Deviation-objects contained in a collection folder""" - endpoint = "collections/" + folder_id + endpoint = "/collections/" + folder_id params = {"username": username, "offset": offset, "limit": 24, "mature_content": self.mature} return self._pagination(endpoint, params) @@ -1086,21 +1100,21 @@ class DeviantartOAuthAPI(): @memcache(keyarg=1) def collections_folders(self, username, offset=0): """Yield all collection folders of a specific user""" - endpoint = "collections/folders" + endpoint = "/collections/folders" params = {"username": username, "offset": offset, "limit": 50, "mature_content": self.mature} return self._pagination_list(endpoint, params) def comments_deviation(self, deviation_id, offset=0): """Fetch comments posted on a deviation""" - endpoint = "comments/deviation/" + deviation_id + endpoint = "/comments/deviation/" + deviation_id params = {"maxdepth": "5", "offset": offset, "limit": 50, "mature_content": self.mature} return self._pagination_list(endpoint, params=params, key="thread") def deviation(self, deviation_id, public=True): """Query and return info about a single Deviation""" - endpoint = "deviation/" + deviation_id + endpoint = "/deviation/" + deviation_id deviation = self._call(endpoint, public=public) if self.metadata: self._metadata((deviation,)) @@ -1110,13 +1124,13 @@ class DeviantartOAuthAPI(): def deviation_content(self, deviation_id, public=False): """Get extended content of a single Deviation""" - endpoint = "deviation/content" + endpoint = "/deviation/content" params = {"deviationid": deviation_id} return self._call(endpoint, params=params, public=public) def deviation_download(self, deviation_id, public=True): """Get the original file download (if allowed)""" - endpoint = "deviation/download/" + deviation_id + endpoint = "/deviation/download/" + deviation_id params = {"mature_content": self.mature} return self._call(endpoint, params=params, public=public) @@ -1124,7 +1138,7 @@ class DeviantartOAuthAPI(): """ Fetch deviation metadata for a set of deviations""" if not deviations: return [] - endpoint = "deviation/metadata?" + "&".join( + endpoint = "/deviation/metadata?" 
+ "&".join( "deviationids[{}]={}".format(num, deviation["deviationid"]) for num, deviation in enumerate(deviations) ) @@ -1133,14 +1147,14 @@ class DeviantartOAuthAPI(): def gallery(self, username, folder_id, offset=0, extend=True, public=True): """Yield all Deviation-objects contained in a gallery folder""" - endpoint = "gallery/" + folder_id + endpoint = "/gallery/" + folder_id params = {"username": username, "offset": offset, "limit": 24, "mature_content": self.mature, "mode": "newest"} return self._pagination(endpoint, params, extend, public) def gallery_all(self, username, offset=0): """Yield all Deviation-objects of a specific user""" - endpoint = "gallery/all" + endpoint = "/gallery/all" params = {"username": username, "offset": offset, "limit": 24, "mature_content": self.mature} return self._pagination(endpoint, params) @@ -1148,7 +1162,7 @@ class DeviantartOAuthAPI(): @memcache(keyarg=1) def gallery_folders(self, username, offset=0): """Yield all gallery folders of a specific user""" - endpoint = "gallery/folders" + endpoint = "/gallery/folders" params = {"username": username, "offset": offset, "limit": 50, "mature_content": self.mature} return self._pagination_list(endpoint, params) @@ -1156,12 +1170,12 @@ class DeviantartOAuthAPI(): @memcache(keyarg=1) def user_profile(self, username): """Get user profile information""" - endpoint = "user/profile/" + username + endpoint = "/user/profile/" + username return self._call(endpoint, fatal=False) def user_friends_watch(self, username): """Watch a user""" - endpoint = "user/friends/watch/" + username + endpoint = "/user/friends/watch/" + username data = { "watch[friend]" : "0", "watch[deviations]" : "0", @@ -1179,7 +1193,7 @@ class DeviantartOAuthAPI(): def user_friends_unwatch(self, username): """Unwatch a user""" - endpoint = "user/friends/unwatch/" + username + endpoint = "/user/friends/unwatch/" + username return self._call( endpoint, method="POST", public=False, fatal=False, ).get("success") @@ -1217,7 +1231,7 @@ class DeviantartOAuthAPI(): def _call(self, endpoint, fatal=True, public=True, **kwargs): """Call an API endpoint""" - url = "https://www.deviantart.com/api/v1/oauth2/" + endpoint + url = "https://www.deviantart.com/api/v1/oauth2" + endpoint kwargs["fatal"] = None while True: @@ -1357,7 +1371,7 @@ class DeviantartEclipseAPI(): self.log = extractor.log def deviation_extended_fetch(self, deviation_id, user=None, kind=None): - endpoint = "da-browse/shared_api/deviation/extended_fetch" + endpoint = "/da-browse/shared_api/deviation/extended_fetch" params = { "deviationid" : deviation_id, "username" : user, @@ -1367,7 +1381,7 @@ class DeviantartEclipseAPI(): return self._call(endpoint, params) def gallery_scraps(self, user, offset=None): - endpoint = "da-user-profile/api/gallery/contents" + endpoint = "/da-user-profile/api/gallery/contents" params = { "username" : user, "offset" : offset, @@ -1377,7 +1391,7 @@ class DeviantartEclipseAPI(): return self._pagination(endpoint, params) def user_watching(self, user, offset=None): - endpoint = "da-user-profile/api/module/watching" + endpoint = "/da-user-profile/api/module/watching" params = { "username": user, "moduleid": self._module_id_watching(user), @@ -1387,7 +1401,7 @@ class DeviantartEclipseAPI(): return self._pagination(endpoint, params) def _call(self, endpoint, params=None): - url = "https://www.deviantart.com/_napi/" + endpoint + url = "https://www.deviantart.com/_napi" + endpoint headers = {"Referer": "https://www.deviantart.com/"} response = 
self.extractor._limited_request( diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 7ffb214..cf9706b 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -176,6 +176,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): self.image_token = match.group(4) self.image_num = text.parse_int(match.group(6), 1) + source = self.config("source") + if source == "hitomi": + self.items = self._items_hitomi + def items(self): self.login() @@ -221,6 +225,18 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): data["_http_validate"] = None yield Message.Url, url, data + def _items_hitomi(self): + if self.config("metadata", False): + data = self.metadata_from_api() + data["date"] = text.parse_timestamp(data["posted"]) + else: + data = {} + + from .hitomi import HitomiGalleryExtractor + url = "https://hitomi.la/galleries/{}.html".format(self.gallery_id) + data["_extractor"] = HitomiGalleryExtractor + yield Message.Queue, url, data + def get_metadata(self, page): """Extract gallery metadata""" data = self.metadata_from_page(page) diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index cc6ee97..ef79808 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -33,7 +33,7 @@ class FanboxExtractor(Extractor): def items(self): if self._warning: - if "FANBOXSESSID" not in self.session.cookies: + if not self._check_cookies(("FANBOXSESSID",)): self.log.warning("no 'FANBOXSESSID' cookie set") FanboxExtractor._warning = False @@ -280,3 +280,24 @@ class FanboxPostExtractor(FanboxExtractor): def posts(self): return (self._get_post_data_from_id(self.post_id),) + + +class FanboxRedirectExtractor(Extractor): + """Extractor for pixiv redirects to fanbox.cc""" + category = "fanbox" + subcategory = "redirect" + pattern = r"(?:https?://)?(?:www\.)?pixiv\.net/fanbox/creator/(\d+)" + test = ("https://www.pixiv.net/fanbox/creator/52336352", { + "pattern": FanboxCreatorExtractor.pattern, + }) + + def __init__(self, match): + Extractor.__init__(self, match) + self.user_id = match.group(1) + + def items(self): + url = "https://www.pixiv.net/fanbox/creator/" + self.user_id + data = {"_extractor": FanboxCreatorExtractor} + response = self.request( + url, method="HEAD", allow_redirects=False, notfound="user") + yield Message.Queue, response.headers["Location"], data diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py index 62f7429..89a965f 100644 --- a/gallery_dl/extractor/fantia.py +++ b/gallery_dl/extractor/fantia.py @@ -22,7 +22,7 @@ class FantiaExtractor(Extractor): def items(self): if self._warning: - if "_session_id" not in self.session.cookies: + if not self._check_cookies(("_session_id",)): self.log.warning("no '_session_id' cookie set") FantiaExtractor._warning = False diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py index 6c5c7df..2bd8c6b 100644 --- a/gallery_dl/extractor/flickr.py +++ b/gallery_dl/extractor/flickr.py @@ -56,7 +56,7 @@ class FlickrImageExtractor(FlickrExtractor): subcategory = "image" pattern = (r"(?:https?://)?(?:" r"(?:(?:www\.|m\.)?flickr\.com/photos/[^/]+/" - r"|[^.]+\.static\.?flickr\.com/(?:\d+/)+)(\d+)" + r"|[\w-]+\.static\.?flickr\.com/(?:\d+/)+)(\d+)" r"|flic\.kr/p/([A-Za-z1-9]+))") test = ( ("https://www.flickr.com/photos/departingyyz/16089302239", { diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index b5ecbd6..891e0c1 100644 --- a/gallery_dl/extractor/furaffinity.py +++ 
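Both the exhentai ``source`` option and the new fanbox redirect extractor
above hand work off to another extractor by yielding ``Message.Queue``
together with an ``_extractor`` hint. A minimal sketch of that hand-off
pattern follows; the ``example:`` URL scheme and the extractor class are made
up purely for illustration, only ``Message.Queue`` and the ``_extractor`` key
come from the diff.

.. code:: python

    # Illustrative only: a hypothetical extractor that forwards a gallery ID
    # to hitomi.la by queueing the URL with an explicit "_extractor" hint,
    # mirroring the hand-off used by the exhentai/fanbox changes above.
    from gallery_dl.extractor.common import Extractor, Message
    from gallery_dl.extractor.hitomi import HitomiGalleryExtractor

    class ExampleRedirectExtractor(Extractor):
        category = "example"
        subcategory = "redirect"
        pattern = r"example:(\d+)"      # hypothetical URL scheme

        def __init__(self, match):
            Extractor.__init__(self, match)
            self.gallery_id = match.group(1)

        def items(self):
            url = "https://hitomi.la/galleries/{}.html".format(self.gallery_id)
            yield Message.Queue, url, {"_extractor": HitomiGalleryExtractor}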
b/gallery_dl/extractor/furaffinity.py @@ -22,6 +22,7 @@ class FuraffinityExtractor(Extractor): archive_fmt = "{id}" cookiedomain = ".furaffinity.net" root = "https://www.furaffinity.net" + _warning = True def __init__(self, match): Extractor.__init__(self, match) @@ -32,6 +33,12 @@ class FuraffinityExtractor(Extractor): self._process_description = str.strip def items(self): + + if self._warning: + if not self._check_cookies(("a", "b")): + self.log.warning("no 'a' and 'b' session cookies set") + FuraffinityExtractor._warning = False + external = self.config("external", False) metadata = self.metadata() for post_id in util.advance(self.posts(), self.offset): diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py new file mode 100644 index 0000000..bece905 --- /dev/null +++ b/gallery_dl/extractor/generic.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- + +"""Extractor for images in a generic web page.""" + +from .common import Extractor, Message +from .. import config, text +import re +import os.path + + +class GenericExtractor(Extractor): + """Extractor for images in a generic web page.""" + + category = "generic" + directory_fmt = ("{category}", "{pageurl}") + archive_fmt = "{imageurl}" + + # By default, the generic extractor is disabled + # and the "g(eneric):" prefix in url is required. + # If the extractor is enabled, make the prefix optional + pattern = r"(?ix)(?Pg(?:eneric)?:)" + if config.get(("extractor", "generic"), "enabled"): + pattern += r"?" + + # The generic extractor pattern should match (almost) any valid url + # Based on: https://tools.ietf.org/html/rfc3986#appendix-B + pattern += r""" + (?Phttps?://)? # optional http(s) scheme + (?P[-\w\.]+) # required domain + (?P/[^?&#]*)? # optional path + (?:\?(?P[^/?#]*))? # optional query + (?:\#(?P.*))?$ # optional fragment + """ + + def __init__(self, match): + """Init.""" + Extractor.__init__(self, match) + + # Strip the "g(eneric):" prefix + # and inform about "forced" or "fallback" mode + if match.group('generic'): + self.log.info("Forcing use of generic information extractor.") + self.url = match.group(0).partition(":")[2] + else: + self.log.info("Falling back on generic information extractor.") + self.url = match.group(0) + + # Make sure we have a scheme, or use https + if match.group('scheme'): + self.scheme = match.group('scheme') + else: + self.scheme = 'https://' + self.url = self.scheme + self.url + + # Used to resolve relative image urls + self.root = self.scheme + match.group('domain') + + def items(self): + """Get page, extract metadata & images, yield them in suitable messages. + + Adapted from common.GalleryExtractor.items() + + """ + page = self.request(self.url).text + data = self.metadata(page) + imgs = self.images(page) + + try: + data["count"] = len(imgs) + except TypeError: + pass + images = enumerate(imgs, 1) + + yield Message.Version, 1 + yield Message.Directory, data + + for data["num"], (url, imgdata) in images: + if imgdata: + data.update(imgdata) + if "extension" not in imgdata: + text.nameext_from_url(url, data) + else: + text.nameext_from_url(url, data) + yield Message.Url, url, data + + def metadata(self, page): + """Extract generic webpage metadata, return them in a dict.""" + data = {} + data['pageurl'] = self.url + data['title'] = text.extract(page, '', "")[0] or "" + data['description'] = text.extract( + page, ',