From 8f7c87a2697113134c311aaeafd9c919555a2741 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Sun, 13 Dec 2020 23:07:42 -0500 Subject: New upstream version 1.16.0. --- CHANGELOG.md | 26 ++ PKG-INFO | 63 +-- README.rst | 58 ++- data/man/gallery-dl.1 | 2 +- data/man/gallery-dl.conf.5 | 87 ++-- docs/gallery-dl.conf | 7 - gallery_dl.egg-info/PKG-INFO | 63 +-- gallery_dl.egg-info/SOURCES.txt | 7 +- gallery_dl/downloader/http.py | 212 +++++---- gallery_dl/extractor/3dbooru.py | 39 +- gallery_dl/extractor/__init__.py | 8 +- gallery_dl/extractor/booru.py | 381 ++++++++-------- gallery_dl/extractor/common.py | 65 ++- gallery_dl/extractor/danbooru.py | 4 +- gallery_dl/extractor/e621.py | 15 +- gallery_dl/extractor/flickr.py | 47 +- gallery_dl/extractor/foolfuuka.py | 4 +- gallery_dl/extractor/foolslide.py | 3 +- gallery_dl/extractor/gelbooru.py | 111 ++--- gallery_dl/extractor/hentainexus.py | 36 +- gallery_dl/extractor/hypnohub.py | 68 --- gallery_dl/extractor/idolcomplex.py | 238 +++++++++- gallery_dl/extractor/imagehosts.py | 6 +- gallery_dl/extractor/instagram.py | 818 ++++++++++++++++++----------------- gallery_dl/extractor/konachan.py | 85 ---- gallery_dl/extractor/mangadex.py | 12 +- gallery_dl/extractor/moebooru.py | 257 +++++++++++ gallery_dl/extractor/nozomi.py | 13 +- gallery_dl/extractor/paheal.py | 24 +- gallery_dl/extractor/piczel.py | 10 +- gallery_dl/extractor/reactor.py | 6 +- gallery_dl/extractor/realbooru.py | 59 --- gallery_dl/extractor/rule34.py | 63 --- gallery_dl/extractor/safebooru.py | 61 --- gallery_dl/extractor/sankaku.py | 332 +++++--------- gallery_dl/extractor/shopify.py | 4 +- gallery_dl/extractor/twitter.py | 59 ++- gallery_dl/extractor/webtoons.py | 13 +- gallery_dl/extractor/yandere.py | 68 --- gallery_dl/job.py | 64 ++- gallery_dl/option.py | 3 +- gallery_dl/postprocessor/classify.py | 5 +- gallery_dl/postprocessor/common.py | 20 - gallery_dl/postprocessor/compare.py | 19 +- gallery_dl/postprocessor/exec.py | 72 +-- gallery_dl/postprocessor/metadata.py | 64 ++- gallery_dl/postprocessor/mtime.py | 1 + gallery_dl/postprocessor/ugoira.py | 5 +- gallery_dl/postprocessor/zip.py | 13 +- gallery_dl/util.py | 10 +- gallery_dl/version.py | 2 +- setup.py | 5 +- test/test_cookies.py | 10 +- test/test_postprocessor.py | 115 ++--- test/test_results.py | 7 +- 55 files changed, 2005 insertions(+), 1844 deletions(-) delete mode 100644 gallery_dl/extractor/hypnohub.py delete mode 100644 gallery_dl/extractor/konachan.py create mode 100644 gallery_dl/extractor/moebooru.py delete mode 100644 gallery_dl/extractor/realbooru.py delete mode 100644 gallery_dl/extractor/rule34.py delete mode 100644 gallery_dl/extractor/safebooru.py delete mode 100644 gallery_dl/extractor/yandere.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e08f243..c536269 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,31 @@ # Changelog +## 1.16.0 - 2020-12-12 +### Additions +- [booru] implement generalized extractors for `*booru` and `moebooru` sites + - add support for sakugabooru.com ([#1136](https://github.com/mikf/gallery-dl/issues/1136)) + - add support for lolibooru.moe ([#1050](https://github.com/mikf/gallery-dl/issues/1050)) + - provide formattable `date` metadata fields ([#1138](https://github.com/mikf/gallery-dl/issues/1138)) +- [postprocessor:metadata] add `event` and `filename` options ([#315](https://github.com/mikf/gallery-dl/issues/315), [#866](https://github.com/mikf/gallery-dl/issues/866), [#984](https://github.com/mikf/gallery-dl/issues/984)) +- [postprocessor:exec] add `event` option ([#992](https://github.com/mikf/gallery-dl/issues/992)) +### Changes +- [flickr] update default directories and improve metadata consistency ([#828](https://github.com/mikf/gallery-dl/issues/828)) +- [sankaku] use API endpoints from `beta.sankakucomplex.com` +- [downloader:http] improve filename extension handling ([#776](https://github.com/mikf/gallery-dl/issues/776)) +- replace all JPEG filename extensions with `jpg` by default +### Fixes +- [hentainexus] fix extraction ([#1166](https://github.com/mikf/gallery-dl/issues/1166)) +- [instagram] rewrite ([#1113](https://github.com/mikf/gallery-dl/issues/1113), [#1122](https://github.com/mikf/gallery-dl/issues/1122), [#1128](https://github.com/mikf/gallery-dl/issues/1128), [#1130](https://github.com/mikf/gallery-dl/issues/1130), [#1149](https://github.com/mikf/gallery-dl/issues/1149)) +- [mangadex] handle external chapters ([#1154](https://github.com/mikf/gallery-dl/issues/1154)) +- [nozomi] handle empty `date` fields ([#1163](https://github.com/mikf/gallery-dl/issues/1163)) +- [paheal] create directory for each post ([#1147](https://github.com/mikf/gallery-dl/issues/1147)) +- [piczel] update API URLs +- [twitter] update image URL format ([#1145](https://github.com/mikf/gallery-dl/issues/1145)) +- [twitter] improve `x-csrf-token` header handling ([#1170](https://github.com/mikf/gallery-dl/issues/1170)) +- [webtoons] update `ageGate` cookies +### Removals +- [sankaku] remove login support + ## 1.15.4 - 2020-11-27 ### Fixes - [2chan] skip external links diff --git a/PKG-INFO b/PKG-INFO index db9cba2..049e111 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,7 +1,7 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.15.4 -Summary: Command-line program to download image-galleries and -collections from several image hosting sites +Version: 1.16.0 +Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann Author-email: mike_faehrmann@web.de @@ -13,8 +13,8 @@ Description: ========== gallery-dl ========== - *gallery-dl* is a command-line program to download image-galleries and - -collections from several image hosting sites (see `Supported Sites`_). + *gallery-dl* is a command-line program to download image galleries and + collections from several image hosting sites (see `Supported Sites`_). It is a cross-platform tool with many configuration options and powerful filenaming capabilities. @@ -46,14 +46,14 @@ Description: ========== .. code:: bash - $ python3 -m pip install --upgrade gallery-dl + $ python3 -m pip install -U gallery-dl - Installing the latest dev-version directly from GitHub can be done with + Installing the latest dev version directly from GitHub can be done with pip_ as well: .. code:: bash - $ python3 -m pip install --upgrade https://github.com/mikf/gallery-dl/archive/master.tar.gz + $ python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz Note: Windows users should use :code:`py -3` instead of :code:`python3`. @@ -94,10 +94,10 @@ Description: ========== put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows `__ - - `Linux `__ + - `Windows `__ + - `Linux `__ - These executables include a Python 3.8 interpreter + These executables include a Python interpreter and all required Python packages. @@ -192,19 +192,22 @@ Description: ========== see gallery-dl-example.conf_. | A list of all available configuration options and their descriptions can be found in configuration.rst_. + | *gallery-dl* searches for configuration files in the following places: - +--------------------------------------------+------------------------------------------+ - | Linux | Windows | - +--------------------------------------------+------------------------------------------+ - |* ``/etc/gallery-dl.conf`` |* ``%APPDATA%\gallery-dl\config.json`` | - |* ``${HOME}/.config/gallery-dl/config.json``|* ``%USERPROFILE%\gallery-dl\config.json``| - |* ``${HOME}/.gallery-dl.conf`` |* ``%USERPROFILE%\gallery-dl.conf`` | - +--------------------------------------------+------------------------------------------+ + Windows: + * ``%APPDATA%\gallery-dl\config.json`` + * ``%USERPROFILE%\gallery-dl\config.json`` + * ``%USERPROFILE%\gallery-dl.conf`` - (``%USERPROFILE%`` usually refers to the user's home directory, - i.e. ``C:\Users\\``) + (``%USERPROFILE%`` usually refers to the user's home directory, + i.e. ``C:\Users\\``) + + Linux, macOS, etc.: + * ``/etc/gallery-dl.conf`` + * ``${HOME}/.config/gallery-dl/config.json`` + * ``${HOME}/.gallery-dl.conf`` Values in later configuration files will override previous ones. @@ -224,9 +227,18 @@ Description: ========== a username & password pair. This is necessary for ``pixiv``, ``nijie``, and ``seiga`` and optional for - ``aryion``, ``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``inkbunny``, - ``instagram``, ``luscious``, ``pinterest``, ``sankaku``, ``subscribestar``, - ``tsumino``, and ``twitter``. + ``aryion``, + ``danbooru``, + ``e621``, + ``exhentai``, + ``idolcomplex``, + ``inkbunny``, + ``instagram``, + ``luscious``, + ``pinterest``, + ``subscribestar``, + ``tsumino``, + and ``twitter``. You can set the necessary information in your configuration file (cf. gallery-dl.conf_) @@ -319,7 +331,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.4.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.0.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -337,8 +349,8 @@ Description: ========== .. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg :target: https://pypi.org/project/gallery-dl/ - .. |build| image:: https://travis-ci.com/mikf/gallery-dl.svg?branch=master - :target: https://travis-ci.com/mikf/gallery-dl + .. |build| image:: https://github.com/mikf/gallery-dl/workflows/tests/badge.svg + :target: https://github.com/mikf/gallery-dl/actions .. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg :target: https://gitter.im/gallery-dl/main @@ -357,6 +369,7 @@ Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3 :: Only Classifier: Topic :: Internet :: WWW/HTTP Classifier: Topic :: Multimedia :: Graphics diff --git a/README.rst b/README.rst index 1cde544..4bfb821 100644 --- a/README.rst +++ b/README.rst @@ -2,8 +2,8 @@ gallery-dl ========== -*gallery-dl* is a command-line program to download image-galleries and --collections from several image hosting sites (see `Supported Sites`_). +*gallery-dl* is a command-line program to download image galleries and +collections from several image hosting sites (see `Supported Sites`_). It is a cross-platform tool with many configuration options and powerful filenaming capabilities. @@ -35,14 +35,14 @@ easily installed or upgraded using pip_: .. code:: bash - $ python3 -m pip install --upgrade gallery-dl + $ python3 -m pip install -U gallery-dl -Installing the latest dev-version directly from GitHub can be done with +Installing the latest dev version directly from GitHub can be done with pip_ as well: .. code:: bash - $ python3 -m pip install --upgrade https://github.com/mikf/gallery-dl/archive/master.tar.gz + $ python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz Note: Windows users should use :code:`py -3` instead of :code:`python3`. @@ -83,10 +83,10 @@ Download a standalone executable file, put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ -These executables include a Python 3.8 interpreter +These executables include a Python interpreter and all required Python packages. @@ -181,19 +181,22 @@ Configuration files for *gallery-dl* use a JSON-based file format. see gallery-dl-example.conf_. | A list of all available configuration options and their descriptions can be found in configuration.rst_. +| *gallery-dl* searches for configuration files in the following places: -+--------------------------------------------+------------------------------------------+ -| Linux | Windows | -+--------------------------------------------+------------------------------------------+ -|* ``/etc/gallery-dl.conf`` |* ``%APPDATA%\gallery-dl\config.json`` | -|* ``${HOME}/.config/gallery-dl/config.json``|* ``%USERPROFILE%\gallery-dl\config.json``| -|* ``${HOME}/.gallery-dl.conf`` |* ``%USERPROFILE%\gallery-dl.conf`` | -+--------------------------------------------+------------------------------------------+ +Windows: + * ``%APPDATA%\gallery-dl\config.json`` + * ``%USERPROFILE%\gallery-dl\config.json`` + * ``%USERPROFILE%\gallery-dl.conf`` -(``%USERPROFILE%`` usually refers to the user's home directory, -i.e. ``C:\Users\\``) + (``%USERPROFILE%`` usually refers to the user's home directory, + i.e. ``C:\Users\\``) + +Linux, macOS, etc.: + * ``/etc/gallery-dl.conf`` + * ``${HOME}/.config/gallery-dl/config.json`` + * ``${HOME}/.gallery-dl.conf`` Values in later configuration files will override previous ones. @@ -213,9 +216,18 @@ Some extractors require you to provide valid login credentials in the form of a username & password pair. This is necessary for ``pixiv``, ``nijie``, and ``seiga`` and optional for -``aryion``, ``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``inkbunny``, -``instagram``, ``luscious``, ``pinterest``, ``sankaku``, ``subscribestar``, -``tsumino``, and ``twitter``. +``aryion``, +``danbooru``, +``e621``, +``exhentai``, +``idolcomplex``, +``inkbunny``, +``instagram``, +``luscious``, +``pinterest``, +``subscribestar``, +``tsumino``, +and ``twitter``. You can set the necessary information in your configuration file (cf. gallery-dl.conf_) @@ -308,7 +320,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.4.tar.gz +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.0.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -326,8 +338,8 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg :target: https://pypi.org/project/gallery-dl/ -.. |build| image:: https://travis-ci.com/mikf/gallery-dl.svg?branch=master - :target: https://travis-ci.com/mikf/gallery-dl +.. |build| image:: https://github.com/mikf/gallery-dl/workflows/tests/badge.svg + :target: https://github.com/mikf/gallery-dl/actions .. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg :target: https://gitter.im/gallery-dl/main diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 114502a..af6eaf3 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2020-11-27" "1.15.4" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2020-12-12" "1.16.0" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 34ac377..8c291fb 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2020-11-27" "1.15.4" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2020-12-12" "1.16.0" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -222,9 +222,6 @@ escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[] \f[I]object\f[] .IP "Default:" 9 -\f[I]null\f[] - -.IP "Example:" 4 .. code:: json { @@ -236,7 +233,7 @@ escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[] } .IP "Description:" 4 -A JSON \f[I]object\f[] mapping filename extensions to alternatives. +A JSON \f[I]object\f[] mapping filename extensions to their replacements. .SS extractor.*.skip @@ -349,8 +346,6 @@ and optional for .br * \f[I]pinterest\f[] .br -* \f[I]sankaku\f[] -.br * \f[I]subscribestar\f[] .br * \f[I]tsumino\f[] @@ -1537,21 +1532,6 @@ and \f[I]"gif"\f[] (in that order) will be tried instead, until an available format is found. -.SS extractor.sankaku.wait-min & .wait-max -.IP "Type:" 6 -\f[I]float\f[] - -.IP "Default:" 9 -\f[I]3.0\f[] and \f[I]6.0\f[] - -.IP "Description:" 4 -Minimum and maximum wait time in seconds between each image - -Sankaku Channel responds with \f[I]429 Too Many Requests\f[] if it -receives too many HTTP requests in a certain amount of time. -Waiting a few seconds between each request tries to prevent that. - - .SS extractor.sankakucomplex.embeds .IP "Type:" 6 \f[I]bool\f[] @@ -2243,7 +2223,7 @@ The command to run. * If this is a \f[I]string\f[], it will be executed using the system's shell, e.g. \f[I]/bin/sh\f[]. Any \f[I]{}\f[] will be replaced with the full path of a file or target directory, depending on -\f[I]exec.final\f[] +\f[I]exec.event\f[] .br * If this is a \f[I]list\f[], the first element specifies the program @@ -2253,17 +2233,17 @@ the files' metadata as well as \f[I]{_path}\f[], \f[I]{_directory}\f[], and \f[I]{_filename}\f[]. -.SS exec.final +.SS exec.event .IP "Type:" 6 -\f[I]bool\f[] +\f[I]string\f[] .IP "Default:" 9 -\f[I]false\f[] +\f[I]"after"\f[] .IP "Description:" 4 -Controls whether to execute \f[I]exec.command\f[] for each -downloaded file or only once after all files -have been downloaded successfully. +The event for which \f[I]exec.command\f[] is run. + +See \f[I]metadata.event\f[] for a list of available events. .SS metadata.mode @@ -2286,6 +2266,24 @@ Select how to write metadata. to a file's metadata dictionary +.SS metadata.filename +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Example:" 4 +"{id}.data.json" + +.IP "Description:" 4 +A \f[I]format string\f[] to build the filenames for metadata files with. +(see \f[I]extractor.filename\f[]) + +If this option is set, \f[I]metadata.extension\f[] and +\f[I]metadata.extension-format\f[] will be ignored. + + .SS metadata.directory .IP "Type:" 6 \f[I]string\f[] @@ -2330,6 +2328,37 @@ files with, which will replace the original filename extensions. Note: \f[I]metadata.extension\f[] is ignored if this option is set. +.SS metadata.event +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"file"\f[] + +.IP "Description:" 4 +The event for which metadata gets written to a file. + +The available events are: + +\f[I]init\f[] +After post procesor initialization +and before the first file download +\f[I]finalize\f[] +On extractor shutdown, e.g. after all files were downloaded +\f[I]prepare\f[] +Before a file download +\f[I]file\f[] +When completing a file download, +but before it gets moved to its target location +\f[I]after\f[] +After a file got moved to its target location +\f[I]skip\f[] +When skipping a file download +\f[I]post\f[] +When starting to download all files of a post, +e.g. a Tweet on Twitter or a post on Patreon. + + .SS metadata.content-format .IP "Type:" 6 \f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index ecb9f9b..bc9999b 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -128,13 +128,6 @@ { "format": "mp4" }, - "sankaku": - { - "username": null, - "password": null, - "wait-min": 3.0, - "wait-max": 6.0 - }, "seiga": { "username": null, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index d22ca6b..e0eda0d 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,7 +1,7 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.15.4 -Summary: Command-line program to download image-galleries and -collections from several image hosting sites +Version: 1.16.0 +Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann Author-email: mike_faehrmann@web.de @@ -13,8 +13,8 @@ Description: ========== gallery-dl ========== - *gallery-dl* is a command-line program to download image-galleries and - -collections from several image hosting sites (see `Supported Sites`_). + *gallery-dl* is a command-line program to download image galleries and + collections from several image hosting sites (see `Supported Sites`_). It is a cross-platform tool with many configuration options and powerful filenaming capabilities. @@ -46,14 +46,14 @@ Description: ========== .. code:: bash - $ python3 -m pip install --upgrade gallery-dl + $ python3 -m pip install -U gallery-dl - Installing the latest dev-version directly from GitHub can be done with + Installing the latest dev version directly from GitHub can be done with pip_ as well: .. code:: bash - $ python3 -m pip install --upgrade https://github.com/mikf/gallery-dl/archive/master.tar.gz + $ python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz Note: Windows users should use :code:`py -3` instead of :code:`python3`. @@ -94,10 +94,10 @@ Description: ========== put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows `__ - - `Linux `__ + - `Windows `__ + - `Linux `__ - These executables include a Python 3.8 interpreter + These executables include a Python interpreter and all required Python packages. @@ -192,19 +192,22 @@ Description: ========== see gallery-dl-example.conf_. | A list of all available configuration options and their descriptions can be found in configuration.rst_. + | *gallery-dl* searches for configuration files in the following places: - +--------------------------------------------+------------------------------------------+ - | Linux | Windows | - +--------------------------------------------+------------------------------------------+ - |* ``/etc/gallery-dl.conf`` |* ``%APPDATA%\gallery-dl\config.json`` | - |* ``${HOME}/.config/gallery-dl/config.json``|* ``%USERPROFILE%\gallery-dl\config.json``| - |* ``${HOME}/.gallery-dl.conf`` |* ``%USERPROFILE%\gallery-dl.conf`` | - +--------------------------------------------+------------------------------------------+ + Windows: + * ``%APPDATA%\gallery-dl\config.json`` + * ``%USERPROFILE%\gallery-dl\config.json`` + * ``%USERPROFILE%\gallery-dl.conf`` - (``%USERPROFILE%`` usually refers to the user's home directory, - i.e. ``C:\Users\\``) + (``%USERPROFILE%`` usually refers to the user's home directory, + i.e. ``C:\Users\\``) + + Linux, macOS, etc.: + * ``/etc/gallery-dl.conf`` + * ``${HOME}/.config/gallery-dl/config.json`` + * ``${HOME}/.gallery-dl.conf`` Values in later configuration files will override previous ones. @@ -224,9 +227,18 @@ Description: ========== a username & password pair. This is necessary for ``pixiv``, ``nijie``, and ``seiga`` and optional for - ``aryion``, ``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``inkbunny``, - ``instagram``, ``luscious``, ``pinterest``, ``sankaku``, ``subscribestar``, - ``tsumino``, and ``twitter``. + ``aryion``, + ``danbooru``, + ``e621``, + ``exhentai``, + ``idolcomplex``, + ``inkbunny``, + ``instagram``, + ``luscious``, + ``pinterest``, + ``subscribestar``, + ``tsumino``, + and ``twitter``. You can set the necessary information in your configuration file (cf. gallery-dl.conf_) @@ -319,7 +331,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.4.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.0.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -337,8 +349,8 @@ Description: ========== .. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg :target: https://pypi.org/project/gallery-dl/ - .. |build| image:: https://travis-ci.com/mikf/gallery-dl.svg?branch=master - :target: https://travis-ci.com/mikf/gallery-dl + .. |build| image:: https://github.com/mikf/gallery-dl/workflows/tests/badge.svg + :target: https://github.com/mikf/gallery-dl/actions .. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg :target: https://gitter.im/gallery-dl/main @@ -357,6 +369,7 @@ Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3 :: Only Classifier: Topic :: Internet :: WWW/HTTP Classifier: Topic :: Multimedia :: Graphics diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index c2e5cb4..fd1b4a1 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -74,7 +74,6 @@ gallery_dl/extractor/hentaihere.py gallery_dl/extractor/hentainexus.py gallery_dl/extractor/hiperdex.py gallery_dl/extractor/hitomi.py -gallery_dl/extractor/hypnohub.py gallery_dl/extractor/idolcomplex.py gallery_dl/extractor/imagebam.py gallery_dl/extractor/imagechest.py @@ -91,7 +90,6 @@ gallery_dl/extractor/kabeuchi.py gallery_dl/extractor/keenspot.py gallery_dl/extractor/khinsider.py gallery_dl/extractor/komikcast.py -gallery_dl/extractor/konachan.py gallery_dl/extractor/lineblog.py gallery_dl/extractor/livedoor.py gallery_dl/extractor/luscious.py @@ -106,6 +104,7 @@ gallery_dl/extractor/mangastream.py gallery_dl/extractor/mangoxo.py gallery_dl/extractor/mastodon.py gallery_dl/extractor/message.py +gallery_dl/extractor/moebooru.py gallery_dl/extractor/myhentaigallery.py gallery_dl/extractor/myportfolio.py gallery_dl/extractor/naver.py @@ -128,12 +127,9 @@ gallery_dl/extractor/pornhub.py gallery_dl/extractor/pururin.py gallery_dl/extractor/reactor.py gallery_dl/extractor/readcomiconline.py -gallery_dl/extractor/realbooru.py gallery_dl/extractor/recursive.py gallery_dl/extractor/reddit.py gallery_dl/extractor/redgifs.py -gallery_dl/extractor/rule34.py -gallery_dl/extractor/safebooru.py gallery_dl/extractor/sankaku.py gallery_dl/extractor/sankakucomplex.py gallery_dl/extractor/seiga.py @@ -160,7 +156,6 @@ gallery_dl/extractor/weibo.py gallery_dl/extractor/wikiart.py gallery_dl/extractor/xhamster.py gallery_dl/extractor/xvideos.py -gallery_dl/extractor/yandere.py gallery_dl/extractor/yuki.py gallery_dl/postprocessor/__init__.py gallery_dl/postprocessor/classify.py diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 0e67330..b8546a8 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -44,12 +44,14 @@ class HttpDownloader(DownloaderBase): if self.minsize: minsize = text.parse_bytes(self.minsize) if not minsize: - self.log.warning("Invalid minimum filesize (%r)", self.minsize) + self.log.warning( + "Invalid minimum file size (%r)", self.minsize) self.minsize = minsize if self.maxsize: maxsize = text.parse_bytes(self.maxsize) if not maxsize: - self.log.warning("Invalid maximum filesize (%r)", self.maxsize) + self.log.warning( + "Invalid maximum file size (%r)", self.maxsize) self.maxsize = maxsize if self.rate: rate = text.parse_bytes(self.rate) @@ -84,17 +86,20 @@ class HttpDownloader(DownloaderBase): if tries: if response: response.close() + response = None self.log.warning("%s (%s/%s)", msg, tries, self.retries+1) if tries > self.retries: return False time.sleep(tries) - tries += 1 + tries += 1 headers = {} + file_header = None + # check for .part file - filesize = pathfmt.part_size() - if filesize: - headers["Range"] = "bytes={}-".format(filesize) + file_size = pathfmt.part_size() + if file_size: + headers["Range"] = "bytes={}-".format(file_size) # file-specific headers extra = pathfmt.kwdict.get("_http_headers") if extra: @@ -118,9 +123,9 @@ class HttpDownloader(DownloaderBase): offset = 0 size = response.headers.get("Content-Length") elif code == 206: # Partial Content - offset = filesize + offset = file_size size = response.headers["Content-Range"].rpartition("/")[2] - elif code == 416 and filesize: # Requested Range Not Satisfiable + elif code == 416 and file_size: # Requested Range Not Satisfiable break else: msg = "'{} {}' for '{}'".format(code, response.reason, url) @@ -129,7 +134,14 @@ class HttpDownloader(DownloaderBase): self.log.warning(msg) return False - # check filesize + # set missing filename extension from MIME type + if not pathfmt.extension: + pathfmt.set_extension(self._find_extension(response)) + if pathfmt.exists(): + pathfmt.temppath = "" + return True + + # check file size size = text.parse_int(size, None) if size is not None: if self.minsize and size < self.minsize: @@ -143,50 +155,59 @@ class HttpDownloader(DownloaderBase): size, self.maxsize) return False - # set missing filename extension - if not pathfmt.extension: - pathfmt.set_extension(self.get_extension(response)) - if pathfmt.exists(): + content = response.iter_content(self.chunk_size) + + # check filename extension against file header + if self.adjust_extension and not offset and \ + pathfmt.extension in FILE_SIGNATURES: + try: + file_header = next( + content if response.raw.chunked + else response.iter_content(16), b"") + except (RequestException, SSLError, OpenSSLError) as exc: + msg = str(exc) + print() + continue + if self._adjust_extension(pathfmt, file_header) and \ + pathfmt.exists(): pathfmt.temppath = "" return True # set open mode if not offset: mode = "w+b" - if filesize: + if file_size: self.log.debug("Unable to resume partial download") else: mode = "r+b" self.log.debug("Resuming download at byte %d", offset) - # start downloading - self.out.start(pathfmt.path) + # download content self.downloading = True - with pathfmt.open(mode) as file: - if offset: - file.seek(offset) - - # download content + with pathfmt.open(mode) as fp: + if file_header: + fp.write(file_header) + elif offset: + if self.adjust_extension and \ + pathfmt.extension in FILE_SIGNATURES: + self._adjust_extension(pathfmt, fp.read(16)) + fp.seek(offset) + + self.out.start(pathfmt.path) try: - self.receive(response, file) + self.receive(fp, content) except (RequestException, SSLError, OpenSSLError) as exc: msg = str(exc) print() continue - # check filesize - if size and file.tell() < size: - msg = "filesize mismatch ({} < {})".format( - file.tell(), size) + # check file size + if size and fp.tell() < size: + msg = "file size mismatch ({} < {})".format( + fp.tell(), size) print() continue - # check filename extension - if self.adjust_extension: - adj_ext = self.check_extension(file, pathfmt.extension) - if adj_ext: - pathfmt.set_extension(adj_ext) - break self.downloading = False @@ -198,16 +219,18 @@ class HttpDownloader(DownloaderBase): return True - def receive(self, response, file): - for data in response.iter_content(self.chunk_size): - file.write(data) + @staticmethod + def receive(fp, content): + write = fp.write + for data in content: + write(data) - def _receive_rate(self, response, file): - t1 = time.time() + def _receive_rate(self, fp, content): rt = self.rate + t1 = time.time() - for data in response.iter_content(self.chunk_size): - file.write(data) + for data in content: + fp.write(data) t2 = time.time() # current time actual = t2 - t1 # actual elapsed time @@ -220,81 +243,98 @@ class HttpDownloader(DownloaderBase): else: t1 = t2 - def get_extension(self, response): + def _find_extension(self, response): + """Get filename extension from MIME type""" mtype = response.headers.get("Content-Type", "image/jpeg") mtype = mtype.partition(";")[0] if "/" not in mtype: mtype = "image/" + mtype - if mtype in MIMETYPE_MAP: - return MIMETYPE_MAP[mtype] + if mtype in MIME_TYPES: + return MIME_TYPES[mtype] - exts = mimetypes.guess_all_extensions(mtype, strict=False) - if exts: - exts.sort() - return exts[-1][1:] + ext = mimetypes.guess_extension(mtype, strict=False) + if ext: + return ext[1:] - self.log.warning( - "No filename extension found for MIME type '%s'", mtype) - return "txt" + self.log.warning("Unknown MIME type '%s'", mtype) + return "bin" @staticmethod - def check_extension(file, extension): - """Check filename extension against fileheader""" - if extension in FILETYPE_CHECK: - file.seek(0) - header = file.read(8) - if len(header) >= 8 and not FILETYPE_CHECK[extension](header): - for ext, check in FILETYPE_CHECK.items(): - if ext != extension and check(header): - return ext - return None - - -FILETYPE_CHECK = { - "jpg": lambda h: h[0:2] == b"\xff\xd8", - "png": lambda h: h[0:8] == b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a", - "gif": lambda h: h[0:4] == b"GIF8" and h[5] == 97, -} + def _adjust_extension(pathfmt, file_header): + """Check filename extension against file header""" + sig = FILE_SIGNATURES[pathfmt.extension] + if not file_header.startswith(sig): + for ext, sig in FILE_SIGNATURES.items(): + if file_header.startswith(sig): + pathfmt.set_extension(ext) + return True + return False -MIMETYPE_MAP = { - "image/jpeg": "jpg", - "image/jpg": "jpg", - "image/png": "png", - "image/gif": "gif", - "image/bmp": "bmp", - "image/x-bmp": "bmp", +MIME_TYPES = { + "image/jpeg" : "jpg", + "image/jpg" : "jpg", + "image/png" : "png", + "image/gif" : "gif", + "image/bmp" : "bmp", + "image/x-bmp" : "bmp", "image/x-ms-bmp": "bmp", - "image/webp": "webp", - "image/svg+xml": "svg", + "image/webp" : "webp", + "image/svg+xml" : "svg", + "image/x-photoshop" : "psd", + "application/x-photoshop" : "psd", "image/vnd.adobe.photoshop": "psd", - "image/x-photoshop": "psd", - "application/x-photoshop": "psd", "video/webm": "webm", - "video/ogg": "ogg", - "video/mp4": "mp4", + "video/ogg" : "ogg", + "video/mp4" : "mp4", - "audio/wav": "wav", + "audio/wav" : "wav", "audio/x-wav": "wav", - "audio/webm": "webm", - "audio/ogg": "ogg", - "audio/mpeg": "mp3", + "audio/webm" : "webm", + "audio/ogg" : "ogg", + "audio/mpeg" : "mp3", - "application/zip": "zip", + "application/zip" : "zip", "application/x-zip": "zip", "application/x-zip-compressed": "zip", - "application/rar": "rar", + "application/rar" : "rar", "application/x-rar": "rar", "application/x-rar-compressed": "rar", - "application/x-7z-compressed": "7z", + "application/x-7z-compressed" : "7z", + + "application/pdf" : "pdf", + "application/x-pdf": "pdf", + "application/x-shockwave-flash": "swf", "application/ogg": "ogg", "application/octet-stream": "bin", } +# taken from https://en.wikipedia.org/wiki/List_of_file_signatures +FILE_SIGNATURES = { + "jpg" : b"\xFF\xD8\xFF", + "png" : b"\x89PNG\r\n\x1A\n", + "gif" : (b"GIF87a", b"GIF89a"), + "bmp" : b"BM", + "webp": b"RIFF", + "svg" : b"[^&#]+)") @@ -35,8 +34,12 @@ class _3dbooruTagExtractor(booru.TagMixin, _3dbooruExtractor): "content": "11cbda40c287e026c1ce4ca430810f761f2d0b2a", }) + def posts(self): + params = {"tags": self.tags} + return self._pagination(self.root + "/post/index.json", params) -class _3dbooruPoolExtractor(booru.PoolMixin, _3dbooruExtractor): + +class _3dbooruPoolExtractor(_3dbooruBase, moebooru.MoebooruPoolExtractor): """Extractor for image-pools from behoimi.org""" pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(?P\d+)" test = ("http://behoimi.org/pool/show/27", { @@ -44,8 +47,12 @@ class _3dbooruPoolExtractor(booru.PoolMixin, _3dbooruExtractor): "content": "fd5b37c5c6c2de4b4d6f1facffdefa1e28176554", }) + def posts(self): + params = {"tags": "pool:" + self.pool_id} + return self._pagination(self.root + "/post/index.json", params) + -class _3dbooruPostExtractor(booru.PostMixin, _3dbooruExtractor): +class _3dbooruPostExtractor(_3dbooruBase, moebooru.MoebooruPostExtractor): """Extractor for single images from behoimi.org""" pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(?P\d+)" test = ("http://behoimi.org/post/show/140852", { @@ -60,8 +67,13 @@ class _3dbooruPostExtractor(booru.PostMixin, _3dbooruExtractor): }, }) + def posts(self): + params = {"tags": "id:" + self.post_id} + return self._pagination(self.root + "/post/index.json", params) + -class _3dbooruPopularExtractor(booru.MoebooruPopularMixin, _3dbooruExtractor): +class _3dbooruPopularExtractor( + _3dbooruBase, moebooru.MoebooruPopularExtractor): """Extractor for popular images from behoimi.org""" pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org" r"/post/popular_(?Pby_(?:day|week|month)|recent)" @@ -70,8 +82,3 @@ class _3dbooruPopularExtractor(booru.MoebooruPopularMixin, _3dbooruExtractor): "pattern": r"http://behoimi\.org/data/../../[0-9a-f]{32}\.jpg", "count": 20, }) - - def __init__(self, match): - super().__init__(match) - self.api_url = "http://behoimi.org/post/popular_{scale}.json".format( - scale=self.scale) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index d0c327a..611603e 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -44,7 +44,6 @@ modules = [ "hentainexus", "hiperdex", "hitomi", - "hypnohub", "idolcomplex", "imagebam", "imagechest", @@ -60,7 +59,6 @@ modules = [ "keenspot", "khinsider", "komikcast", - "konachan", "lineblog", "livedoor", "luscious", @@ -94,11 +92,8 @@ modules = [ "pururin", "reactor", "readcomiconline", - "realbooru", "reddit", "redgifs", - "rule34", - "safebooru", "sankaku", "sankakucomplex", "seiga", @@ -123,8 +118,9 @@ modules = [ "wikiart", "xhamster", "xvideos", - "yandere", "yuki", + "booru", + "moebooru", "foolfuuka", "foolslide", "mastodon", diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index 0176d76..517df93 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -1,247 +1,248 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2020 Mike Fährmann +# Copyright 2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Base classes for extractors for danbooru and co""" +"""Extractors for *booru sites""" + +from .common import Extractor, Message, generate_extractors +from .. import text, util, exception -from .common import Extractor, Message, SharedConfigMixin -from .. import text, exception from xml.etree import ElementTree import collections -import datetime -import operator import re -class BooruExtractor(SharedConfigMixin, Extractor): - """Base class for all booru extractors""" +class BooruExtractor(Extractor): + """Base class for *booru extractors""" basecategory = "booru" filename_fmt = "{category}_{id}_{md5}.{extension}" - api_url = "" - post_url = "" - per_page = 50 - page_start = 1 - page_limit = None - sort = False + page_start = 0 + per_page = 100 - def __init__(self, match): - super().__init__(match) - self.params = {} - self.extags = self.post_url and self.config("tags", False) + def items(self): + self.login() + extended_tags = self.config("tags", False) + data = self.metadata() + for post in self.posts(): + try: + url = self._prepare_post(post, extended_tags) + except KeyError: + continue + post.update(data) + text.nameext_from_url(url, post) + yield Message.Directory, post + yield Message.Url, url, post def skip(self, num): pages = num // self.per_page - if self.page_limit and pages + self.page_start > self.page_limit: - pages = self.page_limit - self.page_start self.page_start += pages return pages * self.per_page - def items(self): - yield Message.Version, 1 - data = self.get_metadata() + def login(self): + """Login and set necessary cookies""" - self.reset_page() - while True: - images = self.parse_response( - self.request(self.api_url, params=self.params)) - - for image in images: - try: - url = self.get_file_url(image) - except KeyError: - continue - if url.startswith("/"): - url = text.urljoin(self.api_url, url) - image.update(data) - text.nameext_from_url(url, image) - if self.extags: - self.extended_tags(image) - yield Message.Directory, image - yield Message.Url, url, image - - if len(images) < self.per_page: - return - self.update_page(image) + def metadata(self): + """Return a dict with general metadata""" + return () - def reset_page(self): - """Initialize params to point to the first page""" - self.params["page"] = self.page_start + def posts(self): + """Return an iterable with post objects""" + return () - def update_page(self, data): - """Update params to point to the next page""" + def _prepare_post(self, post, extended_tags=False): + url = post["file_url"] + if url[0] == "/": + url = self.root + url + if extended_tags: + self._fetch_extended_tags(post) + post["date"] = text.parse_datetime( + post["created_at"], "%a %b %d %H:%M:%S %z %Y") + return url - def parse_response(self, response): - """Parse JSON API response""" - images = response.json() - if self.sort: - images.sort(key=operator.itemgetter("score", "id"), - reverse=True) - return images + def _fetch_extended_tags(self, post, page=None): + if not page: + url = "{}/index.php?page=post&s=view&id={}".format( + self.root, post["id"]) + page = self.request(url).text + html = text.extract(page, '