From 3201d77a148367d739862b4f07868a76eaeb7cb1 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Sat, 13 Mar 2021 16:26:30 -0500 Subject: New upstream version 1.17.0. --- CHANGELOG.md | 37 ++++ PKG-INFO | 73 ++++---- README.rst | 71 ++++---- data/completion/_gallery-dl | 5 +- data/completion/gallery-dl | 2 +- data/man/gallery-dl.1 | 10 +- data/man/gallery-dl.conf.5 | 166 +++++++++++------- docs/gallery-dl.conf | 164 +++++++++++++++--- gallery_dl.egg-info/PKG-INFO | 73 ++++---- gallery_dl.egg-info/SOURCES.txt | 6 +- gallery_dl/__init__.py | 25 +-- gallery_dl/cloudflare.py | 201 ---------------------- gallery_dl/downloader/__init__.py | 16 +- gallery_dl/downloader/http.py | 8 +- gallery_dl/downloader/ytdl.py | 16 +- gallery_dl/extractor/500px.py | 16 +- gallery_dl/extractor/__init__.py | 11 +- gallery_dl/extractor/booru.py | 201 +--------------------- gallery_dl/extractor/common.py | 293 ++++++++++++++++++++------------ gallery_dl/extractor/cyberdrop.py | 58 +++++++ gallery_dl/extractor/deviantart.py | 10 +- gallery_dl/extractor/erome.py | 15 +- gallery_dl/extractor/exhentai.py | 116 ++++++++----- gallery_dl/extractor/foolfuuka.py | 232 ++++++++++++------------- gallery_dl/extractor/foolslide.py | 190 +++++++++++---------- gallery_dl/extractor/gelbooru.py | 14 +- gallery_dl/extractor/gelbooru_v01.py | 143 ++++++++++++++++ gallery_dl/extractor/gelbooru_v02.py | 194 +++++++++++++++++++++ gallery_dl/extractor/hentaicafe.py | 103 +++++++++-- gallery_dl/extractor/hentainexus.py | 10 +- gallery_dl/extractor/idolcomplex.py | 15 +- gallery_dl/extractor/imgur.py | 2 - gallery_dl/extractor/instagram.py | 144 +++++++++------- gallery_dl/extractor/komikcast.py | 2 +- gallery_dl/extractor/mangadex.py | 8 +- gallery_dl/extractor/mastodon.py | 216 ++++++++++------------- gallery_dl/extractor/message.py | 4 +- gallery_dl/extractor/moebooru.py | 245 +++++++++++++------------- gallery_dl/extractor/naverwebtoon.py | 128 ++++++++++++++ gallery_dl/extractor/oauth.py | 80 +++------ 
gallery_dl/extractor/patreon.py | 9 +- gallery_dl/extractor/pixiv.py | 5 +- gallery_dl/extractor/reactor.py | 23 +-- gallery_dl/extractor/readcomiconline.py | 5 +- gallery_dl/extractor/sankakucomplex.py | 11 +- gallery_dl/extractor/shopify.py | 79 ++++----- gallery_dl/extractor/tumblrgallery.py | 149 ++++++++++++++++ gallery_dl/extractor/twitter.py | 80 +++++++-- gallery_dl/extractor/unsplash.py | 6 +- gallery_dl/extractor/wallhaven.py | 146 +++++++++++----- gallery_dl/job.py | 71 +++++--- gallery_dl/option.py | 16 +- gallery_dl/postprocessor/__init__.py | 14 +- gallery_dl/postprocessor/exec.py | 6 +- gallery_dl/postprocessor/metadata.py | 6 +- gallery_dl/util.py | 44 +++-- gallery_dl/version.py | 2 +- test/test_downloader.py | 8 +- test/test_extractor.py | 4 - test/test_postprocessor.py | 4 +- test/test_results.py | 12 +- test/test_util.py | 12 +- 62 files changed, 2396 insertions(+), 1639 deletions(-) delete mode 100644 gallery_dl/cloudflare.py create mode 100644 gallery_dl/extractor/cyberdrop.py create mode 100644 gallery_dl/extractor/gelbooru_v01.py create mode 100644 gallery_dl/extractor/gelbooru_v02.py create mode 100644 gallery_dl/extractor/naverwebtoon.py create mode 100644 gallery_dl/extractor/tumblrgallery.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 893b944..ef4148a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,42 @@ # Changelog +## 1.17.0 - 2021-03-05 +### Additions +- [cyberdrop] add support for `https://cyberdrop.me/` ([#1328](https://github.com/mikf/gallery-dl/issues/1328)) +- [exhentai] add `metadata` option; extract more metadata from gallery pages ([#1325](https://github.com/mikf/gallery-dl/issues/1325)) +- [hentaicafe] add `search` and `tag` extractors ([#1345](https://github.com/mikf/gallery-dl/issues/1345)) +- [hentainexus] add `original` option ([#1322](https://github.com/mikf/gallery-dl/issues/1322)) +- [instagram] support `/user/reels/` URLs ([#1329](https://github.com/mikf/gallery-dl/issues/1329)) +- [naverwebtoon] add 
support for `https://comic.naver.com/` ([#1331](https://github.com/mikf/gallery-dl/issues/1331)) +- [pixiv] add `translated-tags` option ([#1354](https://github.com/mikf/gallery-dl/issues/1354)) +- [tbib] add support for `https://tbib.org/` ([#473](https://github.com/mikf/gallery-dl/issues/473), [#1082](https://github.com/mikf/gallery-dl/issues/1082)) +- [tumblrgallery] add support for `https://tumblrgallery.xyz/` ([#1298](https://github.com/mikf/gallery-dl/issues/1298)) +- [twitter] add extractor for followed users ([#1337](https://github.com/mikf/gallery-dl/issues/1337)) +- [twitter] add option to download all media from conversations ([#1319](https://github.com/mikf/gallery-dl/issues/1319)) +- [wallhaven] add `collections` extractor ([#1351](https://github.com/mikf/gallery-dl/issues/1351)) +- [snap] allow access to user's .netrc for site authentication ([#1352](https://github.com/mikf/gallery-dl/issues/1352)) +- add extractors for Gelbooru v0.1 sites ([#234](https://github.com/mikf/gallery-dl/issues/234), [#426](https://github.com/mikf/gallery-dl/issues/426), [#473](https://github.com/mikf/gallery-dl/issues/473), [#767](https://github.com/mikf/gallery-dl/issues/767), [#1238](https://github.com/mikf/gallery-dl/issues/1238)) +- add `-E/--extractor-info` command-line option ([#875](https://github.com/mikf/gallery-dl/issues/875)) +- add GitHub Actions workflow for building standalone executables ([#1312](https://github.com/mikf/gallery-dl/issues/1312)) +- add `browser` and `headers` options ([#1117](https://github.com/mikf/gallery-dl/issues/1117)) +- add option to use different youtube-dl forks ([#1330](https://github.com/mikf/gallery-dl/issues/1330)) +- support using multiple input files at once ([#1353](https://github.com/mikf/gallery-dl/issues/1353)) +### Changes +- [deviantart] extend `extra` option to also download embedded DeviantArt posts. 
+- [exhentai] rename metadata fields to match API results ([#1325](https://github.com/mikf/gallery-dl/issues/1325)) +- [mangadex] use `api.mangadex.org` as default API server +- [mastodon] cache OAuth tokens ([#616](https://github.com/mikf/gallery-dl/issues/616)) +- replace `wait-min` and `wait-max` with `sleep-request` +### Fixes +- [500px] skip unavailable photos ([#1335](https://github.com/mikf/gallery-dl/issues/1335)) +- [komikcast] fix extraction +- [readcomiconline] download high quality image versions ([#1347](https://github.com/mikf/gallery-dl/issues/1347)) +- [twitter] update GraphQL endpoints +- fix crash when `base-directory` is an empty string ([#1339](https://github.com/mikf/gallery-dl/issues/1339)) +### Removals +- remove support for formerly deprecated options +- remove `cloudflare` module + ## 1.16.5 - 2021-02-14 ### Additions - [behance] support `video` modules ([#1282](https://github.com/mikf/gallery-dl/issues/1282)) diff --git a/PKG-INFO b/PKG-INFO index a89521e..7a9a43a 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.16.5 +Version: 1.17.0 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -21,6 +21,8 @@ Description: ========== |pypi| |build| |gitter| + .. contents:: + Dependencies ============ @@ -38,6 +40,7 @@ Description: ========== Installation ============ + Pip --- @@ -57,48 +60,26 @@ Description: ========== Note: Windows users should use :code:`py -3` instead of :code:`python3`. - | It is advised to use the latest version of pip_, - including the essential packages :code:`setuptools` and :code:`wheel`. - | To ensure that these packages are up-to-date, run + It is advised to use the latest version of pip_, + including the essential packages :code:`setuptools` and :code:`wheel`. + To ensure these packages are up-to-date, run .. 
code:: bash $ python3 -m pip install --upgrade pip setuptools wheel - From Source - ----------- - - Get the code by either - - * Downloading a stable_ or dev_ archive and unpacking it - * Or via :code:`git clone https://github.com/mikf/gallery-dl.git` - - Navigate into the respective directory and run the :code:`setup.py` file. - - .. code:: bash - - $ wget https://github.com/mikf/gallery-dl/archive/master.tar.gz - $ tar -xf master.tar.gz - # or - $ git clone https://github.com/mikf/gallery-dl.git - - $ cd gallery-dl* - $ python3 setup.py install - - Standalone Executable --------------------- - Download a standalone executable file, - put it into your `PATH `__, - and run it inside a command prompt (like ``cmd.exe``). + Prebuilt executable files with a Python interpreter and + required Python packages included are available for - - `Windows `__ - - `Linux `__ + - `Windows `__ + - `Linux `__ - These executables include a Python interpreter - and all required Python packages. + | Executables built from the latest commit can be found at + | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml Snap @@ -110,6 +91,7 @@ Description: ========== $ snap install gallery-dl + Chocolatey ---------- @@ -119,15 +101,17 @@ Description: ========== $ choco install gallery-dl + Scoop - ---------- + ----- - Apart from Chocolatey, *gallery-dl* is also available in Scoop_ "main" bucket for Windows users. + *gallery-dl* is also available in the Scoop_ "main" bucket for Windows users: .. 
code:: powershell $ scoop install gallery-dl + Usage ===== @@ -232,9 +216,10 @@ Description: ========== ``e621``, ``exhentai``, ``idolcomplex``, + ``imgbb``, ``inkbunny``, ``instagram``, - ``luscious``, + ``mangoxo``, ``pinterest``, ``sankaku``, ``subscribestar``, @@ -264,6 +249,7 @@ Description: ========== $ gallery-dl -u -p URL $ gallery-dl -o username= -o password= URL + Cookies ------- @@ -307,12 +293,14 @@ Description: ========== $ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL + OAuth ----- *gallery-dl* supports user authentication via OAuth_ for - ``deviantart``, ``flickr``, ``reddit``, ``smugmug`` and ``tumblr``. - This is entirely optional, but grants *gallery-dl* the ability + ``deviantart``, ``flickr``, ``reddit``, ``smugmug``, ``tumblr``, + and ``mastodon`` instances. + This is mostly optional, but grants *gallery-dl* the ability to issue requests on your account's behalf and enables it to access resources which would otherwise be unavailable to a public user. @@ -327,13 +315,20 @@ Description: ========== access to *gallery-dl*. Authorize it and you will be shown one or more "tokens", which should be added to your configuration file. + To authenticate with a ``mastodon`` instance, run *gallery-dl* with + ``oauth:mastodon:`` as argument. For example: + + .. code:: bash + + $ gallery-dl oauth:mastodon:pawoo.net + $ gallery-dl oauth:mastodon:https://mastodon.social/ + + .. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.5.tar.gz - .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ .. 
_PyPI: https://pypi.org/ diff --git a/README.rst b/README.rst index cec53a4..20ed222 100644 --- a/README.rst +++ b/README.rst @@ -10,6 +10,8 @@ and powerful filenaming capabilities. |pypi| |build| |gitter| +.. contents:: + Dependencies ============ @@ -27,6 +29,7 @@ Optional Installation ============ + Pip --- @@ -46,48 +49,26 @@ pip_ as well: Note: Windows users should use :code:`py -3` instead of :code:`python3`. -| It is advised to use the latest version of pip_, - including the essential packages :code:`setuptools` and :code:`wheel`. -| To ensure that these packages are up-to-date, run +It is advised to use the latest version of pip_, +including the essential packages :code:`setuptools` and :code:`wheel`. +To ensure these packages are up-to-date, run .. code:: bash $ python3 -m pip install --upgrade pip setuptools wheel -From Source ------------ - -Get the code by either - -* Downloading a stable_ or dev_ archive and unpacking it -* Or via :code:`git clone https://github.com/mikf/gallery-dl.git` - -Navigate into the respective directory and run the :code:`setup.py` file. - -.. code:: bash - - $ wget https://github.com/mikf/gallery-dl/archive/master.tar.gz - $ tar -xf master.tar.gz - # or - $ git clone https://github.com/mikf/gallery-dl.git - - $ cd gallery-dl* - $ python3 setup.py install - - Standalone Executable --------------------- -Download a standalone executable file, -put it into your `PATH `__, -and run it inside a command prompt (like ``cmd.exe``). +Prebuilt executable files with a Python interpreter and +required Python packages included are available for -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ -These executables include a Python interpreter -and all required Python packages. 
+| Executables built from the latest commit can be found at +| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml Snap @@ -99,6 +80,7 @@ Linux users that are using a distro that is supported by Snapd_ can install *gal $ snap install gallery-dl + Chocolatey ---------- @@ -108,15 +90,17 @@ Windows users that have Chocolatey_ installed can install *gallery-dl* from the $ choco install gallery-dl + Scoop ----------- +----- -Apart from Chocolatey, *gallery-dl* is also available in Scoop_ "main" bucket for Windows users. +*gallery-dl* is also available in the Scoop_ "main" bucket for Windows users: .. code:: powershell $ scoop install gallery-dl + Usage ===== @@ -221,9 +205,10 @@ and optional for ``e621``, ``exhentai``, ``idolcomplex``, +``imgbb``, ``inkbunny``, ``instagram``, -``luscious``, +``mangoxo``, ``pinterest``, ``sankaku``, ``subscribestar``, @@ -253,6 +238,7 @@ or you can provide them directly via the $ gallery-dl -u -p URL $ gallery-dl -o username= -o password= URL + Cookies ------- @@ -296,12 +282,14 @@ the :code:`--cookies` command-line option: $ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL + OAuth ----- *gallery-dl* supports user authentication via OAuth_ for -``deviantart``, ``flickr``, ``reddit``, ``smugmug`` and ``tumblr``. -This is entirely optional, but grants *gallery-dl* the ability +``deviantart``, ``flickr``, ``reddit``, ``smugmug``, ``tumblr``, +and ``mastodon`` instances. +This is mostly optional, but grants *gallery-dl* the ability to issue requests on your account's behalf and enables it to access resources which would otherwise be unavailable to a public user. @@ -316,13 +304,20 @@ You will be sent to the site's authorization page and asked to grant read access to *gallery-dl*. Authorize it and you will be shown one or more "tokens", which should be added to your configuration file. +To authenticate with a ``mastodon`` instance, run *gallery-dl* with +``oauth:mastodon:<instance>`` as argument. For example: + +.. 
code:: bash + + $ gallery-dl oauth:mastodon:pawoo.net + $ gallery-dl oauth:mastodon:https://mastodon.social/ + + .. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.5.tar.gz -.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ .. _PyPI: https://pypi.org/ diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl index f134f63..76afd8a 100644 --- a/data/completion/_gallery-dl +++ b/data/completion/_gallery-dl @@ -8,16 +8,17 @@ _arguments -C -S \ {-h,--help}'[Print this help message and exit]' \ --version'[Print program version and exit]' \ {-d,--dest}'[Destination directory]':'':_files \ -{-i,--input-file}'[Download URLs found in FILE ("-" for stdin)]':'':_files \ +{-i,--input-file}'[Download URLs found in FILE ("-" for stdin). 
More than one --input-file can be specified]':'':_files \ --cookies'[File to load additional cookies from]':'':_files \ --proxy'[Use the specified proxy]':'' \ --clear-cache'[Delete all cached login sessions, cookies, etc.]' \ {-q,--quiet}'[Activate quiet mode]' \ {-v,--verbose}'[Print various debugging information]' \ {-g,--get-urls}'[Print URLs instead of downloading]' \ --G'[==SUPPRESS==]' \ +{-G,--resolve-urls}'[Print URLs instead of downloading; resolve intermediary URLs]' \ {-j,--dump-json}'[Print JSON information]' \ {-s,--simulate}'[Simulate data extraction; do not download anything]' \ +{-E,--extractor-info}'[Print extractor defaults and settings]' \ {-K,--list-keywords}'[Print a list of available keywords and example values for the given URLs]' \ --list-modules'[Print a list of available extractor modules]' \ --list-extractors'[Print a list of extractor classes with description, (sub)category and example URL]' \ diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl index 19cb39f..9a3a63e 100644 --- a/data/completion/gallery-dl +++ b/data/completion/gallery-dl @@ -10,7 +10,7 @@ _gallery_dl() elif [[ "${prev}" =~ ^(-d|--dest)$ ]]; then COMPREPLY=( $(compgen -d -- "${cur}") ) else - COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --dump-json --simulate --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --abort --http-timeout --sleep --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after" -- "${cur}") ) + COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet 
--verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --abort --http-timeout --sleep --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after" -- "${cur}") ) fi } diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 02639b8..c420d9b 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2021-02-14" "1.16.5" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2021-03-05" "1.17.0" "gallery-dl Manual" .\" disable hyphenation .nh @@ -27,7 +27,7 @@ Print program version and exit Destination directory .TP .B "\-i, \-\-input\-file" \f[I]FILE\f[] -Download URLs found in FILE ('-' for stdin) +Download URLs found in FILE ('-' for stdin). 
More than one --input-file can be specified .TP .B "\-\-cookies" \f[I]FILE\f[] File to load additional cookies from @@ -47,12 +47,18 @@ Print various debugging information .B "\-g, \-\-get\-urls" Print URLs instead of downloading .TP +.B "\-G, \-\-resolve\-urls" +Print URLs instead of downloading; resolve intermediary URLs +.TP .B "\-j, \-\-dump\-json" Print JSON information .TP .B "\-s, \-\-simulate" Simulate data extraction; do not download anything .TP +.B "\-E, \-\-extractor\-info" +Print extractor defaults and settings +.TP .B "\-K, \-\-list\-keywords" Print a list of available keywords and example values for the given URLs .TP diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 408cb61..c0629bb 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2021-02-14" "1.16.5" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2021-03-05" "1.17.0" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -338,11 +338,13 @@ and optional for .br * \f[I]idolcomplex\f[] .br +* \f[I]imgbb\f[] +.br * \f[I]inkbunny\f[] .br * \f[I]instagram\f[] .br -* \f[I]luscious\f[] +* \f[I]mangoxo\f[] .br * \f[I]pinterest\f[] .br @@ -451,7 +453,7 @@ otherwise \f[I]http://\f[] is assumed. \f[I]string\f[] .IP "Default:" 9 -\f[I]"Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0"\f[] +\f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"\f[] .IP "Description:" 4 User-Agent header value to be used for HTTP requests. @@ -460,6 +462,28 @@ Note: This option has no effect on pixiv extractors, as these need specific values to function correctly. 
+.SS extractor.*.browser +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"firefox"\f[] for \f[I]patreon\f[], \f[I]null\f[] everywhere else + +.IP "Example:" 4 +.br +* "chrome:macos" + +.IP "Description:" 4 +Try to emulate a real browser (\f[I]firefox\f[] or \f[I]chrome\f[]) +by using their default HTTP headers and TLS ciphers for HTTP requests. + +Optionally, the operating system used in the \f[I]User-Agent\f[] header can be +specified after a \f[I]:\f[] (\f[I]windows\f[], \f[I]linux\f[], or \f[I]macos\f[]). + +Note: \f[I]requests\f[] and \f[I]urllib3\f[] only support HTTP/1.1, while a real +browser would use HTTP/2. + + .SS extractor.*.keywords .IP "Type:" 6 \f[I]object\f[] @@ -839,7 +863,7 @@ See \f[I]Filters\f[] for details. \f[I]false\f[] .IP "Description:" 4 -Download extra Sta.sh resources from +Download embedded Deviations and Sta.sh resources from description texts and journals. Note: Enabling this option also enables deviantart.metadata_. @@ -1037,31 +1061,30 @@ If this value is an \f[I]integer\f[], it gets used as the limit maximum instead of the value listed on \f[I]https://e-hentai.org/home.php\f[] -.SS extractor.exhentai.original +.SS extractor.exhentai.metadata .IP "Type:" 6 \f[I]bool\f[] .IP "Default:" 9 -\f[I]true\f[] +\f[I]false\f[] .IP "Description:" 4 -Download full-sized original images if available. +Load extended gallery metadata from the +\f[I]API\f[]. +Adds \f[I]archiver_key\f[], \f[I]posted\f[], and \f[I]torrents\f[]. +Makes \f[I]date\f[] and \f[I]filesize\f[] more precise. -.SS extractor.exhentai.wait-min & .wait-max + +.SS extractor.exhentai.original .IP "Type:" 6 -\f[I]float\f[] +\f[I]bool\f[] .IP "Default:" 9 -\f[I]3.0\f[] and \f[I]6.0\f[] +\f[I]true\f[] .IP "Description:" 4 -Minimum and maximum wait time in seconds between each image - -ExHentai detects and blocks automated downloaders. 
-*gallery-dl* waits a randomly selected number of -seconds between \f[I]wait-min\f[] and \f[I]wait-max\f[] after -each image to prevent getting blocked. +Download full-sized original images if available. .SS extractor.flickr.access-token & .access-token-secret @@ -1142,20 +1165,6 @@ Possible values are You can use \f[I]"all"\f[] instead of listing all values separately. -.SS extractor.gelbooru.api -.IP "Type:" 6 -\f[I]bool\f[] - -.IP "Default:" 9 -\f[I]true\f[] - -.IP "Description:" 4 -Enable use of Gelbooru's API. - -Set this value to false if the API has been disabled to switch -to manual information extraction. - - .SS extractor.gfycat.format .IP "Type:" 6 \f[I]string\f[] @@ -1192,6 +1201,17 @@ Possible values are You can use \f[I]"all"\f[] instead of listing all values separately. +.SS extractor.hentainexus.original +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Download original files instead of WebP versions. + + .SS extractor.hitomi.metadata .IP "Type:" 6 \f[I]bool\f[] @@ -1285,6 +1305,17 @@ If the selected format is not available, the first in the list gets chosen (usually mp3). +.SS extractor.mangadex.api-server +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"https://api.mangadex.org"\f[] + +.IP "Description:" 4 +The server to use for API requests. + + .SS extractor.newgrounds.flash .IP "Type:" 6 \f[I]bool\f[] @@ -1444,6 +1475,17 @@ Download user avatars. Also download related artworks. +.SS extractor.pixiv.translated-tags +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Provide translated \f[I]tags\f[]. + + .SS extractor.pixiv.ugoira .IP "Type:" 6 \f[I]bool\f[] @@ -1472,18 +1514,6 @@ to watchable videos. (Example__) Also search Plurk comments for URLs. 
-.SS extractor.reactor.wait-min & .wait-max -.IP "Type:" 6 -\f[I]float\f[] - -.IP "Default:" 9 -\f[I]3.0\f[] and \f[I]6.0\f[] - -.IP "Description:" 4 -Minimum and maximum wait time in seconds between HTTP requests -during the extraction process. - - .SS extractor.readcomiconline.captcha .IP "Type:" 6 \f[I]string\f[] @@ -1755,6 +1785,18 @@ You can use \f[I]"all"\f[] instead of listing all types separately. Fetch media from \f[I]Cards\f[]. +.SS extractor.twitter.conversations +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Fetch media from all Tweets and replies in a \f[I]conversation +\f[]. + + .SS extractor.twitter.quoted .IP "Type:" 6 \f[I]bool\f[] @@ -2064,6 +2106,17 @@ Check the file headers of \f[I]jpg\f[], \f[I]png\f[], and \f[I]gif\f[] files and adjust their filename extensions if they do not match. +.SS downloader.http.headers +.IP "Type:" 6 +\f[I]object\f[] + +.IP "Example:" 4 +{"Accept": "image/webp,*/*", "Referer": "https://example.org/"} + +.IP "Description:" 4 +Additional HTTP headers to send when downloading files. + + .SS downloader.ytdl.format .IP "Type:" 6 \f[I]string\f[] @@ -2103,6 +2156,17 @@ Note: Set \f[I]quiet\f[] and \f[I]no_warnings\f[] in \f[I]downloader.ytdl.raw-options\f[] to \f[I]true\f[] to suppress all output. +.SS downloader.ytdl.module +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"youtube_dl"\f[] + +.IP "Description:" 4 +Name of the youtube-dl Python module to import. + + .SS downloader.ytdl.outtmpl .IP "Type:" 6 \f[I]string\f[] @@ -2687,24 +2751,6 @@ Set this option to \f[I]null\f[] or an invalid path to disable this cache. 
-.SS ciphers -.IP "Type:" 6 -\f[I]bool\f[] or \f[I]string\f[] - -.IP "Default:" 9 -\f[I]true\f[] - -.IP "Description:" 4 -.br -* \f[I]true\f[]: Update urllib3's default cipher list -.br -* \f[I]false\f[]: Leave the default cipher list as is -.br -* Any \f[I]string\f[]: Replace urllib3's default ciphers with these -(See \f[I]SSLContext.set_ciphers()\f[] -for details) - - .SS pyopenssl .IP "Type:" 6 \f[I]bool\f[] diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index bc9999b..acf60c7 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -2,17 +2,34 @@ "extractor": { "base-directory": "./gallery-dl/", + "parent-directory": false, "postprocessors": null, "archive": null, "cookies": null, - "cookies-update": false, + "cookies-update": true, "proxy": null, "skip": true, + + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0", + "retries": 4, + "timeout": 30.0, + "verify": true, + "sleep": 0, + "sleep-request": 0, + "sleep-extractor": 0, + "path-restrict": "auto", "path-replace": "_", "path-remove": "\\u0000-\\u001f\\u007f", - "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0", + "extension-map": { + "jpeg": "jpg", + "jpe" : "jpg", + "jfif": "jpg", + "jif" : "jpg", + "jfi" : "jpg" + }, + "artstation": { @@ -21,7 +38,8 @@ "aryion": { "username": null, - "password": null + "password": null, + "recursive": true }, "blogger": { @@ -33,11 +51,17 @@ "password": null, "ugoira": false }, + "derpibooru": + { + "api-key": null, + "filter": 56027 + }, "deviantart": { "extra": false, "flat": true, "folders": false, + "include": "gallery", "journals": "html", "mature": true, "metadata": false, @@ -45,29 +69,43 @@ "quality": 100, "wait-min": 0 }, + "e621": + { + "username": null, + "password": null + }, "exhentai": { "username": null, "password": null, "domain": "auto", "limits": true, + "metadata": false, "original": true, - "wait-min": 3.0, - "wait-max": 6.0 + "sleep-request": 5.0 }, 
"flickr": { "videos": true, "size-max": null }, - "gelbooru": + "furaffinity": { - "api": true + "descriptions": "text", + "include": "gallery" }, "gfycat": { "format": "mp4" }, + "hentaifoundry": + { + "include": "pictures" + }, + "hentainexus": + { + "original": true + }, "hitomi": { "metadata": true @@ -76,40 +114,81 @@ { "username": null, "password": null, - "wait-min": 3.0, - "wait-max": 6.0 + "sleep-request": 5.0 + }, + "imgbb": + { + "username": null, + "password": null }, "imgur": { "mp4": true }, + "inkbunny": + { + "username": null, + "password": null, + "orderby": "create_datetime" + }, "instagram": { - "highlights": false, + "username": null, + "password": null, + "include": "posts", + "sleep-request": 5.0, "videos": true }, - "nijie": + "khinsider": + { + "format": "mp3" + }, + "mangadex": + { + "api-server": "https://api.mangadex.org" + }, + "mangoxo": { "username": null, "password": null }, + "newgrounds": + { + "username": null, + "password": null, + "flash": true, + "include": "art" + }, + "nijie": + { + "username": null, + "password": null, + "include": "illustration,doujin" + }, "oauth": { "browser": true, "cache": true, "port": 6414 }, + "pillowfort": + { + "reblogs": false + }, + "pinterest": + { + "sections": true, + "videos": true + }, "pixiv": { - "username": null, - "password": null, "avatar": false, + "translated-tags": false, "ugoira": true }, "reactor": { - "wait-min": 3.0, - "wait-max": 6.0 + "sleep-request": 5.0 }, "reddit": { @@ -128,11 +207,35 @@ { "format": "mp4" }, + "sankakucomplex": + { + "embeds": false, + "videos": true + }, + "sankaku": + { + "username": null, + "password": null + }, + "smugmug": + { + "videos": true + }, "seiga": { "username": null, "password": null }, + "subscribestar": + { + "username": null, + "password": null + }, + "tsumino": + { + "username": null, + "password": null + }, "tumblr": { "avatar": false, @@ -143,12 +246,20 @@ }, "twitter": { + "username": null, + "password": null, + "cards": false, + 
"conversations": false, "quoted": true, "replies": true, "retweets": true, "twitpic": false, "videos": true }, + "unsplash": + { + "format": "raw" + }, "vsco": { "videos": true @@ -157,6 +268,10 @@ { "api-key": null }, + "weasyl": + { + "api-key": null + }, "weibo": { "retweets": true, @@ -172,29 +287,28 @@ { "filesize-min": null, "filesize-max": null, + "mtime": true, "part": true, "part-directory": null, + "rate": null, + "retries": 4, + "timeout": 30.0, + "verify": true, "http": { "adjust-extensions": true, - "mtime": true, - "rate": null, - "retries": 4, - "timeout": 30.0, - "verify": true + "headers": null }, "ytdl": { "format": null, "forward-cookies": false, - "mtime": true, + "logging": true, + "module": "youtube_dl", "outtmpl": null, - "rate": null, - "retries": 4, - "timeout": 30.0, - "verify": true + "raw-options": null } }, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index b87c59d..fbf67fe 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.16.5 +Version: 1.17.0 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -21,6 +21,8 @@ Description: ========== |pypi| |build| |gitter| + .. contents:: + Dependencies ============ @@ -38,6 +40,7 @@ Description: ========== Installation ============ + Pip --- @@ -57,48 +60,26 @@ Description: ========== Note: Windows users should use :code:`py -3` instead of :code:`python3`. - | It is advised to use the latest version of pip_, - including the essential packages :code:`setuptools` and :code:`wheel`. - | To ensure that these packages are up-to-date, run + It is advised to use the latest version of pip_, + including the essential packages :code:`setuptools` and :code:`wheel`. + To ensure these packages are up-to-date, run .. 
code:: bash $ python3 -m pip install --upgrade pip setuptools wheel - From Source - ----------- - - Get the code by either - - * Downloading a stable_ or dev_ archive and unpacking it - * Or via :code:`git clone https://github.com/mikf/gallery-dl.git` - - Navigate into the respective directory and run the :code:`setup.py` file. - - .. code:: bash - - $ wget https://github.com/mikf/gallery-dl/archive/master.tar.gz - $ tar -xf master.tar.gz - # or - $ git clone https://github.com/mikf/gallery-dl.git - - $ cd gallery-dl* - $ python3 setup.py install - - Standalone Executable --------------------- - Download a standalone executable file, - put it into your `PATH `__, - and run it inside a command prompt (like ``cmd.exe``). + Prebuilt executable files with a Python interpreter and + required Python packages included are available for - - `Windows `__ - - `Linux `__ + - `Windows `__ + - `Linux `__ - These executables include a Python interpreter - and all required Python packages. + | Executables build from the latest commit can be found at + | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml Snap @@ -110,6 +91,7 @@ Description: ========== $ snap install gallery-dl + Chocolatey ---------- @@ -119,15 +101,17 @@ Description: ========== $ choco install gallery-dl + Scoop - ---------- + ----- - Apart from Chocolatey, *gallery-dl* is also available in Scoop_ "main" bucket for Windows users. + *gallery-dl* is also available in the Scoop_ "main" bucket for Windows users: .. 
code:: powershell $ scoop install gallery-dl + Usage ===== @@ -232,9 +216,10 @@ Description: ========== ``e621``, ``exhentai``, ``idolcomplex``, + ``imgbb``, ``inkbunny``, ``instagram``, - ``luscious``, + ``mangoxo``, ``pinterest``, ``sankaku``, ``subscribestar``, @@ -264,6 +249,7 @@ Description: ========== $ gallery-dl -u -p URL $ gallery-dl -o username= -o password= URL + Cookies ------- @@ -307,12 +293,14 @@ Description: ========== $ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL + OAuth ----- *gallery-dl* supports user authentication via OAuth_ for - ``deviantart``, ``flickr``, ``reddit``, ``smugmug`` and ``tumblr``. - This is entirely optional, but grants *gallery-dl* the ability + ``deviantart``, ``flickr``, ``reddit``, ``smugmug``, ``tumblr``, + and ``mastodon`` instances. + This is mostly optional, but grants *gallery-dl* the ability to issue requests on your account's behalf and enables it to access resources which would otherwise be unavailable to a public user. @@ -327,13 +315,20 @@ Description: ========== access to *gallery-dl*. Authorize it and you will be shown one or more "tokens", which should be added to your configuration file. + To authenticate with a ``mastodon`` instance, run *gallery-dl* with + ``oauth:mastodon:`` as argument. For example: + + .. code:: bash + + $ gallery-dl oauth:mastodon:pawoo.net + $ gallery-dl oauth:mastodon:https://mastodon.social/ + + .. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.5.tar.gz - .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ .. 
_PyPI: https://pypi.org/ diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 066ac90..89ae8ed 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -13,7 +13,6 @@ docs/gallery-dl.conf gallery_dl/__init__.py gallery_dl/__main__.py gallery_dl/cache.py -gallery_dl/cloudflare.py gallery_dl/config.py gallery_dl/exception.py gallery_dl/job.py @@ -50,6 +49,7 @@ gallery_dl/extractor/behance.py gallery_dl/extractor/blogger.py gallery_dl/extractor/booru.py gallery_dl/extractor/common.py +gallery_dl/extractor/cyberdrop.py gallery_dl/extractor/danbooru.py gallery_dl/extractor/derpibooru.py gallery_dl/extractor/deviantart.py @@ -65,6 +65,8 @@ gallery_dl/extractor/foolslide.py gallery_dl/extractor/furaffinity.py gallery_dl/extractor/fuskator.py gallery_dl/extractor/gelbooru.py +gallery_dl/extractor/gelbooru_v01.py +gallery_dl/extractor/gelbooru_v02.py gallery_dl/extractor/gfycat.py gallery_dl/extractor/hbrowse.py gallery_dl/extractor/hentai2read.py @@ -110,6 +112,7 @@ gallery_dl/extractor/moebooru.py gallery_dl/extractor/myhentaigallery.py gallery_dl/extractor/myportfolio.py gallery_dl/extractor/naver.py +gallery_dl/extractor/naverwebtoon.py gallery_dl/extractor/newgrounds.py gallery_dl/extractor/ngomik.py gallery_dl/extractor/nhentai.py @@ -149,6 +152,7 @@ gallery_dl/extractor/subscribestar.py gallery_dl/extractor/test.py gallery_dl/extractor/tsumino.py gallery_dl/extractor/tumblr.py +gallery_dl/extractor/tumblrgallery.py gallery_dl/extractor/twitter.py gallery_dl/extractor/unsplash.py gallery_dl/extractor/vanillarock.py diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 6c2c713..c1f80b6 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -196,7 +196,7 @@ def main(): cnt, "entry" if cnt == 1 else "entries", cache._path(), ) else: - if not args.urls and not args.inputfile: + if not args.urls and not args.inputfiles: parser.error( "The following arguments are required: URL\n" "Use 
'gallery-dl --help' to get a list of all options.") @@ -208,18 +208,19 @@ def main(): jobtype = args.jobtype or job.DownloadJob urls = args.urls - if args.inputfile: - try: - if args.inputfile == "-": - if sys.stdin: - urls += parse_inputfile(sys.stdin, log) + if args.inputfiles: + for inputfile in args.inputfiles: + try: + if inputfile == "-": + if sys.stdin: + urls += parse_inputfile(sys.stdin, log) + else: + log.warning("input file: stdin is not readable") else: - log.warning("input file: stdin is not readable") - else: - with open(args.inputfile, encoding="utf-8") as file: - urls += parse_inputfile(file, log) - except OSError as exc: - log.warning("input file: %s", exc) + with open(inputfile, encoding="utf-8") as file: + urls += parse_inputfile(file, log) + except OSError as exc: + log.warning("input file: %s", exc) # unsupported file logging handler handler = output.setup_logging_handler( diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py deleted file mode 100644 index 0f49d61..0000000 --- a/gallery_dl/cloudflare.py +++ /dev/null @@ -1,201 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2015-2020 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Methods to access sites behind Cloudflare protection""" - -import time -import operator -import collections -import urllib.parse -from xml.etree import ElementTree -from . 
import text -from .cache import memcache - - -def is_challenge(response): - return (response.status_code == 503 and - response.headers.get("Server", "").startswith("cloudflare") and - b"jschl-answer" in response.content) - - -def is_captcha(response): - return (response.status_code == 403 and - b'name="captcha-bypass"' in response.content) - - -def solve_challenge(session, response, kwargs): - """Solve Cloudflare challenge and get cfclearance cookie""" - parsed = urllib.parse.urlsplit(response.url) - root = parsed.scheme + "://" + parsed.netloc - page = response.text - - cf_kwargs = {} - headers = cf_kwargs["headers"] = collections.OrderedDict() - params = cf_kwargs["data"] = collections.OrderedDict() - headers["Referer"] = response.url - - form = text.extract(page, 'id="challenge-form"', '')[0] - for element in ElementTree.fromstring( - "" + form + "").findall("input"): - name = element.attrib.get("name") - if not name: - continue - if name == "jschl_answer": - try: - value = solve_js_challenge(page, parsed.netloc) - except Exception: - return response, None, None - else: - value = element.attrib.get("value") - params[name] = value - - try: - params = {"ray": text.extract(page, '?ray=', '"')[0]} - - url = root + "/cdn-cgi/images/trace/jschal/nojs/transparent.gif" - session.request("GET", url, params=params) - - url = root + "/cdn-cgi/images/trace/jschal/js/nocookie/transparent.gif" - session.request("GET", url, params=params) - except Exception: - pass - - time.sleep(4) - url = root + text.unescape(text.extract(page, 'action="', '"')[0]) - cf_response = session.request("POST", url, **cf_kwargs) - - if cf_response.history: - initial_response = cf_response.history[0] - else: - initial_response = cf_response - - cookies = { - cookie.name: cookie.value - for cookie in initial_response.cookies - } - - if not cookies: - import logging - log = logging.getLogger("cloudflare") - log.debug("Headers:\n%s", initial_response.headers) - log.debug("Content:\n%s", 
initial_response.text) - return cf_response, None, None - - domain = next(iter(initial_response.cookies)).domain - cookies["__cfduid"] = response.cookies.get("__cfduid", "") - return cf_response, domain, cookies - - -def solve_js_challenge(page, netloc): - """Evaluate JS challenge in 'page' to get 'jschl_answer' value""" - - # build variable name - # e.g. '...f, wqnVscP={"DERKbJk":+(...' --> wqnVscP.DERKbJk - data, pos = text.extract_all(page, ( - ('var' , ',f, ', '='), - ('key' , '"' , '"'), - ('expr', ':' , '}'), - )) - variable = "{}.{}".format(data["var"], data["key"]) - vlength = len(variable) - - k = text.extract(page, "k = '", "'")[0] - - # evaluate the initial expression - solution = evaluate_expression(data["expr"], page, netloc) - - # iterator over all remaining expressions - # and combine their values in 'solution' - expressions = text.extract( - page, "'challenge-form');", "f.submit();", pos)[0] - for expr in expressions.split(";")[1:]: - - if expr.startswith(variable): - # select arithmetc function based on operator (+/-/*) - func = OPERATORS[expr[vlength]] - # evaluate the rest of the expression - value = evaluate_expression(expr[vlength+2:], page, netloc, k) - # combine expression value with our current solution - solution = func(solution, value) - - elif expr.startswith("a.value"): - if "t.length)" in expr: - # add length of hostname - solution += len(netloc) - if ".toFixed(" in expr: - # trim solution to 10 decimal places - solution = "{:.10f}".format(solution) - return solution - - elif expr.startswith("k+="): - k += str(evaluate_expression(expr[3:], page, netloc)) - - -def evaluate_expression(expr, page, netloc, k=""): - """Evaluate a single Javascript expression for the challenge""" - - if expr.startswith("function(p)"): - # get HTML element with ID k and evaluate the expression inside - # 'eval(eval("document.getElementById(k).innerHTML"))' - expr = text.extract(page, 'id="'+k+'"', '<')[0] - return evaluate_expression(expr.partition(">")[2], 
page, netloc) - - if "/" in expr: - # split the expression in numerator and denominator subexpressions, - # evaluate them separately, - # and return their fraction-result - num, _, denom = expr.partition("/") - num = evaluate_expression(num, page, netloc) - denom = evaluate_expression(denom, page, netloc) - return num / denom - - if "function(p)" in expr: - # split initial expression and function code - initial, _, func = expr.partition("function(p)") - # evaluate said expression - initial = evaluate_expression(initial, page, netloc) - # get function argument and use it as index into 'netloc' - index = evaluate_expression(func[func.index("}")+1:], page, netloc) - return initial + ord(netloc[int(index)]) - - # iterate over all subexpressions, - # evaluate them, - # and accumulate their values in 'result' - result = "" - for subexpr in expr.strip("+()").split(")+("): - value = 0 - for part in subexpr.split("+"): - if "-" in part: - p1, _, p2 = part.partition("-") - value += VALUES[p1] - VALUES[p2] - else: - value += VALUES[part] - result += str(value) - return int(result) - - -OPERATORS = { - "+": operator.add, - "-": operator.sub, - "*": operator.mul, -} - - -VALUES = { - "": 0, - "!": 1, - "[]": 0, - "!![]": 1, - "(!![]": 1, - "(!![])": 1, -} - - -@memcache(keyarg=0) -def cookies(category): - return None diff --git a/gallery_dl/downloader/__init__.py b/gallery_dl/downloader/__init__.py index 6fb09e1..e1b936e 100644 --- a/gallery_dl/downloader/__init__.py +++ b/gallery_dl/downloader/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,8 +8,6 @@ """Downloader modules""" -import importlib - modules = [ "http", "text", @@ -24,22 +22,22 @@ def find(scheme): except KeyError: pass - klass = None + cls = None if scheme == "https": scheme = "http" if scheme 
in modules: # prevent unwanted imports try: - module = importlib.import_module("." + scheme, __package__) + module = __import__(scheme, globals(), None, (), 1) except ImportError: pass else: - klass = module.__downloader__ + cls = module.__downloader__ if scheme == "http": - _cache["http"] = _cache["https"] = klass + _cache["http"] = _cache["https"] = cls else: - _cache[scheme] = klass - return klass + _cache[scheme] = cls + return cls # -------------------------------------------------------------------- diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 8d72dc2..bc42d7c 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2020 Mike Fährmann +# Copyright 2014-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -31,6 +31,7 @@ class HttpDownloader(DownloaderBase): self.downloading = False self.adjust_extension = self.config("adjust-extensions", True) + self.headers = self.config("headers") self.minsize = self.config("filesize-min") self.maxsize = self.config("filesize-max") self.retries = self.config("retries", extractor._retries) @@ -93,13 +94,16 @@ class HttpDownloader(DownloaderBase): time.sleep(tries) tries += 1 - headers = {} + headers = {"Accept": "*/*"} file_header = None # check for .part file file_size = pathfmt.part_size() if file_size: headers["Range"] = "bytes={}-".format(file_size) + # general headers + if self.headers: + headers.update(self.headers) # file-specific headers extra = pathfmt.kwdict.get("_http_headers") if extra: diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index 8086b5d..e116188 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2020 Mike Fährmann +# Copyright 2018-2021 Mike Fährmann # # This program is 
free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,7 +8,6 @@ """Downloader module for URLs requiring youtube-dl support""" -from youtube_dl import YoutubeDL, DEFAULT_OUTTMPL from .common import DownloaderBase from .. import text import os @@ -16,8 +15,14 @@ import os class YoutubeDLDownloader(DownloaderBase): scheme = "ytdl" + module = None def __init__(self, job): + module = self.module + if not module: + module_name = self.config("module") or "youtube_dl" + module = YoutubeDLDownloader.module = __import__(module_name) + DownloaderBase.__init__(self, job) extractor = job.extractor @@ -42,10 +47,11 @@ class YoutubeDLDownloader(DownloaderBase): options["logger"] = self.log self.forward_cookies = self.config("forward-cookies", False) - outtmpl = self.config("outtmpl") - self.outtmpl = DEFAULT_OUTTMPL if outtmpl == "default" else outtmpl + self.outtmpl = self.config("outtmpl") + if self.outtmpl == "default": + self.outtmpl = module.DEFAULT_OUTTMPL - self.ytdl = YoutubeDL(options) + self.ytdl = module.YoutubeDL(options) def download(self, url, pathfmt): if self.forward_cookies: diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index 81b11fd..aa0e8ad 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -50,6 +50,8 @@ class _500pxExtractor(Extractor): def _extend(self, edges): """Extend photos with additional metadata and higher resolution URLs""" + ids = [str(edge["node"]["legacyId"]) for edge in edges] + url = "https://api.500px.com/v1/photos" params = { "expanded_user_info" : "true", @@ -62,14 +64,14 @@ class _500pxExtractor(Extractor): "liked_by" : "1", "following_sample" : "100", "image_size" : "4096", - "ids" : ",".join( - str(edge["node"]["legacyId"]) for edge in edges), + "ids" : ",".join(ids), } - data = self._request_api(url, params)["photos"] + photos = self._request_api(url, params)["photos"] return [ - 
data[str(edge["node"]["legacyId"])] - for edge in edges + photos[pid] for pid in ids + if pid in photos or + self.log.warning("Unable to fetch photo %s", pid) ] def _request_api(self, url, params, csrf_token=None): @@ -142,6 +144,10 @@ class _500pxGalleryExtractor(_500pxExtractor): "user": dict, }, }), + # unavailable photos (#1335) + ("https://500px.com/p/Light_Expression_Photography/galleries/street", { + "count": 0, + }), ("https://500px.com/fashvamp/galleries/lera"), ) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 923a78b..57794d0 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -7,7 +7,6 @@ # published by the Free Software Foundation. import re -import importlib modules = [ "2chan", @@ -23,6 +22,7 @@ modules = [ "bcy", "behance", "blogger", + "cyberdrop", "danbooru", "derpibooru", "deviantart", @@ -35,6 +35,8 @@ modules = [ "furaffinity", "fuskator", "gelbooru", + "gelbooru_v01", + "gelbooru_v02", "gfycat", "hbrowse", "hentai2read", @@ -76,6 +78,7 @@ modules = [ "myhentaigallery", "myportfolio", "naver", + "naverwebtoon", "newgrounds", "ngomik", "nhentai", @@ -111,6 +114,7 @@ modules = [ "subscribestar", "tsumino", "tumblr", + "tumblrgallery", "twitter", "unsplash", "vanillarock", @@ -182,11 +186,12 @@ def _list_classes(): """Yield all available extractor classes""" yield from _cache + globals_ = globals() for module_name in _module_iter: - module = importlib.import_module("."+module_name, __package__) + module = __import__(module_name, globals_, None, (), 1) yield from add_module(module) - globals()["_list_classes"] = lambda : _cache + globals_["_list_classes"] = lambda : _cache def _get_classes(module): diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index 64cde80..c3cf3f7 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Mike Fährmann +# Copyright 2015-2021 Mike 
Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,16 +8,12 @@ """Extractors for *booru sites""" -from .common import Extractor, Message, generate_extractors -from .. import text, util, exception - -from xml.etree import ElementTree -import collections +from .common import BaseExtractor, Message +from .. import text import operator -import re -class BooruExtractor(Extractor): +class BooruExtractor(BaseExtractor): """Base class for *booru extractors""" basecategory = "booru" filename_fmt = "{category}_{id}_{md5}.{extension}" @@ -66,191 +62,8 @@ class BooruExtractor(Extractor): _file_url = operator.itemgetter("file_url") - @staticmethod - def _prepare(post): - post["date"] = text.parse_datetime( - post["created_at"], "%a %b %d %H:%M:%S %z %Y") + def _prepare(self, post): + """Prepare the 'post's metadata""" def _extended_tags(self, post, page=None): - if not page: - url = "{}/index.php?page=post&s=view&id={}".format( - self.root, post["id"]) - page = self.request(url).text - html = text.extract(page, '